From 499e1a54f5bf58dafc327183b02592eea9c99d1d Mon Sep 17 00:00:00 2001
From: Adrian Kuegel
Date: Mon, 20 Mar 2023 10:14:02 +0100
Subject: [PATCH 001/691] [mlir] Apply ClangTidy performance finding (NFC).

---
 mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index e001f59b21e93..f44c99b64589f 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -541,7 +541,7 @@ FailureOr<PackResult> linalg::pack(RewriterBase &rewriter,
   // Step 2. Propagate packing to all LinalgOp operands.
   SmallVector<Value> inputsAndInits, results;
-  for (auto operandsList :
+  for (const auto &operandsList :
        {linalgOp.getDpsInputOperands(), linalgOp.getDpsInitOperands()}) {
     for (OpOperand *opOperandPtr : operandsList) {
       int64_t pos = opOperandPtr->getOperandNumber();

From a348ba21f8d88f7cb8a218b9ef2eec3f5602ec6f Mon Sep 17 00:00:00 2001
From: Adrian Kuegel
Date: Mon, 20 Mar 2023 08:28:48 +0100
Subject: [PATCH 002/691] [mlir][Bazel] Avoid __attribute__((weak)) for MSVC.

Differential Revision: https://reviews.llvm.org/D146405
---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 869d7a9fe7c4a..c9a68e1b27a65 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -4101,7 +4101,13 @@ write_file(
 // Provide a weak registration stub in case the real SerializeToCubin is not
 // linked in.
+#if defined(_MSC_VER)
+// This might not work correctly, but it avoids a compilation error because
+// MSVC does not support __attribute__((weak)).
+void mlir::registerGpuSerializeToCubinPass() {}
+#else
 __attribute__((weak)) void mlir::registerGpuSerializeToCubinPass() {}
+#endif
 """,
     ],
 )

From 28997feb0c3ac243cb5cc89d7682993e23463ca7 Mon Sep 17 00:00:00 2001
From: Pavel Kosov
Date: Mon, 20 Mar 2023 12:48:45 +0300
Subject: [PATCH 003/691] [LLVM][OHOS] Clang toolchain and targets

Add the Clang part of the OpenHarmony target.

Related LLVM part: D138202

~~~

Huawei RRI, OS Lab

Reviewed By: DavidSpickett

Differential Revision: https://reviews.llvm.org/D145227
---
 clang/include/clang/Basic/Attr.td             |   1 +
 clang/include/clang/Driver/ToolChain.h        |   2 +-
 clang/lib/Basic/TargetInfo.cpp                |   3 +-
 clang/lib/Basic/Targets.cpp                   |  32 +-
 clang/lib/Basic/Targets/ARM.cpp               |   1 +
 clang/lib/Basic/Targets/OSTargets.h           |  62 +++
 clang/lib/Basic/Targets/X86.h                 |  22 +
 clang/lib/CodeGen/ABIInfo.h                   |   1 +
 clang/lib/CodeGen/TargetInfo.cpp              |   8 +-
 clang/lib/Driver/CMakeLists.txt               |   1 +
 clang/lib/Driver/Driver.cpp                   |   7 +-
 clang/lib/Driver/ToolChains/Arch/AArch64.cpp  |   2 +-
 clang/lib/Driver/ToolChains/Arch/ARM.cpp      |   9 +
 clang/lib/Driver/ToolChains/CommonArgs.cpp    |  12 +-
 clang/lib/Driver/ToolChains/Gnu.cpp           |   7 +-
 clang/lib/Driver/ToolChains/OHOS.cpp          | 411 ++++++++++++++++++
 clang/lib/Driver/ToolChains/OHOS.h            |  94 ++++
 .../CodeGen/aarch64-fix-cortex-a53-835769.c   |   2 +
 .../Inputs/ohos_native_tree/llvm/bin/.keep    |   0
 .../llvm/include/c++/v1/.keep                 |   0
 .../a7_hard_neon-vfpv4/libc++.so              |   0
 .../lib/arm-liteos-ohos/a7_soft/libc++.so     |   0
 .../a7_softfp_neon-vfpv4/libc++.so            |   0
 .../c++/a7_hard_neon-vfpv4/libc++.so          |   0
 .../lib/arm-liteos-ohos/c++/a7_soft/libc++.so |   0
 .../c++/a7_softfp_neon-vfpv4/libc++.so        |   0
 .../llvm/lib/arm-liteos-ohos/libc++.so        |   0
 .../lib/clang/x.y.z/lib/arm-liteos-ohos/.keep |   0
 .../arm-liteos-ohos/a7_hard_neon-vfpv4/.keep  |   0
 .../a7_hard_neon-vfpv4/clang_rt.crtbegin.o    |   0
 .../a7_hard_neon-vfpv4/clang_rt.crtend.o      |   0
 .../a7_hard_neon-vfpv4/libclang_rt.builtins.a |   0
 .../a7_hard_neon-vfpv4/libclang_rt.profile.a  |   0
 .../x.y.z/lib/arm-liteos-ohos/a7_soft/.keep   |   0
 .../a7_soft/clang_rt.crtbegin.o               |   0
 .../arm-liteos-ohos/a7_soft/clang_rt.crtend.o |   0
 .../a7_soft/libclang_rt.builtins.a            |   0
 .../a7_soft/libclang_rt.profile.a             |   0
 .../a7_softfp_neon-vfpv4/.keep                |   0
 .../a7_softfp_neon-vfpv4/clang_rt.crtbegin.o  |   0
 .../a7_softfp_neon-vfpv4/clang_rt.crtend.o    |   0
 .../libclang_rt.builtins.a                    |   0
 .../libclang_rt.profile.a                     |   0
 .../lib/arm-liteos-ohos/clang_rt.crtbegin.o   |   0
 .../lib/arm-liteos-ohos/clang_rt.crtend.o     |   0
 .../arm-liteos-ohos/libclang_rt.builtins.a    |   0
 .../lib/arm-liteos-ohos/libclang_rt.profile.a |   0
 .../sysroot/usr/include/.keep                 |   0
 .../sysroot/usr/include/arm-liteos-ohos/.keep |   0
 .../ohos_native_tree/sysroot/usr/lib/.keep    |   0
 .../sysroot/usr/lib/arm-liteos-ohos/.keep     |   0
 .../arm-liteos-ohos/a7_hard_neon-vfpv4/.keep  |   0
 .../usr/lib/arm-liteos-ohos/a7_soft/.keep     |   0
 .../a7_softfp_neon-vfpv4/.keep                |   0
 clang/test/Driver/ohos.c                      | 239 ++++++++++
 clang/test/Driver/ohos.cpp                    | 120 +++++
 clang/test/Preprocessor/ohos.c                |  15 +
 57 files changed, 1038 insertions(+), 13 deletions(-)
 create mode 100644 clang/lib/Driver/ToolChains/OHOS.cpp
 create mode 100644 clang/lib/Driver/ToolChains/OHOS.h
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/bin/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/include/c++/v1/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/libc++.so
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/a7_soft/libc++.so
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/libc++.so
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/c++/a7_hard_neon-vfpv4/libc++.so
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/c++/a7_soft/libc++.so
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/c++/a7_softfp_neon-vfpv4/libc++.so
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/libc++.so
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/clang_rt.crtbegin.o
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/clang_rt.crtend.o
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/libclang_rt.builtins.a
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/libclang_rt.profile.a
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/clang_rt.crtbegin.o
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/clang_rt.crtend.o
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/libclang_rt.builtins.a
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/libclang_rt.profile.a
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/clang_rt.crtbegin.o
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/clang_rt.crtend.o
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/libclang_rt.builtins.a
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/libclang_rt.profile.a
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/clang_rt.crtbegin.o
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/clang_rt.crtend.o
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/libclang_rt.builtins.a
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/libclang_rt.profile.a
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/include/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/include/arm-liteos-ohos/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/a7_soft/.keep
 create mode 100644 clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/.keep
 create mode 100644 clang/test/Driver/ohos.c
 create mode 100644 clang/test/Driver/ohos.cpp
 create mode 100644 clang/test/Preprocessor/ohos.c

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 6c55465926bf7..3e086ebee6a46 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -925,6 +925,7 @@ def Availability : InheritableAttr {
              .Case("maccatalyst_app_extension", "macCatalyst (App Extension)")
              .Case("swift", "Swift")
              .Case("shadermodel", "HLSL ShaderModel")
+             .Case("ohos", "OpenHarmony OS")
              .Default(llvm::StringRef());
   }
   static llvm::StringRef getPlatformNameSourceSpelling(llvm::StringRef Platform) {
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 989e631a187e8..4d66e577b107f 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -494,7 +494,7 @@ class ToolChain {
   // Returns <ResourceDir>/lib/<OSName>/<arch>. This is used by runtimes (such
   // as OpenMP) to find arch-specific libraries.
-  std::string getArchSpecificLibPath() const;
+  virtual std::string getArchSpecificLibPath() const;

   // Returns <OSname> part of above.
   virtual StringRef getOSLibName() const;
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index e168d45020e77..1ae85928b234f 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -99,7 +99,8 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
   // https://www.gnu.org/software/libc/manual/html_node/Malloc-Examples.html.
   // This alignment guarantee also applies to Windows and Android. On Darwin
   // and OpenBSD, the alignment is 16 bytes on both 64-bit and 32-bit systems.
-  if (T.isGNUEnvironment() || T.isWindowsMSVCEnvironment() || T.isAndroid())
+  if (T.isGNUEnvironment() || T.isWindowsMSVCEnvironment() || T.isAndroid() ||
+      T.isOHOSFamily())
     NewAlign = Triple.isArch64Bit() ? 128 : Triple.isArch32Bit() ? 64 : 0;
   else if (T.isOSDarwin() || T.isOSOpenBSD())
     NewAlign = 128;
diff --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
index 6f504e7867325..7e687c119c1c4 100644
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -148,7 +148,12 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
   case llvm::Triple::Fuchsia:
     return new FuchsiaTargetInfo(Triple, Opts);
   case llvm::Triple::Linux:
-    return new LinuxTargetInfo(Triple, Opts);
+    switch (Triple.getEnvironment()) {
+    default:
+      return new LinuxTargetInfo(Triple, Opts);
+    case llvm::Triple::OpenHOS:
+      return new OHOSTargetInfo(Triple, Opts);
+    }
   case llvm::Triple::NetBSD:
     return new NetBSDTargetInfo(Triple, Opts);
   case llvm::Triple::OpenBSD:
@@ -188,7 +193,14 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
   case llvm::Triple::CloudABI:
     return new CloudABITargetInfo(Triple, Opts);
   case llvm::Triple::Linux:
-    return new LinuxTargetInfo(Triple, Opts);
+    switch (Triple.getEnvironment()) {
+    default:
+      return new LinuxTargetInfo(Triple, Opts);
+    case llvm::Triple::OpenHOS:
+      return new OHOSTargetInfo(Triple, Opts);
+    }
+  case llvm::Triple::LiteOS:
+    return new OHOSTargetInfo(Triple, Opts);
   case llvm::Triple::FreeBSD:
     return new FreeBSDTargetInfo(Triple, Opts);
   case llvm::Triple::NetBSD:
@@ -263,7 +275,12 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
   case llvm::Triple::mipsel:
     switch (os) {
     case llvm::Triple::Linux:
-      return new LinuxTargetInfo(Triple, Opts);
+      switch (Triple.getEnvironment()) {
+      default:
+        return new LinuxTargetInfo(Triple, Opts);
+      case llvm::Triple::OpenHOS:
+        return new OHOSTargetInfo(Triple, Opts);
+      }
     case llvm::Triple::RTEMS:
       return new RTEMSTargetInfo(Triple, Opts);
     case llvm::Triple::FreeBSD:
@@ -423,7 +440,12 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
   case llvm::Triple::Fuchsia:
     return new FuchsiaTargetInfo(Triple, Opts);
   case llvm::Triple::Linux:
-    return new LinuxTargetInfo(Triple, Opts);
+    switch (Triple.getEnvironment()) {
+    default:
+      return new LinuxTargetInfo(Triple, Opts);
+    case llvm::Triple::OpenHOS:
+      return new OHOSTargetInfo(Triple, Opts);
+    }
   default:
     return new RISCV64TargetInfo(Triple, Opts);
   }
@@ -561,6 +583,8 @@ TargetInfo *AllocateTarget(const llvm::Triple &Triple,
       return new LinuxTargetInfo(Triple, Opts);
     case llvm::Triple::Android:
       return new AndroidX86_64TargetInfo(Triple, Opts);
+    case llvm::Triple::OpenHOS:
+      return new OHOSX86_64TargetInfo(Triple, Opts);
     }
   }
   case llvm::Triple::DragonFly:
diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp
index e01379ec82fb4..5e0e8f5c476af 100644
--- a/clang/lib/Basic/Targets/ARM.cpp
+++ b/clang/lib/Basic/Targets/ARM.cpp
@@ -310,6 +310,7 @@ ARMTargetInfo::ARMTargetInfo(const llvm::Triple &Triple,
   case llvm::Triple::GNUEABIHF:
   case llvm::Triple::MuslEABI:
   case llvm::Triple::MuslEABIHF:
+  case llvm::Triple::OpenHOS:
     setABI("aapcs-linux");
     break;
   case llvm::Triple::EABIHF:
diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h
index 0280129b3c950..ea21f7eafaba1 100644
--- a/clang/lib/Basic/Targets/OSTargets.h
+++ b/clang/lib/Basic/Targets/OSTargets.h
@@ -1007,6 +1007,68 @@ class LLVM_LIBRARY_VISIBILITY EmscriptenTargetInfo
   }
 };
 
+// OHOS target
+template <typename Target>
+class LLVM_LIBRARY_VISIBILITY OHOSTargetInfo : public OSTargetInfo<Target> {
+protected:
+  void getOSDefines(const LangOptions &Opts, const llvm::Triple &Triple,
+                    MacroBuilder &Builder) const override {
+    // Linux defines; list based off of gcc output
+    DefineStd(Builder, "unix", Opts);
+
+    Builder.defineMacro("__ELF__");
+
+    // Generic OHOS target defines
+    if (Triple.isOHOSFamily()) {
+      Builder.defineMacro("__OHOS_FAMILY__", "1");
+
+      auto Version = Triple.getEnvironmentVersion();
+      this->PlatformName = "ohos";
+      this->PlatformMinVersion = Version;
+      Builder.defineMacro("__OHOS_Major__", Twine(Version.getMajor()));
+      if (auto Minor = Version.getMinor())
+        Builder.defineMacro("__OHOS_Minor__", Twine(*Minor));
+      if (auto Subminor = Version.getSubminor())
+        Builder.defineMacro("__OHOS_Micro__", Twine(*Subminor));
+    }
+
+    if (Triple.isOpenHOS())
+      Builder.defineMacro("__OHOS__");
+
+    if (Triple.isOSLinux()) {
+      DefineStd(Builder, "linux", Opts);
+    } else if (Triple.isOSLiteOS()) {
+      Builder.defineMacro("__LITEOS__");
+    }
+
+    if (Opts.POSIXThreads)
+      Builder.defineMacro("_REENTRANT");
+    if (Opts.CPlusPlus)
+      Builder.defineMacro("_GNU_SOURCE");
+    if (this->HasFloat128)
+      Builder.defineMacro("__FLOAT128__");
+  }
+
+public:
+  OHOSTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+      : OSTargetInfo<Target>(Triple, Opts) {
+    this->WIntType = TargetInfo::UnsignedInt;
+
+    switch (Triple.getArch()) {
+    default:
+      break;
+    case llvm::Triple::x86:
+    case llvm::Triple::x86_64:
+      this->HasFloat128 = true;
+      break;
+    }
+  }
+
+  const char *getStaticInitSectionSpecifier() const override {
+    return ".text.startup";
+  }
+};
+
 } // namespace targets
 } // namespace clang
 #endif // LLVM_CLANG_LIB_BASIC_TARGETS_OSTARGETS_H
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 50b9eb444b6d7..816bf13770a0c 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -966,6 +966,28 @@ class LLVM_LIBRARY_VISIBILITY AndroidX86_64TargetInfo
     LongDoubleFormat = &llvm::APFloat::IEEEquad();
   }
 };
+
+// x86_32 OHOS target
+class LLVM_LIBRARY_VISIBILITY OHOSX86_32TargetInfo
+    : public OHOSTargetInfo<X86_32TargetInfo> {
+public:
+  OHOSX86_32TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+      : OHOSTargetInfo<X86_32TargetInfo>(Triple, Opts) {
+    SuitableAlign = 32;
+    LongDoubleWidth = 64;
+    LongDoubleFormat = &llvm::APFloat::IEEEdouble();
+  }
+};
+
+// x86_64 OHOS target
+class LLVM_LIBRARY_VISIBILITY OHOSX86_64TargetInfo
+    : public OHOSTargetInfo<X86_64TargetInfo> {
+public:
+  OHOSX86_64TargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts)
+      : OHOSTargetInfo<X86_64TargetInfo>(Triple, Opts) {
+    LongDoubleFormat = &llvm::APFloat::IEEEquad();
+  }
+};
 } // namespace targets
 } // namespace clang
 #endif // LLVM_CLANG_LIB_BASIC_TARGETS_X86_H
diff --git a/clang/lib/CodeGen/ABIInfo.h b/clang/lib/CodeGen/ABIInfo.h
index 755d2aaa7beb8..1dffa41c57c58 100644
--- a/clang/lib/CodeGen/ABIInfo.h
+++ b/clang/lib/CodeGen/ABIInfo.h
@@ -81,6 +81,7 @@ namespace CodeGen {
                                     QualType Ty) const = 0;
 
   bool isAndroid() const;
+  bool isOHOSFamily() const;
 
   /// Emit the target dependent code to load a value of
   /// \arg Ty from the \c __builtin_ms_va_list pointed to by \arg VAListAddr.
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 40cc2da46bd05..d3329ae023394 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -236,6 +236,10 @@ const CodeGenOptions &ABIInfo::getCodeGenOpts() const {
 
 bool ABIInfo::isAndroid() const { return getTarget().getTriple().isAndroid(); }
 
+bool ABIInfo::isOHOSFamily() const {
+  return getTarget().getTriple().isOHOSFamily();
+}
+
 bool ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
   return false;
 }
@@ -5733,7 +5737,7 @@ ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const {
     uint64_t Size = getContext().getTypeSize(Ty);
 
     // Android promotes <2 x i8> to i16, not i32
-    if (isAndroid() && (Size <= 16)) {
+    if ((isAndroid() || isOHOSFamily()) && (Size <= 16)) {
      llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext());
      return ABIArgInfo::getDirect(ResType);
    }
@@ -6340,7 +6344,7 @@ class ARMABIInfo : public ABIInfo {
     case llvm::Triple::MuslEABIHF:
       return true;
     default:
-      return false;
+      return getTarget().getTriple().isOHOSFamily();
     }
   }
diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt
index e0bf3efc44c20..3d83c86100bec 100644
--- a/clang/lib/Driver/CMakeLists.txt
+++ b/clang/lib/Driver/CMakeLists.txt
@@ -76,6 +76,7 @@ add_clang_library(clangDriver
   ToolChains/Myriad.cpp
   ToolChains/NaCl.cpp
   ToolChains/NetBSD.cpp
+  ToolChains/OHOS.cpp
   ToolChains/OpenBSD.cpp
   ToolChains/PS4CPU.cpp
   ToolChains/RISCVToolchain.cpp
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index ead6252776a28..591025cc10c4b 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -40,6 +40,7 @@
 #include "ToolChains/Myriad.h"
 #include "ToolChains/NaCl.h"
 #include "ToolChains/NetBSD.h"
+#include "ToolChains/OHOS.h"
 #include "ToolChains/OpenBSD.h"
 #include "ToolChains/PPCFreeBSD.h"
 #include "ToolChains/PPCLinux.h"
@@ -6074,7 +6075,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
                                                       Args);
       else if (Target.getArch() == llvm::Triple::ve)
         TC = std::make_unique<toolchains::VEToolChain>(*this, Target, Args);
-
+      else if (Target.isOHOSFamily())
+        TC = std::make_unique<toolchains::OHOS>(*this, Target, Args);
       else
         TC = std::make_unique<toolchains::Linux>(*this, Target, Args);
       break;
@@ -6138,6 +6140,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
     case llvm::Triple::Hurd:
       TC = std::make_unique<toolchains::Hurd>(*this, Target, Args);
       break;
+    case llvm::Triple::LiteOS:
+      TC = std::make_unique<toolchains::OHOS>(*this, Target, Args);
+      break;
     case llvm::Triple::ZOS:
       TC = std::make_unique<toolchains::ZOS>(*this, Target, Args);
       break;
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index 0b2ee72f7868e..f3bc00188c784 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -606,7 +606,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
       Features.push_back("+fix-cortex-a53-835769");
     else
       Features.push_back("-fix-cortex-a53-835769");
-  } else if (Triple.isAndroid()) {
+  } else if (Triple.isAndroid() || Triple.isOHOSFamily()) {
     // Enabled A53 errata (835769) workaround by default on android
     Features.push_back("+fix-cortex-a53-835769");
   } else if (Triple.isOSFuchsia()) {
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index 7843031a4c22f..d5d5ec6114a1d 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -278,6 +278,11 @@ void arm::setArchNameInTriple(const Driver &D, const ArgList &Args,
 
 void arm::setFloatABIInTriple(const Driver &D, const ArgList &Args,
                               llvm::Triple &Triple) {
+  if (Triple.isOSLiteOS()) {
+    Triple.setEnvironment(llvm::Triple::OpenHOS);
+    return;
+  }
+
   bool isHardFloat =
       (arm::getARMFloatABI(D, Triple, Args) == arm::FloatABI::Hard);
 
@@ -297,6 +302,8 @@ void arm::setFloatABIInTriple(const Driver &D, const ArgList &Args,
     Triple.setEnvironment(isHardFloat ? llvm::Triple::MuslEABIHF
                                       : llvm::Triple::MuslEABI);
     break;
+  case llvm::Triple::OpenHOS:
+    break;
   default: {
     arm::FloatABI DefaultABI = arm::getDefaultFloatABI(Triple);
     if (DefaultABI != arm::FloatABI::Invalid &&
@@ -366,6 +373,8 @@ arm::FloatABI arm::getDefaultFloatABI(const llvm::Triple &Triple) {
       return FloatABI::SoftFP;
 
   default:
+    if (Triple.isOHOSFamily())
+      return FloatABI::Soft;
     switch (Triple.getEnvironment()) {
     case llvm::Triple::GNUEABIHF:
     case llvm::Triple::MuslEABIHF:
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 774a1843fa2c8..9074b572c3991 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -957,7 +957,7 @@ void tools::linkSanitizerRuntimeDeps(const ToolChain &TC,
   CmdArgs.push_back(getAsNeededOption(TC, false));
   // There's no libpthread or librt on RTEMS & Android.
   if (TC.getTriple().getOS() != llvm::Triple::RTEMS &&
-      !TC.getTriple().isAndroid()) {
+      !TC.getTriple().isAndroid() && !TC.getTriple().isOHOSFamily()) {
     CmdArgs.push_back("-lpthread");
     if (!TC.getTriple().isOSOpenBSD())
       CmdArgs.push_back("-lrt");
@@ -1381,6 +1381,10 @@ tools::ParsePICArgs(const ToolChain &ToolChain, const ArgList &Args) {
     }
   }
 
+  // OHOS-specific defaults for PIC/PIE
+  if (Triple.isOHOSFamily() && Triple.getArch() == llvm::Triple::aarch64)
+    PIC = true;
+
   // OpenBSD-specific defaults for PIE
   if (Triple.isOSOpenBSD()) {
     switch (ToolChain.getArch()) {
@@ -1667,6 +1671,12 @@ static LibGccType getLibGccType(const ToolChain &TC, const Driver &D,
 static void AddUnwindLibrary(const ToolChain &TC, const Driver &D,
                              ArgStringList &CmdArgs, const ArgList &Args) {
   ToolChain::UnwindLibType UNW = TC.GetUnwindLibType(Args);
+  // By default OHOS binaries are linked statically to libunwind.
+  if (TC.getTriple().isOHOSFamily() && UNW == ToolChain::UNW_CompilerRT) {
+    CmdArgs.push_back("-l:libunwind.a");
+    return;
+  }
+
   // Targets that don't use unwind libraries.
   if ((TC.getTriple().isAndroid() && UNW == ToolChain::UNW_Libgcc) ||
       TC.getTriple().isOSIAMCU() || TC.getTriple().isOSBinFormatWasm() ||
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index 24fbdcffc07ba..7e72a1d1433da 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -405,6 +405,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   const llvm::Triple &Triple = getToolChain().getEffectiveTriple();
 
   const llvm::Triple::ArchType Arch = ToolChain.getArch();
+  const bool isOHOSFamily = ToolChain.getTriple().isOHOSFamily();
   const bool isAndroid = ToolChain.getTriple().isAndroid();
   const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU();
   const bool IsVE = ToolChain.getTriple().isVE();
@@ -455,7 +456,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
 
   // Most Android ARM64 targets should enable the linker fix for erratum
   // 843419. Only non-Cortex-A53 devices are allowed to skip this flag.
-  if (Arch == llvm::Triple::aarch64 && isAndroid) {
+  if (Arch == llvm::Triple::aarch64 && (isAndroid || isOHOSFamily)) {
     std::string CPU = getCPUName(D, Args, Triple);
     if (CPU.empty() || CPU == "generic" || CPU == "cortex-a53")
       CmdArgs.push_back("--fix-cortex-a53-843419");
@@ -641,7 +642,9 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
       CmdArgs.push_back("--pop-state");
   }
 
-  if (WantPthread && !isAndroid)
+  // We don't need libpthread for either bionic (Android) or musl
+  // (used by OHOS as its runtime library).
+  if (WantPthread && !isAndroid && !isOHOSFamily)
     CmdArgs.push_back("-lpthread");
 
   if (Args.hasArg(options::OPT_fsplit_stack))
diff --git a/clang/lib/Driver/ToolChains/OHOS.cpp b/clang/lib/Driver/ToolChains/OHOS.cpp
new file mode 100644
index 0000000000000..71a4ccd042ac8
--- /dev/null
+++ b/clang/lib/Driver/ToolChains/OHOS.cpp
@@ -0,0 +1,411 @@
+//===--- OHOS.cpp - OHOS ToolChain Implementations --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "OHOS.h"
+#include "Arch/ARM.h"
+#include "CommonArgs.h"
+#include "clang/Config/config.h"
+#include "clang/Driver/Compilation.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Driver/DriverDiagnostic.h"
+#include "clang/Driver/Options.h"
+#include "clang/Driver/SanitizerArgs.h"
+#include "llvm/Option/ArgList.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/ScopedPrinter.h"
+
+using namespace clang::driver;
+using namespace clang::driver::toolchains;
+using namespace clang::driver::tools;
+using namespace clang;
+using namespace llvm::opt;
+using namespace clang::driver::tools::arm;
+
+using tools::addMultilibFlag;
+using tools::addPathIfExists;
+
+static bool findOHOSMuslMultilibs(const Multilib::flags_list &Flags,
+                                  DetectedMultilibs &Result) {
+  MultilibSet Multilibs;
+  Multilibs.push_back(Multilib());
+  // -mcpu=cortex-a7
+  // -mfloat-abi=soft -mfloat-abi=softfp -mfloat-abi=hard
+  // -mfpu=neon-vfpv4
+  Multilibs.push_back(Multilib("/a7_soft", {}, {}, 1,
+                               {"+mcpu=cortex-a7", "+mfloat-abi=soft"}));
+
+  Multilibs.push_back(
+      Multilib("/a7_softfp_neon-vfpv4", {}, {}, 1,
+               {"+mcpu=cortex-a7", "+mfloat-abi=softfp", "+mfpu=neon-vfpv4"}));
+
+  Multilibs.push_back(
+      Multilib("/a7_hard_neon-vfpv4", {}, {}, 1,
+               {"+mcpu=cortex-a7", "+mfloat-abi=hard", "+mfpu=neon-vfpv4"}));
+
+  if (Multilibs.select(Flags, Result.SelectedMultilib)) {
+    Result.Multilibs = Multilibs;
+    return true;
+  }
+  return false;
+}
+
+static bool findOHOSMultilibs(const Driver &D,
+                              const ToolChain &TC,
+                              const llvm::Triple &TargetTriple,
+                              StringRef Path, const ArgList &Args,
+                              DetectedMultilibs &Result) {
+  Multilib::flags_list Flags;
+  bool IsA7 = false;
+  if (const Arg *A = Args.getLastArg(options::OPT_mcpu_EQ))
+    IsA7 = A->getValue() == StringRef("cortex-a7");
+  addMultilibFlag(IsA7, "mcpu=cortex-a7", Flags);
+
+  bool IsMFPU = false;
+  if (const Arg *A = Args.getLastArg(options::OPT_mfpu_EQ))
+    IsMFPU = A->getValue() == StringRef("neon-vfpv4");
+  addMultilibFlag(IsMFPU, "mfpu=neon-vfpv4", Flags);
+
+  tools::arm::FloatABI ARMFloatABI = getARMFloatABI(D, TargetTriple, Args);
+  addMultilibFlag((ARMFloatABI == tools::arm::FloatABI::Soft),
+                  "mfloat-abi=soft", Flags);
+  addMultilibFlag((ARMFloatABI == tools::arm::FloatABI::SoftFP),
+                  "mfloat-abi=softfp", Flags);
+  addMultilibFlag((ARMFloatABI == tools::arm::FloatABI::Hard),
+                  "mfloat-abi=hard", Flags);
+
+  return findOHOSMuslMultilibs(Flags, Result);
+}
+
+std::string OHOS::getMultiarchTriple(const llvm::Triple &T) const {
+  // For most architectures, just use whatever we have rather than trying to be
+  // clever.
+  switch (T.getArch()) {
+  default:
+    break;
+
+  // We use the existence of '/lib/<triple>' as a directory to detect some
+  // common linux triples that don't quite match the Clang triple for both
+  // 32-bit and 64-bit targets. Multiarch fixes its install triples to these
+  // regardless of what the actual target triple is.
+  case llvm::Triple::arm:
+  case llvm::Triple::thumb:
+    return T.isOSLiteOS() ? "arm-liteos-ohos" : "arm-linux-ohos";
+  case llvm::Triple::riscv32:
+    return "riscv32-linux-ohos";
+  case llvm::Triple::riscv64:
+    return "riscv64-linux-ohos";
+  case llvm::Triple::mipsel:
+    return "mipsel-linux-ohos";
+  case llvm::Triple::x86:
+    return "i686-linux-ohos";
+  case llvm::Triple::x86_64:
+    return "x86_64-linux-ohos";
+  case llvm::Triple::aarch64:
+    return "aarch64-linux-ohos";
+  }
+  return T.str();
+}
+
+std::string OHOS::getMultiarchTriple(const Driver &D,
+                                     const llvm::Triple &TargetTriple,
+                                     StringRef SysRoot) const {
+  return getMultiarchTriple(TargetTriple);
+}
+
+static std::string makePath(const std::initializer_list<std::string> &IL) {
+  SmallString<128> P;
+  for (const auto &S : IL)
+    llvm::sys::path::append(P, S);
+  return static_cast<std::string>(P.str());
+}
+
+/// OHOS Toolchain
+OHOS::OHOS(const Driver &D, const llvm::Triple &Triple, const ArgList &Args)
+    : Generic_ELF(D, Triple, Args) {
+  std::string SysRoot = computeSysRoot();
+
+  // Select the correct multilib according to the given arguments.
+  DetectedMultilibs Result;
+  findOHOSMultilibs(D, *this, Triple, "", Args, Result);
+  Multilibs = Result.Multilibs;
+  SelectedMultilib = Result.SelectedMultilib;
+
+  getFilePaths().clear();
+  std::string CandidateLibPath = getArchSpecificLibPath();
+  if (getVFS().exists(CandidateLibPath))
+    getFilePaths().push_back(CandidateLibPath);
+
+  getLibraryPaths().clear();
+  for (auto &Path : getRuntimePaths())
+    if (getVFS().exists(Path))
+      getLibraryPaths().push_back(Path);
+
+  // OHOS sysroots contain a library directory for each supported OS
+  // version as well as some unversioned libraries in the usual multiarch
+  // directory. Support --target=aarch64-linux-ohosX.Y.Z or
+  // --target=aarch64-linux-ohosX.Y or --target=aarch64-linux-ohosX
+  path_list &Paths = getFilePaths();
+  std::string SysRootLibPath = makePath({SysRoot, "usr", "lib"});
+  std::string MultiarchTriple = getMultiarchTriple(getTriple());
+  addPathIfExists(D, makePath({SysRootLibPath, SelectedMultilib.gccSuffix()}),
+                  Paths);
+  addPathIfExists(D,
+                  makePath({D.Dir, "..", "lib", MultiarchTriple,
+                            SelectedMultilib.gccSuffix()}),
+                  Paths);
+
+  addPathIfExists(
+      D,
+      makePath({SysRootLibPath, MultiarchTriple, SelectedMultilib.gccSuffix()}),
+      Paths);
+}
+
+ToolChain::RuntimeLibType OHOS::GetRuntimeLibType(
+    const ArgList &Args) const {
+  if (Arg *A = Args.getLastArg(clang::driver::options::OPT_rtlib_EQ)) {
+    StringRef Value = A->getValue();
+    if (Value != "compiler-rt")
+      getDriver().Diag(clang::diag::err_drv_invalid_rtlib_name)
+          << A->getAsString(Args);
+  }
+
+  return ToolChain::RLT_CompilerRT;
+}
+
+ToolChain::CXXStdlibType
+OHOS::GetCXXStdlibType(const ArgList &Args) const {
+  if (Arg *A = Args.getLastArg(options::OPT_stdlib_EQ)) {
+    StringRef Value = A->getValue();
+    if (Value != "libc++")
+      getDriver().Diag(diag::err_drv_invalid_stdlib_name)
+          << A->getAsString(Args);
+  }
+
+  return ToolChain::CST_Libcxx;
+}
+
+void OHOS::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
+                                     ArgStringList &CC1Args) const {
+  const Driver &D = getDriver();
+  const llvm::Triple &Triple = getTriple();
+  std::string SysRoot = computeSysRoot();
+
+  if (DriverArgs.hasArg(options::OPT_nostdinc))
+    return;
+
+  if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) {
+    SmallString<128> P(D.ResourceDir);
+    llvm::sys::path::append(P, "include");
+    addSystemInclude(DriverArgs, CC1Args, P);
+  }
+
+  if (DriverArgs.hasArg(options::OPT_nostdlibinc))
+    return;
+
+  // Check for configure-time C include directories.
+  StringRef CIncludeDirs(C_INCLUDE_DIRS);
+  if (CIncludeDirs != "") {
+    SmallVector<StringRef, 5> dirs;
+    CIncludeDirs.split(dirs, ":");
+    for (StringRef dir : dirs) {
+      StringRef Prefix =
+          llvm::sys::path::is_absolute(dir) ? StringRef(SysRoot) : "";
+      addExternCSystemInclude(DriverArgs, CC1Args, Prefix + dir);
+    }
+    return;
+  }
+
+  addExternCSystemInclude(DriverArgs, CC1Args,
+                          SysRoot + "/usr/include/" +
+                              getMultiarchTriple(Triple));
+  addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + "/include");
+  addExternCSystemInclude(DriverArgs, CC1Args, SysRoot + "/usr/include");
+}
+
+void OHOS::AddClangCXXStdlibIncludeArgs(const ArgList &DriverArgs,
+                                        ArgStringList &CC1Args) const {
+  if (DriverArgs.hasArg(options::OPT_nostdlibinc) ||
+      DriverArgs.hasArg(options::OPT_nostdincxx))
+    return;
+
+  switch (GetCXXStdlibType(DriverArgs)) {
+  case ToolChain::CST_Libcxx: {
+    std::string IncPath = makePath({getDriver().Dir, "..", "include"});
+    std::string IncTargetPath =
+        makePath({IncPath, getMultiarchTriple(getTriple()), "c++", "v1"});
+    if (getVFS().exists(IncTargetPath)) {
+      addSystemInclude(DriverArgs, CC1Args, makePath({IncPath, "c++", "v1"}));
+      addSystemInclude(DriverArgs, CC1Args, IncTargetPath);
+    }
+    break;
+  }
+
+  default:
+    llvm_unreachable("invalid stdlib name");
+  }
+}
+
+void OHOS::AddCXXStdlibLibArgs(const ArgList &Args,
+                               ArgStringList &CmdArgs) const {
+  switch (GetCXXStdlibType(Args)) {
+  case ToolChain::CST_Libcxx:
+    CmdArgs.push_back("-lc++");
+    CmdArgs.push_back("-lc++abi");
+    CmdArgs.push_back("-lunwind");
+    break;
+
+  case ToolChain::CST_Libstdcxx:
+    llvm_unreachable("invalid stdlib name");
+  }
+}
+
+std::string OHOS::computeSysRoot() const {
+  std::string SysRoot =
+      !getDriver().SysRoot.empty()
+          ? getDriver().SysRoot
+          : makePath({getDriver().getInstalledDir(), "..", "..", "sysroot"});
+  if (!llvm::sys::fs::exists(SysRoot))
+    return std::string();
+
+  std::string ArchRoot = makePath({SysRoot, getMultiarchTriple(getTriple())});
+  return llvm::sys::fs::exists(ArchRoot) ? ArchRoot : SysRoot;
+}
+
+ToolChain::path_list OHOS::getRuntimePaths() const {
+  SmallString<128> P;
+  path_list Paths;
+  const Driver &D = getDriver();
+  const llvm::Triple &Triple = getTriple();
+
+  // First try the triple passed to driver as --target=.
+  P.assign(D.ResourceDir);
+  llvm::sys::path::append(P, "lib", D.getTargetTriple(), SelectedMultilib.gccSuffix());
+  Paths.push_back(P.c_str());
+
+  // Second try the normalized triple.
+  P.assign(D.ResourceDir);
+  llvm::sys::path::append(P, "lib", Triple.str(), SelectedMultilib.gccSuffix());
+  Paths.push_back(P.c_str());
+
+  // Third try the effective triple.
+  P.assign(D.ResourceDir);
+  std::string SysRoot = computeSysRoot();
+  llvm::sys::path::append(P, "lib", getMultiarchTriple(Triple),
+                          SelectedMultilib.gccSuffix());
+  Paths.push_back(P.c_str());
+
+  return Paths;
+}
+
+std::string OHOS::getDynamicLinker(const ArgList &Args) const {
+  const llvm::Triple &Triple = getTriple();
+  const llvm::Triple::ArchType Arch = getArch();
+
+  assert(Triple.isMusl());
+  std::string ArchName;
+  bool IsArm = false;
+
+  switch (Arch) {
+  case llvm::Triple::arm:
+  case llvm::Triple::thumb:
+    ArchName = "arm";
+    IsArm = true;
+    break;
+  case llvm::Triple::armeb:
+  case llvm::Triple::thumbeb:
+    ArchName = "armeb";
+    IsArm = true;
+    break;
+  default:
+    ArchName = Triple.getArchName().str();
+  }
+  if (IsArm &&
+      (tools::arm::getARMFloatABI(*this, Args) == tools::arm::FloatABI::Hard))
+    ArchName += "hf";
+
+  return "/lib/ld-musl-" + ArchName + ".so.1";
+}
+
+std::string OHOS::getCompilerRT(const ArgList &Args, StringRef Component,
+                                FileType Type) const {
+  SmallString<128> Path(getDriver().ResourceDir);
+  llvm::sys::path::append(Path, "lib", getMultiarchTriple(getTriple()),
+                          SelectedMultilib.gccSuffix());
+  const char *Prefix =
+      Type == ToolChain::FT_Object ? "" : "lib";
+  const char *Suffix;
+  switch (Type) {
+  case ToolChain::FT_Object:
+    Suffix = ".o";
+    break;
+  case ToolChain::FT_Static:
+    Suffix = ".a";
+    break;
+  case ToolChain::FT_Shared:
+    Suffix = ".so";
+    break;
+  }
+  llvm::sys::path::append(
+      Path, Prefix + Twine("clang_rt.") + Component + Suffix);
+  return static_cast<std::string>(Path.str());
+}
+
+void OHOS::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const {
+  CmdArgs.push_back("-z");
+  CmdArgs.push_back("now");
+  CmdArgs.push_back("-z");
+  CmdArgs.push_back("relro");
+  CmdArgs.push_back("-z");
+  CmdArgs.push_back("max-page-size=4096");
+  // .gnu.hash section is not compatible with the MIPS target
+  if (getArch() != llvm::Triple::mipsel)
+    CmdArgs.push_back("--hash-style=both");
+#ifdef ENABLE_LINKER_BUILD_ID
+  CmdArgs.push_back("--build-id");
+#endif
+  CmdArgs.push_back("--enable-new-dtags");
+}
+
+SanitizerMask OHOS::getSupportedSanitizers() const {
+  SanitizerMask Res = ToolChain::getSupportedSanitizers();
+  Res |= SanitizerKind::Address;
+  Res |= SanitizerKind::PointerCompare;
+  Res |= SanitizerKind::PointerSubtract;
+  Res |= SanitizerKind::Fuzzer;
+  Res |= SanitizerKind::FuzzerNoLink;
+  Res |= SanitizerKind::Memory;
+  Res |= SanitizerKind::Vptr;
+  Res |= SanitizerKind::SafeStack;
+  Res |= SanitizerKind::Scudo;
+  // TODO: kASAN for liteos ??
+  // TODO: Support TSAN and HWASAN and update mask.
+  return Res;
+}
+
+// TODO: Make a base class for Linux and OHOS and move this there.
+void OHOS::addProfileRTLibs(const llvm::opt::ArgList &Args,
+                            llvm::opt::ArgStringList &CmdArgs) const {
+  // Add linker option -u__llvm_profile_runtime to cause runtime
+  // initialization module to be linked in.
+  if (needsProfileRT(Args))
+    CmdArgs.push_back(Args.MakeArgString(
+        Twine("-u", llvm::getInstrProfRuntimeHookVarName())));
+  ToolChain::addProfileRTLibs(Args, CmdArgs);
+}
+
+std::string OHOS::getArchSpecificLibPath() const {
+  llvm::Triple Triple = getTriple();
+  return makePath({getDriver().ResourceDir, "lib", getMultiarchTriple(Triple)});
+}
+
+ToolChain::UnwindLibType OHOS::GetUnwindLibType(const llvm::opt::ArgList &Args) const {
+  if (const Arg *A = Args.getLastArg(options::OPT_unwindlib_EQ))
+    return Generic_ELF::GetUnwindLibType(Args);
+  return GetDefaultUnwindLibType();
+}
diff --git a/clang/lib/Driver/ToolChains/OHOS.h b/clang/lib/Driver/ToolChains/OHOS.h
new file mode 100644
index 0000000000000..9ce0d0c4325bf
--- /dev/null
+++ b/clang/lib/Driver/ToolChains/OHOS.h
@@ -0,0 +1,94 @@
+//===--- OHOS.h - OHOS ToolChain Implementations ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_OHOS_H
+#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_OHOS_H
+
+#include "Linux.h"
+#include "clang/Driver/Tool.h"
+#include "clang/Driver/ToolChain.h"
+
+namespace clang {
+namespace driver {
+namespace toolchains {
+
+class LLVM_LIBRARY_VISIBILITY OHOS : public Generic_ELF {
+public:
+  OHOS(const Driver &D, const llvm::Triple &Triple,
+       const llvm::opt::ArgList &Args);
+
+  bool HasNativeLLVMSupport() const override { return true; }
+  bool IsIntegratedAssemblerDefault() const override { return true; }
+  bool IsMathErrnoDefault() const override { return false; }
+  RuntimeLibType GetDefaultRuntimeLibType() const override {
+    return ToolChain::RLT_CompilerRT;
+  }
+  CXXStdlibType GetDefaultCXXStdlibType() const override {
+    return ToolChain::CST_Libcxx;
+  }
+  // Do not add -funwind-tables by default.
+  bool isPICDefault() const override { return false; }
+  bool isPIEDefault(const llvm::opt::ArgList &Args) const override { return true; }
+  bool isPICDefaultForced() const override { return false; }
+  bool useRelaxRelocations() const override { return false; }
+  UnwindLibType GetUnwindLibType(const llvm::opt::ArgList &Args) const override;
+  UnwindLibType GetDefaultUnwindLibType() const override { return UNW_CompilerRT; }
+
+  RuntimeLibType
+  GetRuntimeLibType(const llvm::opt::ArgList &Args) const override;
+  CXXStdlibType
+  GetCXXStdlibType(const llvm::opt::ArgList &Args) const override;
+
+  void
+  AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                            llvm::opt::ArgStringList &CC1Args) const override;
+  void
+  AddClangCXXStdlibIncludeArgs(const llvm::opt::ArgList &DriverArgs,
+                               llvm::opt::ArgStringList &CC1Args) const override;
+  void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args,
+                           llvm::opt::ArgStringList &CmdArgs) const override;
+
+  std::string computeSysRoot() const override;
+  std::string getDynamicLinker(const llvm::opt::ArgList &Args) const override;
+
+  std::string
+  getCompilerRT(const llvm::opt::ArgList &Args, StringRef Component,
+                FileType Type = ToolChain::FT_Static) const override;
+
+  const char *getDefaultLinker() const override {
+    return "ld.lld";
+  }
+
+  Tool *buildLinker() const override {
+    return new tools::gnutools::Linker(*this);
+  }
+  Tool *buildAssembler() const override {
+    return new tools::gnutools::Assembler(*this);
+  }
+
+  path_list getRuntimePaths() const;
+
+protected:
+  std::string getMultiarchTriple(const llvm::Triple &T) const;
+  std::string getMultiarchTriple(const Driver &D,
+                                 const llvm::Triple &TargetTriple,
+                                 StringRef SysRoot) const override;
+  void addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const override;
+  SanitizerMask getSupportedSanitizers() const override;
+  void addProfileRTLibs(const llvm::opt::ArgList &Args,
+                        llvm::opt::ArgStringList &CmdArgs) const override;
+  std::string getArchSpecificLibPath() const override;
+private:
+  Multilib SelectedMultilib;
+};
+
+} // end namespace toolchains
+} // end namespace driver
+} // end namespace clang
+
+#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_OHOS_H
diff --git a/clang/test/CodeGen/aarch64-fix-cortex-a53-835769.c b/clang/test/CodeGen/aarch64-fix-cortex-a53-835769.c
index 3ce5b9d122176..3d1a2c7aceb1f 100644
--- a/clang/test/CodeGen/aarch64-fix-cortex-a53-835769.c
+++ b/clang/test/CodeGen/aarch64-fix-cortex-a53-835769.c
@@ -7,6 +7,8 @@
 // RUN: %clang -O3 -target aarch64-android-eabi %s -S -o- \
 // RUN:   | FileCheck --check-prefix=CHECK-YES --check-prefix=CHECK %s
+// RUN: %clang -O3 -target aarch64-linux-ohos %s -S -o- \
+// RUN:   | FileCheck --check-prefix=CHECK-YES --check-prefix=CHECK %s
 // RUN: %clang -O3 -target aarch64-android-eabi -mfix-cortex-a53-835769 %s -S -o- \
 // RUN:   | FileCheck --check-prefix=CHECK-YES --check-prefix=CHECK %s
 // RUN: %clang -O3 -target aarch64-android-eabi -mno-fix-cortex-a53-835769 %s -S -o- \
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/bin/.keep b/clang/test/Driver/Inputs/ohos_native_tree/llvm/bin/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/include/c++/v1/.keep b/clang/test/Driver/Inputs/ohos_native_tree/llvm/include/c++/v1/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/libc++.so b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/a7_soft/libc++.so b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/a7_soft/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/libc++.so b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/c++/a7_hard_neon-vfpv4/libc++.so b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/c++/a7_hard_neon-vfpv4/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/c++/a7_soft/libc++.so b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/c++/a7_soft/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/c++/a7_softfp_neon-vfpv4/libc++.so b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/c++/a7_softfp_neon-vfpv4/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/libc++.so b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/arm-liteos-ohos/libc++.so
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/.keep b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/.keep b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/clang_rt.crtbegin.o b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/clang_rt.crtbegin.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/clang_rt.crtend.o b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/clang_rt.crtend.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/libclang_rt.builtins.a b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/libclang_rt.builtins.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/libclang_rt.profile.a b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/libclang_rt.profile.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/.keep b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/clang_rt.crtbegin.o b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/clang_rt.crtbegin.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/clang_rt.crtend.o b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/clang_rt.crtend.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/libclang_rt.builtins.a b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/libclang_rt.builtins.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/libclang_rt.profile.a b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_soft/libclang_rt.profile.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/.keep b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/clang_rt.crtbegin.o b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/clang_rt.crtbegin.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/clang_rt.crtend.o b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/clang_rt.crtend.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/libclang_rt.builtins.a b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/libclang_rt.builtins.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/libclang_rt.profile.a b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/libclang_rt.profile.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/clang_rt.crtbegin.o b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/clang_rt.crtbegin.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/clang_rt.crtend.o b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/clang_rt.crtend.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/libclang_rt.builtins.a b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/libclang_rt.builtins.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/libclang_rt.profile.a b/clang/test/Driver/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z/lib/arm-liteos-ohos/libclang_rt.profile.a
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/include/.keep b/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/include/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/include/arm-liteos-ohos/.keep b/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/include/arm-liteos-ohos/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/.keep b/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/.keep b/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/.keep b/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/a7_hard_neon-vfpv4/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/a7_soft/.keep b/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/a7_soft/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/.keep b/clang/test/Driver/Inputs/ohos_native_tree/sysroot/usr/lib/arm-liteos-ohos/a7_softfp_neon-vfpv4/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/ohos.c b/clang/test/Driver/ohos.c
new file mode 100644
index 0000000000000..b279e9185cc41
--- /dev/null
+++ b/clang/test/Driver/ohos.c
@@ -0,0 +1,239 @@
+// RUN: %clang %s -### -no-canonical-prefixes --target=arm-liteos \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     --sysroot=%S/Inputs/ohos_native_tree/sysroot -fuse-ld=lld -march=armv7-a 2>&1 \
+// RUN:     | FileCheck -check-prefixes=CHECK,CHECK-ARM %s
+// RUN: %clang %s -### -no-canonical-prefixes --target=arm-liteos \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     --sysroot=%S/Inputs/ohos_native_tree/sysroot -fuse-ld=lld -march=armv7-a -mcpu=cortex-a7 -mfloat-abi=soft 2>&1 \
+// RUN:     | FileCheck -check-prefixes=CHECK,CHECK-ARM-A7-SOFT %s
+// CHECK: {{.*}}clang{{.*}}" "-cc1"
+// CHECK-NOT: "--mrelax-relocations"
+// CHECK-NOT: "-munwind-tables"
+// CHECK: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// CHECK: "-isysroot" "[[SYSROOT:[^"]+]]"
+// CHECK: "-internal-externc-isystem" "[[SYSROOT]]{{/|\\\\}}include"
+// CHECK-NOT: "-fsanitize=safe-stack"
+// CHECK-NOT: "-stack-protector" "2"
+// CHECK-NOT: "-fno-common"
+// CHECK: {{.*}}ld.lld{{.*}}" "--sysroot=[[SYSROOT]]"
+// CHECK-NOT: "--sysroot=[[SYSROOT]]"
+// CHECK: "-pie"
+// CHECK-NOT: "--build-id"
+// CHECK: "--hash-style=both"
+// CHECK: "-dynamic-linker" "/lib/ld-musl-arm.so.1"
+// CHECK: Scrt1.o
+// CHECK: crti.o
+// CHECK: clang_rt.crtbegin.o
+// CHECK-ARM: "-L[[SYSROOT]]{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}"
+// CHECK-ARM-A7-SOFT: "-L[[SYSROOT]]{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_soft"
+// CHECK-ARM: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.builtins.a"
+// CHECK-ARM-A7-SOFT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos/a7_soft{{/|\\\\}}libclang_rt.builtins.a"
+// CHECK: "-lc"
+// CHECK: clang_rt.crtend.o
+// CHECK: crtn.o

+// RUN: %clang %s -### --target=arm-liteos -rtlib=libgcc -fuse-ld=lld 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-RTLIB
+// CHECK-RTLIB: error: invalid runtime library name in argument '-rtlib=libgcc'

+// RUN: %clang %s -### --target=arm-liteos -static -fuse-ld=lld 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-STATIC
+// CHECK-STATIC: "-static"
+// CHECK-STATIC-NOT: "-Bdynamic"
+// CHECK-STATIC: "-l:libunwind.a"
+// CHECK-STATIC: "-lc"

+// RUN: %clang %s -### --target=arm-liteos -shared -fuse-ld=lld 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-SHARED
+// CHECK-SHARED-NOT: "-pie"
+// CHECK-SHARED: "-shared"
+// CHECK-SHARED: "-lc"
+// CHECK-SHARED: "-l:libunwind.a"

+// RUN: %clang %s -### --target=arm-linux-ohos -fuse-ld=lld 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-RUNTIME
+// RUN: %clang %s -### --target=aarch64-linux-ohos -fuse-ld=lld 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-RUNTIME
+// RUN: %clang %s -### --target=mipsel-linux-ohos -fuse-ld=lld 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-RUNTIME
+// RUN: %clang %s -### --target=x86_64-linux-ohos -fuse-ld=lld 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-RUNTIME
+// CHECK-RUNTIME: "{{.*}}libclang_rt.builtins.a"
+// CHECK-RUNTIME: "-l:libunwind.a"
+// CHECK-LIBM: "-lm"

+// RUN: %clang %s -### --target=arm-liteos -r -fuse-ld=lld 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-RELOCATABLE
+// CHECK-RELOCATABLE-NOT: "-pie"
+// CHECK-RELOCATABLE: "-r"

+// RUN: %clang %s -### --target=arm-liteos -nodefaultlibs -fuse-ld=lld 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     | FileCheck %s -check-prefix=CHECK-NODEFAULTLIBS
+// CHECK-NODEFAULTLIBS: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// CHECK-NODEFAULTLIBS-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.builtins.a"
+// CHECK-NODEFAULTLIBS-NOT: "-lc"

+// RUN: %clang %s -### --target=arm-liteos -nostdlib -fuse-ld=lld 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     | FileCheck %s -check-prefix=CHECK-NOSTDLIB
+// CHECK-NOSTDLIB: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// CHECK-NOSTDLIB-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.builtins.a"
+// CHECK-NOSTDLIB-NOT: "-lc"

+// RUN: %clang %s -### --target=arm-liteos -nolibc -fuse-ld=lld 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     | FileCheck %s -check-prefix=CHECK-NOLIBC
+// CHECK-NOLIBC: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// CHECK-NOLIBC: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.builtins.a"
+// CHECK-NOLIBC-NOT: "-lc"

+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -fsanitize=safe-stack 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     -fuse-ld=lld \
+// RUN:     | FileCheck %s -check-prefix=CHECK-SAFESTACK
+// CHECK-SAFESTACK: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// CHECK-SAFESTACK: "-fsanitize=safe-stack"
+// CHECK-SAFESTACK: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.safestack.a"
+// CHECK-SAFESTACK: "__safestack_init"

+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -fsanitize=address 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     -fuse-ld=lld \
+// RUN:     | FileCheck %s -check-prefix=CHECK-ASAN-ARM
+// CHECK-ASAN-ARM: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// CHECK-ASAN-ARM: "-fsanitize=address"
+// CHECK-ASAN-ARM: "-fsanitize-address-use-after-scope"
+// CHECK-ASAN-ARM: "-dynamic-linker" "/lib/ld-musl-arm.so.1"
+// CHECK-ASAN-ARM: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.asan.a"
+// CHECK-ASAN-ARM-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.asan-preinit.a"

+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -fsanitize=address -fPIC -shared 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     -shared-libsan \
+// RUN:     -fuse-ld=lld \
+// RUN:     | FileCheck %s -check-prefix=CHECK-ASAN-SHARED
+// CHECK-ASAN-SHARED: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// CHECK-ASAN-SHARED: "-fsanitize=address"
+// CHECK-ASAN-SHARED: "-fsanitize-address-use-after-scope"
+// CHECK-ASAN-SHARED: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.asan.so"
+// CHECK-ASAN-SHARED-NOT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.asan-preinit.a"

+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -fsanitize=fuzzer 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     -fuse-ld=lld \
+// RUN:     | FileCheck %s -check-prefix=CHECK-FUZZER-ARM
+// CHECK-FUZZER-ARM: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// CHECK-FUZZER-ARM: "-fsanitize=fuzzer,fuzzer-no-link"
+// CHECK-FUZZER-ARM: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.fuzzer.a"

+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -fsanitize=scudo 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     -fuse-ld=lld \
+// RUN:     | FileCheck %s -check-prefix=CHECK-SCUDO-ARM
+// CHECK-SCUDO-ARM: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// CHECK-SCUDO-ARM: "-fsanitize=scudo"
+// CHECK-SCUDO-ARM: "-pie"
+// CHECK-SCUDO-ARM: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.scudo_standalone.a"

+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -fsanitize=scudo -fPIC -shared 2>&1 \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     -shared-libsan \
+// RUN:     -fuse-ld=lld \
+// RUN:     | FileCheck %s -check-prefix=CHECK-SCUDO-SHARED
+// CHECK-SCUDO-SHARED: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// CHECK-SCUDO-SHARED: "-fsanitize=scudo"
+// CHECK-SCUDO-SHARED: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.scudo_standalone.so"

+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -fxray-instrument -fxray-modes=xray-basic \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     -fuse-ld=lld 2>&1 \
+// RUN:     | FileCheck %s -check-prefix=CHECK-XRAY-ARM
+// CHECK-XRAY-ARM: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]"
+// CHECK-XRAY-ARM: "-fxray-instrument"
+// CHECK-XRAY-ARM: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.xray.a"
+// CHECK-XRAY-ARM: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.xray-basic.a"

+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -O3 -flto -mcpu=cortex-a53 2>&1 \
+// RUN:     -fuse-ld=lld \
+// RUN:     | FileCheck %s -check-prefix=CHECK-LTO
+// CHECK-LTO: "-plugin-opt=mcpu=cortex-a53"
+// CHECK-LTO: "-plugin-opt=O3"

+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -flto=thin -flto-jobs=8 -mcpu=cortex-a7 2>&1 \
+// RUN:     -fuse-ld=lld \
+// RUN:     | FileCheck %s -check-prefix=CHECK-THINLTO
+// CHECK-THINLTO: "-plugin-opt=mcpu=cortex-a7"
+// CHECK-THINLTO: "-plugin-opt=thinlto"
+// CHECK-THINLTO: "-plugin-opt=jobs=8"

+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -ccc-install-dir %S/Inputs/ohos_native_tree/llvm/bin \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     --sysroot=%S/Inputs/ohos_native_tree/sysroot \
+// RUN:     -march=armv7-a -mfloat-abi=soft 2>&1\
+// RUN:     | FileCheck %s -check-prefixes=CHECK-MULTILIB,CHECK-MULTILIB-SF,CHECK-MULTILIB-ARM
+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -ccc-install-dir %S/Inputs/ohos_native_tree/llvm/bin \
+// RUN:     -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \
+// RUN:     --sysroot=%S/Inputs/ohos_native_tree/sysroot \
+// RUN:     -march=armv7-a -mcpu=cortex-a7 -mfloat-abi=soft 2>&1\
+// RUN:     | FileCheck %s -check-prefixes=CHECK-MULTILIB,CHECK-MULTILIB-SF,CHECK-MULTILIB-ARM-A7-SOFT
+// RUN: %clang %s -### --target=arm-liteos \
+// RUN:     -ccc-install-dir %S/Inputs/ohos_native_tree/llvm/bin \
+// RUN: -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \ +// RUN: --sysroot=%S/Inputs/ohos_native_tree/sysroot \ +// RUN: -march=armv7-a -mcpu=cortex-a7 -mfloat-abi=softfp -mfpu=neon-vfpv4 2>&1\ +// RUN: | FileCheck %s -check-prefixes=CHECK-MULTILIB,CHECK-MULTILIB-SF,CHECK-MULTILIB-ARM-A7-SOFTFP +// RUN: %clang %s -### --target=arm-liteos \ +// RUN: -ccc-install-dir %S/Inputs/ohos_native_tree/llvm/bin \ +// RUN: -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \ +// RUN: --sysroot=%S/Inputs/ohos_native_tree/sysroot \ +// RUN: -march=armv7-a -mcpu=cortex-a7 -mfloat-abi=hard -mfpu=neon-vfpv4 2>&1\ +// RUN: | FileCheck %s -check-prefixes=CHECK-MULTILIB,CHECK-MULTILIB-HF,CHECK-MULTILIB-ARM-A7-HARD +// CHECK-MULTILIB: {{.*}}clang{{.*}}" "-cc1" +// CHECK-MULTILIB: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +// CHECK-MULTILIB: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-MULTILIB: {{.*}}ld.lld{{.*}}" "--sysroot=[[SYSROOT]]" +// CHECK-MULTILIB-SF: "-dynamic-linker" "/lib/ld-musl-arm.so.1" +// CHECK-MULTILIB-HF: "-dynamic-linker" "/lib/ld-musl-armhf.so.1" + +// CHECK-MULTILIB-ARM: "-L[[SYSROOT]]{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}" + +// CHECK-MULTILIB-ARM-A7-SOFT: "-L[[SYSROOT]]{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_soft" + +// CHECK-MULTILIB-ARM-A7-SOFTFP: "-L[[SYSROOT]]{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_softfp_neon-vfpv4" + +// CHECK-MULTILIB-ARM-A7-HARD: "-L[[SYSROOT]]{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_hard_neon-vfpv4" + +// CHECK-MULTILIB-ARM: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.builtins.a" +// CHECK-MULTILIB-ARM-A7-SOFT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_soft{{/|\\\\}}libclang_rt.builtins.a" +// CHECK-MULTILIB-ARM-A7-SOFTFP: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_softfp_neon-vfpv4{{/|\\\\}}libclang_rt.builtins.a" +// CHECK-MULTILIB-ARM-A7-HARD: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_hard_neon-vfpv4{{/|\\\\}}libclang_rt.builtins.a" + +// RUN: %clang %s -### -no-canonical-prefixes --target=arm-linux-ohos -fprofile-instr-generate -v \ +// RUN: -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \ +// RUN: --sysroot=%S/Inputs/ohos_native_tree/sysroot -fuse-ld=lld -march=armv7-a 2>&1 \ +// RUN: | FileCheck -check-prefixes=CHECK-PROFILE-RTLIB %s + +// CHECK-PROFILE-RTLIB: -u__llvm_profile_runtime +// CHECK-PROFILE-RTLIB: libclang_rt.profile + +// RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \ +// RUN: --target=arm64-linux-ohos -pthread \ +// RUN: --gcc-toolchain="" \ +// RUN: --sysroot=%S/Inputs/ohos_native_tree/sysroot \ +// RUN: -shared \ +// RUN: | FileCheck --check-prefix=CHECK-OHOS-PTHREAD %s + +// CHECK-OHOS-PTHREAD-NOT: -lpthread + diff --git a/clang/test/Driver/ohos.cpp b/clang/test/Driver/ohos.cpp new file mode 100644 index 0000000000000..45788cf048869 --- /dev/null +++ b/clang/test/Driver/ohos.cpp @@ -0,0 +1,120 @@ +// RUN: %clangxx %s -### -no-canonical-prefixes --target=arm-liteos -march=armv7-a \ +// RUN: -ccc-install-dir %S/Inputs/ohos_native_tree/llvm/bin \ +// RUN: -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \ +// RUN: --sysroot=%S/Inputs/ohos_native_tree/sysroot -fuse-ld=lld 2>&1 | FileCheck %s +// CHECK: {{.*}}clang{{.*}}" "-cc1" +// CHECK: "-triple" "armv7-unknown-liteos-ohos" +// CHECK-NOT: "-fuse-init-array" +// CHECK: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +// CHECK: 
"-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK: "-internal-externc-isystem" "[[SYSROOT]]{{/|\\\\}}include" +// CHECK: {{.*}}ld.lld{{.*}}" "--sysroot=[[SYSROOT]]" +// CHECK: "-pie" +// CHECK: "-dynamic-linker" "/lib/ld-musl-arm.so.1" +// CHECK: Scrt1.o +// CHECK: crti.o +// CHECK: clang_rt.crtbegin.o +// CHECK: "-L{{.*[/\\]}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}" +// CHECK-NOT: "--push-state" +// CHECK-NOT: "--as-needed" +// CHECK: "-lc++" +// CHECK: "-lm" +// CHECK-NOT: "--pop-state" +// CHECK: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.builtins.a" +// CHECK: "-lc" +// CHECK: clang_rt.crtend.o +// CHECK: crtn.o + +// RUN: %clangxx %s -### --target=arm-unknown-liteos -stdlib=libstdc++ \ +// RUN: -fuse-ld=lld 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CHECK-STDLIB +// CHECK-STDLIB: error: invalid library name in argument '-stdlib=libstdc++' + +// RUN: %clangxx %s -### --target=arm-unknown-liteos -static-libstdc++ \ +// RUN: -fuse-ld=lld 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CHECK-STATIC +// CHECK-STATIC-NOT: "--push-state" +// CHECK-STATIC-NOT: "--as-needed" +// CHECK-STATIC: "-Bstatic" +// CHECK-STATIC: "-lc++" +// CHECK-STATIC: "-Bdynamic" +// CHECK-STATIC: "-lm" +// CHECK-STATIC-NOT: "--pop-state" +// CHECK-STATIC: "-lc" + +// RUN: %clangxx %s -### --target=arm-unknown-liteos -static \ +// RUN: -fuse-ld=lld 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CHECK-STATIC1 +// CHECK-STATIC1-NOT: "-fuse-init-array" +// CHECK-STATIC1: "-static" +// CHECK-STATIC1: "-lc++" +// CHECK-STATIC1: "-lc++abi" +// CHECK-STATIC1: "-lunwind" +// CHECK-STATIC1: "-lm" +// CHECK-STATIC1: "-lc" + +// RUN: %clangxx %s -### --target=arm-unknown-liteos -march=armv7-a -mfloat-abi=soft -static -fPIE -fPIC -fpic -pie \ +// RUN: --sysroot=%S/Inputs/ohos_native_tree/sysroot -fuse-ld=lld 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CHECK-STATIC2 +// CHECK-STATIC2: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-STATIC2: {{.*}}ld.lld{{.*}}" "--sysroot=[[SYSROOT]]" +// CHECK-STATIC2: "-static" +// CHECK-STATIC2: "-lc++" +// CHECK-STATIC2: "-lc++abi" +// CHECK-STATIC2: "-lunwind" +// CHECK-STATIC2: "-lm" +// CHECK-STATIC2: "-lc" + +// RUN: %clangxx %s -### --target=arm-liteos -nostdlib++ -fuse-ld=lld 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CHECK-NOSTDLIBXX +// CHECK-NOSTDLIBXX-NOT: "-lc++" +// CHECK-NOSTDLIBXX: "-lm" +// CHECK-NOSTDLIBXX: "-lc" + +// RUN: %clangxx %s -### --target=arm-liteos \ +// RUN: -ccc-install-dir %S/Inputs/ohos_native_tree/llvm/bin \ +// RUN: -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \ +// RUN: --sysroot=%S/Inputs/ohos_native_tree/sysroot \ +// RUN: -march=armv7-a -mfloat-abi=soft 2>&1\ +// RUN: | FileCheck %s -check-prefixes=CHECK-MULTILIB,CHECK-MULTILIB-SF,CHECK-MULTILIB-ARM +// RUN: %clangxx %s -### --target=arm-liteos \ +// RUN: -ccc-install-dir %S/Inputs/ohos_native_tree/llvm/bin \ +// RUN: -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \ +// RUN: --sysroot=%S/Inputs/ohos_native_tree/sysroot \ +// RUN: -march=armv7-a -mcpu=cortex-a7 -mfloat-abi=soft 2>&1\ +// RUN: | FileCheck %s -check-prefixes=CHECK-MULTILIB,CHECK-MULTILIB-SF,CHECK-MULTILIB-ARM-A7-SOFT +// RUN: %clangxx %s -### --target=arm-liteos \ +// RUN: -ccc-install-dir %S/Inputs/ohos_native_tree/llvm/bin \ +// RUN: -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \ +// RUN: --sysroot=%S/Inputs/ohos_native_tree/sysroot \ +// RUN: -march=armv7-a -mcpu=cortex-a7 -mfloat-abi=softfp -mfpu=neon-vfpv4 2>&1\ +// RUN: | FileCheck %s 
-check-prefixes=CHECK-MULTILIB,CHECK-MULTILIB-SF,CHECK-MULTILIB-ARM-A7-SOFTFP +// RUN: %clangxx %s -### --target=arm-liteos \ +// RUN: -ccc-install-dir %S/Inputs/ohos_native_tree/llvm/bin \ +// RUN: -resource-dir=%S/Inputs/ohos_native_tree/llvm/lib/clang/x.y.z \ +// RUN: --sysroot=%S/Inputs/ohos_native_tree/sysroot \ +// RUN: -march=armv7-a -mcpu=cortex-a7 -mfloat-abi=hard -mfpu=neon-vfpv4 2>&1\ +// RUN: | FileCheck %s -check-prefixes=CHECK-MULTILIB,CHECK-MULTILIB-HF,CHECK-MULTILIB-ARM-A7-HARD +// CHECK-MULTILIB: {{.*}}clang{{.*}}" "-cc1" +// CHECK-MULTILIB: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" +// CHECK-MULTILIB: "-isysroot" "[[SYSROOT:[^"]+]]" +// CHECK-MULTILIB: {{.*}}ld.lld{{.*}}" "--sysroot=[[SYSROOT]]" +// CHECK-MULTILIB-SF: "-dynamic-linker" "/lib/ld-musl-arm.so.1" +// CHECK-MULTILIB-HF: "-dynamic-linker" "/lib/ld-musl-armhf.so.1" + +// CHECK-MULTILIB-ARM: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}" +// CHECK-MULTILIB-ARM: "-L[[SYSROOT]]{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}" + +// CHECK-MULTILIB-ARM-A7-SOFT: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_soft" +// CHECK-MULTILIB-ARM-A7-SOFT: "-L[[SYSROOT]]{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_soft" + +// CHECK-MULTILIB-ARM-A7-SOFTFP: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_softfp_neon-vfpv4" +// CHECK-MULTILIB-ARM-A7-SOFTFP: "-L[[SYSROOT]]{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_softfp_neon-vfpv4" + +// CHECK-MULTILIB-ARM-A7-HARD: "-L{{.*}}{{/|\\\\}}..{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_hard_neon-vfpv4" +// CHECK-MULTILIB-ARM-A7-HARD: "-L[[SYSROOT]]{{/|\\\\}}usr{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_hard_neon-vfpv4" + +// CHECK-MULTILIB-ARM: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}libclang_rt.builtins.a" +// CHECK-MULTILIB-ARM-A7-SOFT: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_soft{{/|\\\\}}libclang_rt.builtins.a" +// CHECK-MULTILIB-ARM-A7-SOFTFP: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_softfp_neon-vfpv4{{/|\\\\}}libclang_rt.builtins.a" +// CHECK-MULTILIB-ARM-A7-HARD: "[[RESOURCE_DIR]]{{/|\\\\}}lib{{/|\\\\}}arm-liteos-ohos{{/|\\\\}}a7_hard_neon-vfpv4{{/|\\\\}}libclang_rt.builtins.a" diff --git a/clang/test/Preprocessor/ohos.c b/clang/test/Preprocessor/ohos.c new file mode 100644 index 0000000000000..0c435c7ed5ab4 --- /dev/null +++ b/clang/test/Preprocessor/ohos.c @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=arm-linux-ohos < /dev/null | FileCheck %s -match-full-lines -check-prefix=ARM-OHOS-CXX +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=aarch64-linux-ohos < /dev/null | FileCheck %s -match-full-lines -check-prefix=ARM64-OHOS-CXX +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=riscv64-linux-ohos < /dev/null | FileCheck %s -match-full-lines -check-prefix=RISCV64-OHOS-CXX +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=mipsel-linux-ohos < /dev/null | FileCheck %s -match-full-lines -check-prefix=MIPSEL-OHOS-CXX +// RUN: %clang_cc1 -x c++ -E -dM -ffreestanding -triple=x86_64-linux-ohos < /dev/null | FileCheck %s -match-full-lines -check-prefix=X86_64-OHOS-CXX +// RUN: %clang_cc1 -E -dM -ffreestanding -triple=arm-linux-ohos < /dev/null | FileCheck %s -check-prefix=OHOS-DEFS + +// ARM-OHOS-CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 8U +// ARM64-OHOS-CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL +// 
RISCV64-OHOS-CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL +// MIPSEL-OHOS-CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 8U +// X86_64-OHOS-CXX: #define __STDCPP_DEFAULT_NEW_ALIGNMENT__ 16UL +// OHOS-DEFS: __OHOS_FAMILY__ +// OHOS-DEFS: __OHOS__ +// OHOS-DEFS-NOT: __OHOS__ From 682808d9c9c81306c0a08a1fdb496b4f572566b4 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Fri, 10 Feb 2023 08:50:49 -0800 Subject: [PATCH 004/691] Reland [clang-format] Add a space between an overloaded operator and '>' The token annotator doesn't annotate the template opener and closer as such if they enclose an overloaded operator. This causes the space between the operator and the closer to be removed, resulting in invalid C++ code. Fixes #58602. Differential Revision: https://reviews.llvm.org/D143755 --- clang/lib/Format/FormatTokenLexer.cpp | 33 ++++++++-- clang/lib/Format/FormatTokenLexer.h | 1 + clang/lib/Format/TokenAnnotator.cpp | 31 ++++++--- clang/unittests/Format/FormatTest.cpp | 8 +++ clang/unittests/Format/TokenAnnotatorTest.cpp | 65 +++++++++++++++++++ 5 files changed, 124 insertions(+), 14 deletions(-) diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index f06f9fb9949d2..ae54de93daf51 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -103,6 +103,8 @@ void FormatTokenLexer::tryMergePreviousTokens() { return; if (tryMergeLessLess()) return; + if (tryMergeGreaterGreater()) + return; if (tryMergeForEach()) return; if (Style.isCpp() && tryTransformTryUsageForC()) @@ -460,12 +462,11 @@ bool FormatTokenLexer::tryMergeLessLess() { return false; auto X = Tokens.size() > 3 ? First[-1] : nullptr; - auto Y = First[2]; - if ((X && X->is(tok::less)) || Y->is(tok::less)) + if (X && X->is(tok::less)) return false; - // Do not remove a whitespace between the two "<" e.g. "operator< <>". - if (X && X->is(tok::kw_operator) && Y->is(tok::greater)) + auto Y = First[2]; + if ((!X || X->isNot(tok::kw_operator)) && Y->is(tok::less)) return false; First[0]->Tok.setKind(tok::lessless); @@ -475,6 +476,30 @@ bool FormatTokenLexer::tryMergeLessLess() { return true; } +bool FormatTokenLexer::tryMergeGreaterGreater() { + // Merge kw_operator,greater,greater into kw_operator,greatergreater. + if (Tokens.size() < 2) + return false; + + auto First = Tokens.end() - 2; + if (First[0]->isNot(tok::greater) || First[1]->isNot(tok::greater)) + return false; + + // Only merge if there currently is no whitespace between the first two ">". + if (First[1]->hasWhitespaceBefore()) + return false; + + auto Tok = Tokens.size() > 2 ? 
First[-1] : nullptr; + if (Tok && Tok->isNot(tok::kw_operator)) + return false; + + First[0]->Tok.setKind(tok::greatergreater); + First[0]->TokenText = ">>"; + First[0]->ColumnWidth += 1; + Tokens.erase(Tokens.end() - 1); + return true; +} + bool FormatTokenLexer::tryMergeTokens(ArrayRef Kinds, TokenType NewType) { if (Tokens.size() < Kinds.size()) diff --git a/clang/lib/Format/FormatTokenLexer.h b/clang/lib/Format/FormatTokenLexer.h index 950305a37d68b..0a8123fed2934 100644 --- a/clang/lib/Format/FormatTokenLexer.h +++ b/clang/lib/Format/FormatTokenLexer.h @@ -51,6 +51,7 @@ class FormatTokenLexer { void tryMergePreviousTokens(); bool tryMergeLessLess(); + bool tryMergeGreaterGreater(); bool tryMergeNSStringLiteral(); bool tryMergeJSPrivateIdentifier(); bool tryMergeCSharpStringLiteral(); diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 6b6b9eb8d31d3..c5644c5bfea16 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1215,19 +1215,26 @@ class AnnotatingParser { !CurrentToken->isOneOf(tok::l_paren, tok::semi, tok::r_paren)) { if (CurrentToken->isOneOf(tok::star, tok::amp)) CurrentToken->setType(TT_PointerOrReference); - consumeToken(); + auto Next = CurrentToken->getNextNonComment(); + if (!Next) + break; + if (Next->is(tok::less)) + next(); + else + consumeToken(); if (!CurrentToken) - continue; - if (CurrentToken->is(tok::comma) && - CurrentToken->Previous->isNot(tok::kw_operator)) { break; - } - if (CurrentToken->Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator, - tok::comma, tok::star, tok::arrow, - tok::amp, tok::ampamp) || + auto Previous = CurrentToken->getPreviousNonComment(); + assert(Previous); + if (CurrentToken->is(tok::comma) && Previous->isNot(tok::kw_operator)) + break; + if (Previous->isOneOf(TT_BinaryOperator, TT_UnaryOperator, tok::comma, + tok::star, tok::arrow, tok::amp, tok::ampamp) || // User defined literal. 
- CurrentToken->Previous->TokenText.startswith("\"\"")) { - CurrentToken->Previous->setType(TT_OverloadedOperator); + Previous->TokenText.startswith("\"\"")) { + Previous->setType(TT_OverloadedOperator); + if (CurrentToken->isOneOf(tok::less, tok::greater)) + break; } } if (CurrentToken && CurrentToken->is(tok::l_paren)) @@ -4048,6 +4055,10 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, return true; if (Style.isCpp()) { + if (Left.is(TT_OverloadedOperator) && + Right.isOneOf(TT_TemplateOpener, TT_TemplateCloser)) { + return true; + } // Space between UDL and dot: auto b = 4s .count(); if (Right.is(tok::period) && Left.is(tok::numeric_constant)) return true; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index efd2942ddf27f..1beb6a75c5225 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -10604,6 +10604,14 @@ TEST_F(FormatTest, UnderstandsOverloadedOperators) { verifyFormat("foo() { ::operator new(n * sizeof(foo)); }"); } +TEST_F(FormatTest, SpaceBeforeTemplateCloser) { + verifyFormat("C<&operator- > minus;"); + verifyFormat("C<&operator> > gt;"); + verifyFormat("C<&operator>= > ge;"); + verifyFormat("C<&operator<= > le;"); + verifyFormat("C<&operator< > lt;"); +} + TEST_F(FormatTest, UnderstandsFunctionRefQualification) { verifyFormat("void A::b() && {}"); verifyFormat("void A::b() && noexcept {}"); diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 9f65709924a36..bc8f7f36372d2 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -578,6 +578,71 @@ TEST_F(TokenAnnotatorTest, UnderstandsOverloadedOperators) { EXPECT_TOKEN(Tokens[4], tok::l_paren, TT_OverloadedOperatorLParen); } +TEST_F(TokenAnnotatorTest, OverloadedOperatorInTemplate) { + struct { + const char *Text; + tok::TokenKind Kind; + } Operators[] = {{"+", tok::plus}, + {"-", tok::minus}, + // FIXME: + // {"*", tok::star}, + {"/", tok::slash}, + {"%", tok::percent}, + {"^", tok::caret}, + // FIXME: + // {"&", tok::amp}, + {"|", tok::pipe}, + {"~", tok::tilde}, + {"!", tok::exclaim}, + {"=", tok::equal}, + // FIXME: + // {"<", tok::less}, + {">", tok::greater}, + {"+=", tok::plusequal}, + {"-=", tok::minusequal}, + {"*=", tok::starequal}, + {"/=", tok::slashequal}, + {"%=", tok::percentequal}, + {"^=", tok::caretequal}, + {"&=", tok::ampequal}, + {"|=", tok::pipeequal}, + {"<<", tok::lessless}, + {">>", tok::greatergreater}, + {">>=", tok::greatergreaterequal}, + {"<<=", tok::lesslessequal}, + {"==", tok::equalequal}, + {"!=", tok::exclaimequal}, + {"<=", tok::lessequal}, + {">=", tok::greaterequal}, + {"<=>", tok::spaceship}, + {"&&", tok::ampamp}, + {"||", tok::pipepipe}, + {"++", tok::plusplus}, + {"--", tok::minusminus}, + {",", tok::comma}, + {"->*", tok::arrowstar}, + {"->", tok::arrow}}; + + for (const auto &Operator : Operators) { + std::string Input("C<&operator"); + Input += Operator.Text; + Input += " > a;"; + auto Tokens = annotate(std::string(Input)); + ASSERT_EQ(Tokens.size(), 9u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::less, TT_TemplateOpener); + EXPECT_TOKEN(Tokens[4], Operator.Kind, TT_OverloadedOperator); + EXPECT_TOKEN(Tokens[5], tok::greater, TT_TemplateCloser); + } + + auto Tokens = annotate("C<&operator< > lt;"); + ASSERT_EQ(Tokens.size(), 12u) << Tokens; + EXPECT_TOKEN(Tokens[1], tok::less, TT_TemplateOpener); + EXPECT_TOKEN(Tokens[4], tok::less, TT_OverloadedOperator); + 
EXPECT_TOKEN(Tokens[5], tok::less, TT_TemplateOpener);
+  EXPECT_TOKEN(Tokens[7], tok::greater, TT_TemplateCloser);
+  EXPECT_TOKEN(Tokens[8], tok::greater, TT_TemplateCloser);
+}
+
 TEST_F(TokenAnnotatorTest, UnderstandsRequiresClausesAndConcepts) {
   auto Tokens = annotate("template <typename T>\n"
                          "concept C = (Foo && Bar) && (Bar && Baz);");

From 8e5aa969d0e9960bfc3d4e14144899076895e1b4 Mon Sep 17 00:00:00 2001
From: Alon Kom
Date: Tue, 31 Jan 2023 10:21:53 +0200
Subject: [PATCH 005/691] [SCEV] Preserve divisibility and min/max information
 in applyLoopGuards

applyLoopGuards doesn't always preserve information when there are multiple
assumes. This patch tries to deal with multiple assumes regarding a SCEV's
divisibility and min/max values, and to rewrite the SCEV so that it still
preserves all of that information.

For example, let the trip count of the loop be TC. Consider the following 3
assumes:
1. __builtin_assume(TC % 8 == 0);
2. __builtin_assume(TC > 0);
3. __builtin_assume(TC < 100);

Before this patch, depending on the assume processing order, applyLoopGuards
could create the following SCEV:

max(min((8 * (TC / 8)), 99), 1)

This SCEV doesn't preserve the divisibility-by-8 information.

After this patch, depending on the assume processing order, applyLoopGuards
could create the following SCEV:

max(min((8 * (TC / 8)), 96), 8)

By aligning 1 up to 8 and aligning 99 down to 96, the new SCEV still
preserves all of the original assumes.

Differential Revision: https://reviews.llvm.org/D144947
---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 157 +++++++++++++++++-
 .../trip-multiple-guard-info.ll               |  14 +-
 .../Analysis/ScalarEvolutionTest.cpp          |  38 +++++
 3 files changed, 199 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index e3c4fc57c202c..df525f4d6be7a 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -15023,6 +15023,93 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
     if (MatchRangeCheckIdiom())
       return;
 
+    // Return true if \p Expr is a MinMax SCEV expression with a non-negative
+    // constant operand. If so, return in \p SCTy the SCEV type and in \p RHS
+    // the non-constant operand and in \p LHS the constant operand.
+    auto IsMinMaxSCEVWithNonNegativeConstant =
+        [&](const SCEV *Expr, SCEVTypes &SCTy, const SCEV *&LHS,
+            const SCEV *&RHS) {
+          if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr)) {
+            if (MinMax->getNumOperands() != 2)
+              return false;
+            if (auto *C = dyn_cast<SCEVConstant>(MinMax->getOperand(0))) {
+              if (C->getAPInt().isNegative())
+                return false;
+              SCTy = MinMax->getSCEVType();
+              LHS = MinMax->getOperand(0);
+              RHS = MinMax->getOperand(1);
+              return true;
+            }
+          }
+          return false;
+        };
+
+    // Checks whether Expr is a non-negative constant, and Divisor is a positive
+    // constant, and returns their APInt in ExprVal and in DivisorVal.
+    auto GetNonNegExprAndPosDivisor = [&](const SCEV *Expr, const SCEV *Divisor,
+                                          APInt &ExprVal, APInt &DivisorVal) {
+      auto *ConstExpr = dyn_cast<SCEVConstant>(Expr);
+      auto *ConstDivisor = dyn_cast<SCEVConstant>(Divisor);
+      if (!ConstExpr || !ConstDivisor)
+        return false;
+      ExprVal = ConstExpr->getAPInt();
+      DivisorVal = ConstDivisor->getAPInt();
+      return ExprVal.isNonNegative() && !DivisorVal.isNonPositive();
+    };
+
+    // Return a new SCEV that modifies \p Expr to the closest number that
+    // divides by \p Divisor and is greater than or equal to Expr.
+    // For now, only handle constant Expr and Divisor.
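+    // Illustrative example (values assumed for exposition, not taken from the
+    // original patch): for Expr = 1 and Divisor = 8 the remainder is 1, so
+    // the result is the constant 1 + 8 - 1 == 8; for Expr = 96 and
+    // Divisor = 8 the remainder is 0 and Expr is returned unchanged. This is
+    // the "align 1 up to 8" step described in the commit message.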
+    auto GetNextSCEVDividesByDivisor = [&](const SCEV *Expr,
+                                           const SCEV *Divisor) {
+      APInt ExprVal;
+      APInt DivisorVal;
+      if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
+        return Expr;
+      APInt Rem = ExprVal.urem(DivisorVal);
+      if (!Rem.isZero())
+        // return the SCEV: Expr + Divisor - Expr % Divisor
+        return getConstant(ExprVal + DivisorVal - Rem);
+      return Expr;
+    };
+
+    // Return a new SCEV that modifies \p Expr to the closest number that
+    // divides by \p Divisor and is less than or equal to Expr.
+    // For now, only handle constant Expr and Divisor.
+    auto GetPreviousSCEVDividesByDivisor = [&](const SCEV *Expr,
+                                               const SCEV *Divisor) {
+      APInt ExprVal;
+      APInt DivisorVal;
+      if (!GetNonNegExprAndPosDivisor(Expr, Divisor, ExprVal, DivisorVal))
+        return Expr;
+      APInt Rem = ExprVal.urem(DivisorVal);
+      // return the SCEV: Expr - Expr % Divisor
+      return getConstant(ExprVal - Rem);
+    };
+
+    // Apply divisibility by \p Divisor on MinMaxExpr with constant values,
+    // recursively. This is done by aligning up/down the constant value to the
+    // Divisor.
+    std::function<const SCEV *(const SCEV *, const SCEV *)>
+        ApplyDivisibiltyOnMinMaxExpr = [&](const SCEV *MinMaxExpr,
+                                           const SCEV *Divisor) {
+          const SCEV *MinMaxLHS = nullptr, *MinMaxRHS = nullptr;
+          SCEVTypes SCTy;
+          if (!IsMinMaxSCEVWithNonNegativeConstant(MinMaxExpr, SCTy, MinMaxLHS,
+                                                   MinMaxRHS))
+            return MinMaxExpr;
+          auto IsMin =
+              isa<SCEVSMinExpr>(MinMaxExpr) || isa<SCEVUMinExpr>(MinMaxExpr);
+          assert(isKnownNonNegative(MinMaxLHS) &&
+                 "Expected non-negative operand!");
+          auto *DivisibleExpr =
+              IsMin ? GetPreviousSCEVDividesByDivisor(MinMaxLHS, Divisor)
+                    : GetNextSCEVDividesByDivisor(MinMaxLHS, Divisor);
+          SmallVector<const SCEV *> Ops = {
+              ApplyDivisibiltyOnMinMaxExpr(MinMaxRHS, Divisor), DivisibleExpr};
+          return getMinMaxExpr(SCTy, Ops);
+        };
+
     // If we have LHS == 0, check if LHS is computing a property of some unknown
     // SCEV %v which we can rewrite %v to express explicitly.
     const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS);
@@ -15034,7 +15121,12 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
     const SCEV *URemRHS = nullptr;
     if (matchURem(LHS, URemLHS, URemRHS)) {
       if (const SCEVUnknown *LHSUnknown = dyn_cast<SCEVUnknown>(URemLHS)) {
-        const auto *Multiple = getMulExpr(getUDivExpr(URemLHS, URemRHS), URemRHS);
+        auto I = RewriteMap.find(LHSUnknown);
+        const SCEV *RewrittenLHS =
+            I != RewriteMap.end() ? I->second : LHSUnknown;
+        RewrittenLHS = ApplyDivisibiltyOnMinMaxExpr(RewrittenLHS, URemRHS);
+        const auto *Multiple =
+            getMulExpr(getUDivExpr(RewrittenLHS, URemRHS), URemRHS);
         RewriteMap[LHSUnknown] = Multiple;
         ExprsToRewrite.push_back(LHSUnknown);
         return;
@@ -15071,6 +15163,52 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
       return I != RewriteMap.end() ? I->second : S;
     };
 
+    // Check for the SCEV expression (A /u B) * B while B is a constant, inside
+    // \p Expr. The check is done recursively on \p Expr, which is assumed to
+    // be a composition of Min/Max SCEVs. Return whether the SCEV expression (A
+    // /u B) * B was found, and return the divisor B in \p DividesBy. For
+    // example, if Expr = umin (umax ((A /u 8) * 8, 16), 64), return true since
+    // (A /u 8) * 8 matched the pattern, and return the constant SCEV 8 in \p
+    // DividesBy.
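+    // Illustrative note (not part of the original change): in the commit
+    // message's example the rewritten trip count has the form
+    // (8 * (TC /u 8)), so the pattern below matches with DividesBy = 8.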
+    std::function<bool(const SCEV *, const SCEV *&)> HasDivisibiltyInfo =
+        [&](const SCEV *Expr, const SCEV *&DividesBy) {
+          if (auto *Mul = dyn_cast<SCEVMulExpr>(Expr)) {
+            if (Mul->getNumOperands() != 2)
+              return false;
+            auto *MulLHS = Mul->getOperand(0);
+            auto *MulRHS = Mul->getOperand(1);
+            if (isa<SCEVConstant>(MulLHS))
+              std::swap(MulLHS, MulRHS);
+            if (auto *Div = dyn_cast<SCEVUDivExpr>(MulLHS))
+              if (Div->getOperand(1) == MulRHS) {
+                DividesBy = MulRHS;
+                return true;
+              }
+          }
+          if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr))
+            return HasDivisibiltyInfo(MinMax->getOperand(0), DividesBy) ||
+                   HasDivisibiltyInfo(MinMax->getOperand(1), DividesBy);
+          return false;
+        };
+
+    // Return true if Expr is known to divide by \p DividesBy.
+    std::function<bool(const SCEV *, const SCEV *)> IsKnownToDivideBy =
+        [&](const SCEV *Expr, const SCEV *DividesBy) {
+          if (getURemExpr(Expr, DividesBy)->isZero())
+            return true;
+          if (auto *MinMax = dyn_cast<SCEVMinMaxExpr>(Expr))
+            return IsKnownToDivideBy(MinMax->getOperand(0), DividesBy) &&
+                   IsKnownToDivideBy(MinMax->getOperand(1), DividesBy);
+          return false;
+        };
+
+    const SCEV *RewrittenLHS = GetMaybeRewritten(LHS);
+    const SCEV *DividesBy = nullptr;
+    if (HasDivisibiltyInfo(RewrittenLHS, DividesBy))
+      // Check that the whole expression is divisible by DividesBy.
+      DividesBy =
+          IsKnownToDivideBy(RewrittenLHS, DividesBy) ? DividesBy : nullptr;
+
     // Collect rewrites for LHS and its transitive operands based on the
     // condition.
     // For min/max expressions, also apply the guard to its operands:
@@ -15091,11 +15229,21 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
       LLVM_FALLTHROUGH;
     case CmpInst::ICMP_SLT: {
       RHS = getMinusSCEV(RHS, One);
+      RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS;
       break;
     }
     case CmpInst::ICMP_UGT:
     case CmpInst::ICMP_SGT:
       RHS = getAddExpr(RHS, One);
+      RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS;
+      break;
+    case CmpInst::ICMP_ULE:
+    case CmpInst::ICMP_SLE:
+      RHS = DividesBy ? GetPreviousSCEVDividesByDivisor(RHS, DividesBy) : RHS;
+      break;
+    case CmpInst::ICMP_UGE:
+    case CmpInst::ICMP_SGE:
+      RHS = DividesBy ? GetNextSCEVDividesByDivisor(RHS, DividesBy) : RHS;
       break;
     default:
       break;
@@ -15148,8 +15296,11 @@ const SCEV *ScalarEvolution::applyLoopGuards(const SCEV *Expr, const Loop *L) {
       break;
     case CmpInst::ICMP_NE:
       if (isa<SCEVConstant>(RHS) &&
-          cast<SCEVConstant>(RHS)->getValue()->isNullValue())
-        To = getUMaxExpr(FromRewritten, One);
+          cast<SCEVConstant>(RHS)->getValue()->isNullValue()) {
+        const SCEV *OneAlignedUp =
+            DividesBy ? 
GetNextSCEVDividesByDivisor(One, DividesBy) : One; + To = getUMaxExpr(FromRewritten, OneAlignedUp); + } break; default: break; diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll index cfa91e3cc7473..492ed9c4d2653 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll @@ -125,7 +125,7 @@ define void @test_trip_multiple_4_ugt_5_order_swapped(i32 %num) { ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %num) ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %num) ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 2 +; CHECK: Loop %for.body: Trip multiple is 4 ; entry: %u = urem i32 %num, 4 @@ -196,7 +196,7 @@ define void @test_trip_multiple_4_sgt_5_order_swapped(i32 %num) { ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %num) ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %num) ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 2 +; CHECK: Loop %for.body: Trip multiple is 4 ; entry: %u = urem i32 %num, 4 @@ -267,7 +267,7 @@ define void @test_trip_multiple_4_uge_5_order_swapped(i32 %num) { ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %num) ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %num) ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 1 +; CHECK: Loop %for.body: Trip multiple is 4 ; entry: %u = urem i32 %num, 4 @@ -338,7 +338,7 @@ define void @test_trip_multiple_4_sge_5_order_swapped(i32 %num) { ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %num) ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %num) ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 1 +; CHECK: Loop %for.body: Trip multiple is 4 ; entry: %u = urem i32 %num, 4 @@ -409,7 +409,7 @@ define void @test_trip_multiple_4_upper_lower_bounds(i32 %num) { ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %num) ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %num) ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 1 +; CHECK: Loop %for.body: Trip multiple is 4 ; entry: %cmp.1 = icmp uge i32 %num, 5 @@ -446,7 +446,7 @@ define void @test_trip_multiple_4_upper_lower_bounds_swapped1(i32 %num) { ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %num) ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %num) ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 1 +; CHECK: Loop %for.body: Trip multiple is 4 ; entry: %cmp.1 = icmp uge i32 %num, 5 @@ -483,7 +483,7 @@ define void @test_trip_multiple_4_upper_lower_bounds_swapped2(i32 %num) { ; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is (-1 + %num) ; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is (-1 + %num) ; CHECK-NEXT: Predicates: -; CHECK: Loop %for.body: Trip multiple is 1 +; CHECK: Loop %for.body: Trip multiple is 4 ; entry: %cmp.1 = icmp uge i32 %num, 5 diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp index 985d1cbc642a3..1834e8cad56fa 100644 --- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp +++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp @@ -1760,4 +1760,42 @@ TEST_F(ScalarEvolutionsTest, CheckGetPowerOfTwo) { 
->equalsInt(1ULL << i));
 }
 
+TEST_F(ScalarEvolutionsTest, ApplyLoopGuards) {
+  LLVMContext C;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M = parseAssemblyString(
+      "declare void @llvm.assume(i1)\n"
+      "define void @test(i32 %num) {\n"
+      "entry:\n"
+      "  %u = urem i32 %num, 4\n"
+      "  %cmp = icmp eq i32 %u, 0\n"
+      "  tail call void @llvm.assume(i1 %cmp)\n"
+      "  %cmp.1 = icmp ugt i32 %num, 0\n"
+      "  tail call void @llvm.assume(i1 %cmp.1)\n"
+      "  br label %for.body\n"
+      "for.body:\n"
+      "  %i.010 = phi i32 [ 0, %entry ], [ %inc, %for.body ]\n"
+      "  %inc = add nuw nsw i32 %i.010, 1\n"
+      "  %cmp2 = icmp ult i32 %inc, %num\n"
+      "  br i1 %cmp2, label %for.body, label %exit\n"
+      "exit:\n"
+      "  ret void\n"
+      "}\n",
+      Err, C);
+
+  ASSERT_TRUE(M && "Could not parse module?");
+  ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!");
+
+  runWithSE(*M, "test", [](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+    auto *TCScev = SE.getSCEV(getArgByName(F, "num"));
+    auto *ApplyLoopGuardsTC = SE.applyLoopGuards(TCScev, *LI.begin());
+    // Assert that the new TC is (4 * ((4 umax %num) /u 4))
+    APInt Four(32, 4);
+    auto *Constant4 = SE.getConstant(Four);
+    auto *Max = SE.getUMaxExpr(TCScev, Constant4);
+    auto *Mul = SE.getMulExpr(SE.getUDivExpr(Max, Constant4), Constant4);
+    ASSERT_TRUE(Mul == ApplyLoopGuardsTC);
+  });
+}
+
 } // end namespace llvm

From 682808d9c9c81306c0a08a1fdb496b4f572566b4 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Mon, 20 Mar 2023 10:07:45 +0000
Subject: [PATCH 006/691] [gn build] Port 28997feb0c3a

---
 llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn
index e02a25c1b3bf5..9ae76c3350a4f 100644
--- a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn
@@ -92,6 +92,7 @@ static_library("Driver") {
     "ToolChains/Myriad.cpp",
    "ToolChains/NaCl.cpp",
     "ToolChains/NetBSD.cpp",
+    "ToolChains/OHOS.cpp",
     "ToolChains/OpenBSD.cpp",
     "ToolChains/PPCFreeBSD.cpp",
     "ToolChains/PPCLinux.cpp",

From 31aa8ea252c0b6acdcb362c1d0f01cc4b810d6d0 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache
Date: Mon, 20 Mar 2023 02:02:23 -0700
Subject: [PATCH 007/691] [mlir][Linalg][Transform] Avoid
 FunctionalStyleTransformOpTrait where unnecessary to improve usability

Differential Revision: https://reviews.llvm.org/D146305
---
 .../GPU/TransformOps/GPUTransformOps.td       |  12 +-
 .../Linalg/TransformOps/LinalgTransformOps.td |  67 +++----
 .../Vector/TransformOps/VectorTransformOps.td |  21 ++-
 .../GPU/TransformOps/GPUTransformOps.cpp      |   7 +-
 .../TransformOps/LinalgTransformOps.cpp       |  72 ++++----
 .../TransformOps/VectorTransformOps.cpp       | 166 +++++++++---------
 .../dialects/_structured_transform_ops_ext.py |   2 -
 .../Dialect/GPU/transform-gpu-failing.mlir    |   8 +-
 mlir/test/Dialect/GPU/transform-gpu.mlir      |   6 +
 mlir/test/Dialect/LLVM/transform-e2e.mlir     |   5 +-
 mlir/test/Dialect/Linalg/hoisting.mlir        |  22 ++-
 .../Linalg/transform-op-vectorize.mlir        |  10 +-
 mlir/test/Dialect/Linalg/vectorization.mlir   | 147 +++++++++-------
 .../Transform/selective-targeting.mlir        |   6 +-
 .../test/Dialect/Vector/transform-vector.mlir |   3 +-
 .../dialects/transform_structured_ext.py      |   2 +-
 16 files changed, 302 insertions(+), 254 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
index c719fedc90e33..a218db3a02ce3 100644
--- 
a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td @@ -17,8 +17,7 @@ include "mlir/IR/OpBase.td" def MapNestedForallToThreads : Op, TransformEachOpTrait, TransformOpInterface]> { let description = [{ @@ -72,9 +71,7 @@ def MapNestedForallToThreads : scf.forall operations with mappings other than gpu.thread are ignored. - The returned handle points to the same LaunchOp operand, consuming it and - producing a new SSA value to satisfy chaining and linearity of the IR - properties. + This operation returns nothing. #### Example: @@ -111,11 +108,11 @@ def MapNestedForallToThreads : ``` }]; - let arguments = (ins PDL_Operation:$target, + let arguments = (ins TransformHandleTypeInterface:$target, DefaultValuedAttr:$block_dims, DefaultValuedOptionalAttr:$warp_dims, DefaultValuedAttr:$sync_after_distribute); - let results = (outs PDL_Operation:$result); + let results = (outs); let assemblyFormat = [{ $target @@ -123,6 +120,7 @@ def MapNestedForallToThreads : (`warp_dims` `=` $warp_dims^)? (`sync_after_distribute` `=` $sync_after_distribute^)? attr-dict + `:` functional-type(operands, results) }]; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 712abf341f460..c16c286ece484 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -1651,11 +1651,13 @@ def TileToScfForOp : Op { + [DeclareOpInterfaceMethods, + TransformEachOpTrait, + TransformOpInterface]> { let description = [{ Indicates that the given `target` op all the ops it contains should be vectorized with the configuration specified by the attributes of this op. + This vectorization only handles structured ops that operate on shaped types and does not vectorize loops or straight-line. Internally, it applies a set of rewrite patterns, some of which enable vectorization and some of @@ -1685,24 +1687,22 @@ def VectorizeOp : Op:$vectorizePadding, - CArg<"bool", "false">:$vectorizeNDExtract)>, - ]; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( ::mlir::Operation *target, @@ -1711,6 +1711,10 @@ def VectorizeOp : Op, TransformOpInterface]> { @@ -1765,8 +1769,9 @@ def MaskedVectorizeOp : Op { + [DeclareOpInterfaceMethods, + TransformEachOpTrait, + TransformOpInterface]> { let description = [{ Hoist vector.transfer_read / vector.transfer_write pairs out of immediately enclosing scf::ForOp iteratively, if the following conditions are true: @@ -1782,18 +1787,17 @@ def HoistRedundantVectorTransfersOp : #### Return modes: - The operation always succeeds and returns a handle to the transformed - function op. + The operation always succeeds and returns nothing. 
}]; let arguments = (ins TransformHandleTypeInterface:$target); - let results = (outs TransformHandleTypeInterface:$transformed); - - let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) "; + let results = (outs); + let assemblyFormat = [{ + $target + attr-dict + `:` functional-type(operands, results) + }]; - let builders = [ - OpBuilder<(ins "Value":$target)>, - ]; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( ::mlir::func::FuncOp target, @@ -1884,8 +1888,9 @@ def ConvertConv2DToImg2ColOp : Op { + [DeclareOpInterfaceMethods, + TransformEachOpTrait, + TransformOpInterface]> { let description = [{ Hoists supported tensor subset extract/insert operation pairs out of immediately enclosing loop iteratively, if the following conditions @@ -1905,18 +1910,18 @@ def HoistRedundantTensorSubsetsOp : #### Return modes: - The operation always succeeds and returns a handle to the transformed - function op. + The operation always succeeds and returns nothing. }]; let arguments = (ins TransformHandleTypeInterface:$target); - let results = (outs TransformHandleTypeInterface:$transformed); + let results = (outs); - let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) "; + let assemblyFormat = [{ + $target + attr-dict + `:` functional-type(operands, results) + }]; - let builders = [ - OpBuilder<(ins "Value":$target)>, - ]; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( ::mlir::Operation *target, diff --git a/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td b/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td index 4533c5a8d6425..4be84e9800d72 100644 --- a/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td +++ b/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td @@ -17,8 +17,9 @@ include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/IR/OpBase.td" def LowerVectorsOp : Op, - DeclareOpInterfaceMethods]> { + [DeclareOpInterfaceMethods, + TransformEachOpTrait, + TransformOpInterface]> { let description = [{ Indicates that the vector operations nested under the isolated from above op `target` should be lowered to finer-grained vector primitives. 
@@ -27,10 +28,14 @@ def LowerVectorsOp : Op:$contraction_lowering, DefaultValuedAttr:$transpose_avx2_lowering, DefaultValuedAttr:$unroll_vector_transfers ); - let results = (outs PDL_Operation:$results); + let results = (outs); let builders = [ OpBuilder<(ins "Type":$resultType, "Value":$target, @@ -66,6 +71,14 @@ def LowerVectorsOp : Op &effects) { + onlyReadsHandle(getTarget(), effects); + modifiesPayload(effects); +} + DiagnosedSilenceableFailure transform::MapNestedForallToThreads::applyToOne( Operation *target, ApplyToEachResultList &results, TransformState &state) { LaunchOp gpuLaunch = dyn_cast(target); @@ -880,7 +886,6 @@ DiagnosedSilenceableFailure transform::MapNestedForallToThreads::applyToOne( mapNestedForallToThreadsImpl(rewriter, transformOp, gpuLaunch, blockDims, getWarpDims(), getSyncAfterDistribute()); - results.push_back(gpuLaunch.getOperation()); return diag; } diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 407b8d213de1c..332a9bfa680d1 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -1786,7 +1786,7 @@ LogicalResult transform::PadOp::verify() { } //===---------------------------------------------------------------------===// -// HoistPadOp +// PadOp //===---------------------------------------------------------------------===// DiagnosedSilenceableFailure @@ -2977,21 +2977,6 @@ void transform::TileToScfForOp::getEffects( // VectorizeOp //===----------------------------------------------------------------------===// -void transform::VectorizeOp::build(OpBuilder &builder, OperationState &result, - Value target, bool vectorizePadding, - bool vectorizeExtract) { - result.addOperands(target); - if (vectorizePadding) { - result.addAttribute(VectorizeOp::getVectorizePaddingAttrName(result.name), - builder.getUnitAttr()); - } - if (vectorizeExtract) { - result.addAttribute(VectorizeOp::getVectorizeNdExtractAttrName(result.name), - builder.getUnitAttr()); - } - result.addTypes(pdl::OperationType::get(builder.getContext())); -} - namespace { /// This is an helper only to call vectorize via a pattern inside of /// VectorizeOp::applyToOne. 
@@ -3050,10 +3035,15 @@ transform::VectorizeOp::applyToOne(Operation *target, if (failed(applyPatternsAndFoldGreedily(target, std::move(patterns)))) return emitDefaultDefiniteFailure(target); - results.push_back(target); return DiagnosedSilenceableFailure::success(); } +void transform::VectorizeOp::getEffects( + SmallVectorImpl &effects) { + transform::onlyReadsHandle(getTarget(), effects); + transform::modifiesPayload(effects); +} + //===----------------------------------------------------------------------===// // MaskedVectorizeOp //===----------------------------------------------------------------------===// @@ -3133,22 +3123,6 @@ SmallVector MaskedVectorizeOp::getMixedVectorSizes() { return getMixedValues(getStaticVectorSizes(), getVectorSizes(), b); } -//===----------------------------------------------------------------------===// -// HoistRedundantVectorTransfersOp -//===----------------------------------------------------------------------===// - -DiagnosedSilenceableFailure -transform::HoistRedundantVectorTransfersOp::applyToOne( - func::FuncOp target, transform::ApplyToEachResultList &results, - transform::TransformState &state) { - // WARNING: This hoisting does not model parallelism and is generally - // incorrect when used on distributed loops with memref semantics! - // TODO: obsolete and should be retired. - linalg::hoistRedundantVectorTransfers(target); - results.push_back(target); - return DiagnosedSilenceableFailure::success(); -} - //===----------------------------------------------------------------------===// // ConvertConv2DToImg2ColOp. //===----------------------------------------------------------------------===// @@ -3193,9 +3167,7 @@ transform::HoistRedundantTensorSubsetsOp::applyToOne( IRRewriter rewriter(target->getContext()); auto forOp = dyn_cast(target); if (forOp) { - scf::ForOp newForOp = - linalg::hoistRedundantSubsetExtractInsert(rewriter, forOp); - results.push_back(newForOp); + linalg::hoistRedundantSubsetExtractInsert(rewriter, forOp); return DiagnosedSilenceableFailure::success(); } @@ -3204,10 +3176,36 @@ transform::HoistRedundantTensorSubsetsOp::applyToOne( target->walk([&](scf::ForOp forOp) { hoistRedundantSubsetExtractInsert(rewriter, forOp); }); - results.push_back(target); return DiagnosedSilenceableFailure::success(); } +void transform::HoistRedundantTensorSubsetsOp::getEffects( + SmallVectorImpl &effects) { + transform::onlyReadsHandle(getTarget(), effects); + transform::modifiesPayload(effects); +} + +//===----------------------------------------------------------------------===// +// HoistRedundantVectorTransfersOp +//===----------------------------------------------------------------------===// + +DiagnosedSilenceableFailure +transform::HoistRedundantVectorTransfersOp::applyToOne( + func::FuncOp target, transform::ApplyToEachResultList &results, + transform::TransformState &state) { + // WARNING: This hoisting does not model parallelism and is generally + // incorrect when used on distributed loops with memref semantics! + // TODO: obsolete and should be retired. 
+ linalg::hoistRedundantVectorTransfers(target); + return DiagnosedSilenceableFailure::success(); +} + +void transform::HoistRedundantVectorTransfersOp::getEffects( + SmallVectorImpl &effects) { + transform::onlyReadsHandle(getTarget(), effects); + transform::modifiesPayload(effects); +} + //===----------------------------------------------------------------------===// // Transform op registration //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp index 60996b9add614..9b2e1d7d4cfe0 100644 --- a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp +++ b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp @@ -29,98 +29,90 @@ using namespace mlir::transform; void transform::LowerVectorsOp::getEffects( SmallVectorImpl &effects) { - consumesHandle(getTarget(), effects); - producesHandle(getResults(), effects); + onlyReadsHandle(getTarget(), effects); modifiesPayload(effects); } -DiagnosedSilenceableFailure transform::LowerVectorsOp::apply( - mlir::transform::TransformResults &transformResults, - mlir::transform::TransformState &state) { - - SmallVector results; - ArrayRef payloadOps = state.getPayloadOps(getTarget()); - for (Operation *target : payloadOps) { - // This check can't be part of the verifier because payload IR is - // independent from transform IR and may not even exist. - if (!target->hasTrait()) { - return mlir::emitDefiniteFailure(target, - "applies only to isolated-from-above " - "targets because it needs to apply " - "patterns greedily"); - } - - MLIRContext *ctx = getContext(); - RewritePatternSet patterns(ctx); - vector::VectorTransposeLowering vectorTransposeLowering = - getTransposeLowering(); - vector::VectorMultiReductionLowering vectorMultiReductionLowering = - getMultireductionLowering(); - vector::VectorContractLowering vectorContractLowering = - getContractionLowering(); - vector::VectorTransferSplit vectorTransferSplit = getSplitTransfers(); - - vector::VectorTransformsOptions vectorTransformOptions; - vectorTransformOptions.setVectorTransformsOptions(vectorContractLowering) - .setVectorMultiReductionLowering(vectorMultiReductionLowering) - .setVectorTransposeLowering(vectorTransposeLowering) - .setVectorTransferSplit(vectorTransferSplit); - - VectorTransferToSCFOptions vectorTransferToSCFOptions = - VectorTransferToSCFOptions().enableFullUnroll( - getUnrollVectorTransfers()); - - int maxTransferRank = 1; - - auto avx2LoweringOptions = - x86vector::avx2::LoweringOptions().setTransposeOptions( - x86vector::avx2::TransposeLoweringOptions() - .lower4x8xf32(getTransposeAvx2Lowering()) - .lower8x8xf32(getTransposeAvx2Lowering())); - - vector::populateVectorToVectorCanonicalizationPatterns(patterns); - - // In the future we may want to more finely select particular stages. - // Stage 1: contraction lowerings. - patterns.add(vectorTransformOptions, - ctx); - vector::populateVectorTransferPermutationMapLoweringPatterns(patterns); - - // Stage 2: multi-reduction lowerings. - vector::populateVectorMultiReductionLoweringPatterns( - patterns, vectorTransformOptions.vectorMultiReductionLowering); - - // Stage 3: Rewrite vector.transfer into full and partial parts. - patterns.add( - ctx, vectorTransformOptions); - - // Stage 4: Lower vector transfers. - vector::populateVectorTransferLoweringPatterns(patterns, maxTransferRank); - - // Stage 5: Vector to scf patterns. 
- populateVectorToSCFConversionPatterns( - patterns, vectorTransferToSCFOptions.setTargetRank(maxTransferRank)); - - // Stage 6: Lower vector.shape_cast. - vector::populateVectorShapeCastLoweringPatterns(patterns); - - // Stage 7: Lower vector.transpose. - vector::populateVectorTransposeLoweringPatterns(patterns, - vectorTransformOptions); - if (getTransposeAvx2Lowering()) - x86vector::avx2::populateSpecializedTransposeLoweringPatterns( - patterns, avx2LoweringOptions, /*benefit=*/10); - - // Apply everything. - if (failed(applyPatternsAndFoldGreedily(target, std::move(patterns)))) - return DiagnosedSilenceableFailure::definiteFailure(); - - results.push_back(target); +DiagnosedSilenceableFailure transform::LowerVectorsOp::applyToOne( + ::mlir::Operation *target, + ::mlir::transform::ApplyToEachResultList &results, + ::mlir::transform::TransformState &state) { + + // This check can't be part of the verifier because payload IR is + // independent from transform IR and may not even exist. + if (!target->hasTrait()) { + return mlir::emitDefiniteFailure(target, + "applies only to isolated-from-above " + "targets because it needs to apply " + "patterns greedily"); } - transformResults.set(getResults().cast(), results); + MLIRContext *ctx = getContext(); + RewritePatternSet patterns(ctx); + vector::VectorTransposeLowering vectorTransposeLowering = + getTransposeLowering(); + vector::VectorMultiReductionLowering vectorMultiReductionLowering = + getMultireductionLowering(); + vector::VectorContractLowering vectorContractLowering = + getContractionLowering(); + vector::VectorTransferSplit vectorTransferSplit = getSplitTransfers(); + + vector::VectorTransformsOptions vectorTransformOptions; + vectorTransformOptions.setVectorTransformsOptions(vectorContractLowering) + .setVectorMultiReductionLowering(vectorMultiReductionLowering) + .setVectorTransposeLowering(vectorTransposeLowering) + .setVectorTransferSplit(vectorTransferSplit); + + VectorTransferToSCFOptions vectorTransferToSCFOptions = + VectorTransferToSCFOptions().enableFullUnroll(getUnrollVectorTransfers()); + + int maxTransferRank = 1; + + auto avx2LoweringOptions = + x86vector::avx2::LoweringOptions().setTransposeOptions( + x86vector::avx2::TransposeLoweringOptions() + .lower4x8xf32(getTransposeAvx2Lowering()) + .lower8x8xf32(getTransposeAvx2Lowering())); + + vector::populateVectorToVectorCanonicalizationPatterns(patterns); + + // In the future we may want to more finely select particular stages. + // Stage 1: contraction lowerings. + patterns.add(vectorTransformOptions, + ctx); + vector::populateVectorTransferPermutationMapLoweringPatterns(patterns); + + // Stage 2: multi-reduction lowerings. + vector::populateVectorMultiReductionLoweringPatterns( + patterns, vectorTransformOptions.vectorMultiReductionLowering); + + // Stage 3: Rewrite vector.transfer into full and partial parts. + patterns.add( + ctx, vectorTransformOptions); + + // Stage 4: Lower vector transfers. + vector::populateVectorTransferLoweringPatterns(patterns, maxTransferRank); + + // Stage 5: Vector to scf patterns. + populateVectorToSCFConversionPatterns( + patterns, vectorTransferToSCFOptions.setTargetRank(maxTransferRank)); + + // Stage 6: Lower vector.shape_cast. + vector::populateVectorShapeCastLoweringPatterns(patterns); + + // Stage 7: Lower vector.transpose. 
+ vector::populateVectorTransposeLoweringPatterns(patterns, + vectorTransformOptions); + if (getTransposeAvx2Lowering()) + x86vector::avx2::populateSpecializedTransposeLoweringPatterns( + patterns, avx2LoweringOptions, /*benefit=*/10); + + // Apply everything. + if (failed(applyPatternsAndFoldGreedily(target, std::move(patterns)))) + return DiagnosedSilenceableFailure::definiteFailure(); + return DiagnosedSilenceableFailure::success(); } diff --git a/mlir/python/mlir/dialects/_structured_transform_ops_ext.py b/mlir/python/mlir/dialects/_structured_transform_ops_ext.py index e2c262ca50201..f314496c693f1 100644 --- a/mlir/python/mlir/dialects/_structured_transform_ops_ext.py +++ b/mlir/python/mlir/dialects/_structured_transform_ops_ext.py @@ -325,11 +325,9 @@ def __init__(self, vectorize_padding: Union[bool, BoolAttr] = False, loc=None, ip=None): - pdl_operation_type = pdl.OperationType.get() if isinstance(vectorize_padding, bool): vectorize_padding = UnitAttr.get() super().__init__( - pdl_operation_type, _get_op_result_or_value(target), vectorize_padding=vectorize_padding, loc=loc, diff --git a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir index 459b800f76d35..c9ded7d7ef193 100644 --- a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir +++ b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir @@ -8,7 +8,8 @@ transform.sequence failures(propagate) { ^bb0(%arg0: !pdl.operation): %funcop = transform.structured.match ops{["tensor.empty"]} in %arg0 : (!pdl.operation) -> !pdl.operation // expected-error @below {{Given target is not a gpu.launch}} - %1 = transform.gpu.map_nested_forall_to_threads %funcop block_dims = [1, 1, 1] + transform.gpu.map_nested_forall_to_threads %funcop block_dims = [1, 1, 1] + : (!pdl.operation) -> () } // ----- @@ -50,6 +51,7 @@ transform.sequence failures(propagate) { // expected-error @below {{Trying to launch a GPU kernel with grid_dims = (1, 1, 1) block_dims = (1200, 9, 1). It is larger than the limits.}} // expected-note @below {{"block_dims" is too large}} transform.gpu.map_nested_forall_to_threads %funcop block_dims = [1200, 9, 1] + : (!pdl.operation) -> () } // ----- @@ -91,6 +93,7 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation // expected-error @below {{Trying to map to fewer GPU threads than loop iterations but overprovisioning is not yet supported. 
Try additional tiling of the before mapping or map to more threads.}} transform.gpu.map_nested_forall_to_threads %funcop block_dims = [128, 4, 1] + : (!pdl.operation) -> () } // ----- @@ -117,6 +120,7 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation // expected-error @below {{unsupported dynamic sizes}} transform.gpu.map_nested_forall_to_threads %funcop block_dims = [128, 4, 1] + : (!pdl.operation) -> () } // ----- @@ -139,6 +143,7 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation // expected-error @below {{only bufferized scf.forall can be mapped}} transform.gpu.map_nested_forall_to_threads %funcop block_dims = [128, 4, 1] + : (!pdl.operation) -> () } // ----- @@ -272,6 +277,7 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation // expected-error @below {{duplicated attribute, cannot map different loops to the same processor}} transform.gpu.map_nested_forall_to_threads %funcop block_dims = [32, 32, 1] + : (!pdl.operation) -> () } // ----- diff --git a/mlir/test/Dialect/GPU/transform-gpu.mlir b/mlir/test/Dialect/GPU/transform-gpu.mlir index fcf56c8024bfa..e54af051c3445 100644 --- a/mlir/test/Dialect/GPU/transform-gpu.mlir +++ b/mlir/test/Dialect/GPU/transform-gpu.mlir @@ -88,6 +88,7 @@ transform.sequence failures(propagate) { ^bb1(%arg0: !pdl.operation): %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation transform.gpu.map_nested_forall_to_threads %funcop block_dims = [12, 9, 1] + : (!pdl.operation) -> () } // ----- @@ -128,6 +129,7 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["func.func"]} in %arg0 : (!pdl.operation) -> !pdl.operation %gpuLaunch = transform.gpu.map_forall_to_blocks %funcop { generate_gpu_launch } transform.gpu.map_nested_forall_to_threads %gpuLaunch block_dims = [32, 4, 1] + : (!pdl.operation) -> () } // ----- @@ -161,6 +163,7 @@ transform.sequence failures(propagate) { ^bb1(%arg0: !pdl.operation): %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation transform.gpu.map_nested_forall_to_threads %funcop block_dims = [12, 9, 1] sync_after_distribute = false + : (!pdl.operation) -> () } // ----- @@ -193,6 +196,7 @@ transform.sequence failures(propagate) { ^bb1(%arg0: !pdl.operation): %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation transform.gpu.map_nested_forall_to_threads %funcop block_dims = [32, 1, 1] + : (!pdl.operation) -> () } // ----- @@ -229,6 +233,7 @@ transform.sequence failures(propagate) { ^bb1(%arg0: !pdl.operation): %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation transform.gpu.map_nested_forall_to_threads %funcop block_dims = [12, 9, 1] sync_after_distribute = false + : (!pdl.operation) -> () } // ----- @@ -304,4 +309,5 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation transform.gpu.map_nested_forall_to_threads %funcop block_dims = [12, 11, 1] warp_dims = [2, 2, 1] + : (!pdl.operation) -> () } diff --git a/mlir/test/Dialect/LLVM/transform-e2e.mlir b/mlir/test/Dialect/LLVM/transform-e2e.mlir index d091e9d18d1b5..7d7a2c85a379d 100644 --- 
a/mlir/test/Dialect/LLVM/transform-e2e.mlir +++ b/mlir/test/Dialect/LLVM/transform-e2e.mlir @@ -17,9 +17,10 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!pdl.operation) -> !pdl.operation %1, %loops:3 = transform.structured.tile %0 [2, 2, 2] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation) %2 = get_closest_isolated_parent %1 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %2 + transform.structured.vectorize %2 : (!pdl.operation) -> () transform.bufferization.one_shot_bufferize layout{IdentityLayoutMap} %module_op {bufferize_function_boundaries = true} %func = transform.structured.match ops{["func.func"]} in %module_op : (!pdl.operation) -> !pdl.operation - transform.vector.lower_vectors %func multireduction_lowering = "innerreduction" + transform.vector.lower_vectors %func multireduction_lowering = "innerreduction" + : (!pdl.operation) -> () } diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir index aeecb8cf95f89..96d809a1fd694 100644 --- a/mlir/test/Dialect/Linalg/hoisting.mlir +++ b/mlir/test/Dialect/Linalg/hoisting.mlir @@ -79,7 +79,10 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_vector_transfers %0 - : (!pdl.operation) -> !pdl.operation + : (!pdl.operation) -> () + // Test we can call the op twice without consuming the handle. + transform.structured.hoist_redundant_vector_transfers %0 + : (!pdl.operation) -> () } // ----- @@ -168,7 +171,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_vector_transfers %0 - : (!pdl.operation) -> !pdl.operation + : (!pdl.operation) -> () } // ----- @@ -213,7 +216,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_vector_transfers %0 - : (!pdl.operation) -> !pdl.operation + : (!pdl.operation) -> () } // ----- @@ -302,7 +305,10 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_tensor_subsets %0 - : (!pdl.operation) -> !pdl.operation + : (!pdl.operation) -> () + // Test we can call the op twice without consuming the handle. 
+ transform.structured.hoist_redundant_tensor_subsets %0 + : (!pdl.operation) -> () } // ----- @@ -397,7 +403,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_tensor_subsets %0 - : (!pdl.operation) -> !pdl.operation + : (!pdl.operation) -> () } // ----- @@ -514,7 +520,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_tensor_subsets %0 - : (!pdl.operation) -> !pdl.operation + : (!pdl.operation) -> () } // ----- @@ -561,7 +567,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_tensor_subsets %0 - : (!pdl.operation) -> !pdl.operation + : (!pdl.operation) -> () } // ----- @@ -674,5 +680,5 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_tensor_subsets %0 - : (!pdl.operation) -> !pdl.operation + : (!pdl.operation) -> () } diff --git a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir index 155b0785d2ec7..b31df69456f24 100644 --- a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir @@ -20,7 +20,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -66,7 +66,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -114,7 +114,9 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 {vectorize_padding} + transform.structured.vectorize %1 {vectorize_padding} : (!pdl.operation) -> () + // Apply transform twice to ensure %1 is not consumed. 
+ transform.structured.vectorize %1 {vectorize_padding} : (!pdl.operation) -> () } // ----- @@ -131,5 +133,5 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation // expected-error @below {{op requires isolated-from-above targets}} - %2 = transform.structured.vectorize %0 + transform.structured.vectorize %0 : (!pdl.operation) -> () } diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 26e27c108ce81..3b1b51e347d1d 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -14,7 +14,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.dot"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } + transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } + : (!pdl.operation) -> () } // ----- @@ -33,7 +34,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matvec"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } + transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } + : (!pdl.operation) -> () } // ----- @@ -51,7 +53,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } + transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } + : (!pdl.operation) -> () } // ----- @@ -70,7 +73,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.batch_matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } + transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } + : (!pdl.operation) -> () } // ----- @@ -110,7 +114,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + : (!pdl.operation) -> () } // ----- @@ -150,7 +155,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, 
disable_transfer_permutation_map_lowering_patterns } + transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + : (!pdl.operation) -> () } // ----- @@ -177,7 +183,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + : (!pdl.operation) -> () } // ----- @@ -217,7 +224,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + : (!pdl.operation) -> () } // ----- @@ -237,7 +245,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } + transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } + : (!pdl.operation) -> () } // ----- @@ -261,7 +270,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -285,7 +294,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -330,7 +339,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_nd_extract } + transform.structured.vectorize %1 { vectorize_nd_extract } + : (!pdl.operation) -> () } // ----- @@ -347,7 +357,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -365,7 +375,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match 
ops{["linalg.fill"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -382,7 +392,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -402,7 +412,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -418,7 +428,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -446,7 +456,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -475,7 +485,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -560,7 +570,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } + transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } + : (!pdl.operation) -> () } // ----- @@ -651,7 +662,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } + transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } + : (!pdl.operation) -> () } // ----- @@ -695,7 +707,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } + transform.structured.vectorize %1 { 
disable_transfer_permutation_map_lowering_patterns } + : (!pdl.operation) -> () } // ----- @@ -738,7 +751,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } + transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } + : (!pdl.operation) -> () } // ----- @@ -770,7 +784,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + : (!pdl.operation) -> () } // ----- @@ -799,7 +814,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_padding } + transform.structured.vectorize %1 { vectorize_padding } : (!pdl.operation) -> () } // ----- @@ -828,7 +843,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_padding } + transform.structured.vectorize %1 { vectorize_padding } : (!pdl.operation) -> () } @@ -865,7 +880,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_padding } + transform.structured.vectorize %1 { vectorize_padding } : (!pdl.operation) -> () } // ----- @@ -885,7 +900,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_padding } + transform.structured.vectorize %1 { vectorize_padding } : (!pdl.operation) -> () } // ----- @@ -915,7 +930,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_padding } + transform.structured.vectorize %1 { vectorize_padding } : (!pdl.operation) -> () } // ----- @@ -948,7 +963,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 { 
vectorize_padding } + transform.structured.vectorize %4 { vectorize_padding } : (!pdl.operation) -> () } @@ -985,7 +1000,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 { vectorize_padding } + transform.structured.vectorize %4 { vectorize_padding } : (!pdl.operation) -> () } @@ -1019,7 +1034,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 { vectorize_padding } + transform.structured.vectorize %4 { vectorize_padding } : (!pdl.operation) -> () } @@ -1047,7 +1062,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 + transform.structured.vectorize %4 : (!pdl.operation) -> () } // ----- @@ -1084,7 +1099,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 { vectorize_padding } + transform.structured.vectorize %4 { vectorize_padding } : (!pdl.operation) -> () } // ----- @@ -1119,7 +1134,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 + transform.structured.vectorize %4 : (!pdl.operation) -> () } // ----- @@ -1164,7 +1179,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + transform.structured.vectorize %4 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + : (!pdl.operation) -> () } // ----- @@ -1194,7 +1210,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 { vectorize_padding } + transform.structured.vectorize %4 { vectorize_padding } : (!pdl.operation) -> () } // ----- @@ -1225,7 +1241,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 + transform.structured.vectorize %4 : (!pdl.operation) -> () } // ----- @@ -1255,7 +1271,7 @@ transform.sequence failures(propagate) { 
^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 + transform.structured.vectorize %4 : (!pdl.operation) -> () } // ----- @@ -1285,7 +1301,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 + transform.structured.vectorize %4 : (!pdl.operation) -> () } // ----- @@ -1315,7 +1331,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 + transform.structured.vectorize %4 : (!pdl.operation) -> () } // ----- @@ -1345,7 +1361,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 + transform.structured.vectorize %4 : (!pdl.operation) -> () } // ----- @@ -1379,7 +1395,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 + transform.structured.vectorize %4 : (!pdl.operation) -> () } // ----- @@ -1417,11 +1433,11 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - %5 = transform.structured.vectorize %4 + transform.structured.vectorize %4 : (!pdl.operation) -> () } // ----- @@ -1464,7 +1480,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } @@ -1495,7 +1511,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -1534,7 +1550,8 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { 
disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } + : (!pdl.operation) -> () } // ----- @@ -1570,7 +1587,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -1606,7 +1623,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_nd_extract } + transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () } // ----- @@ -1645,7 +1662,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_nd_extract } + transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () } // ----- @@ -1695,7 +1712,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_nd_extract } + transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () } // ----- @@ -1743,7 +1760,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_nd_extract } + transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () } // ----- @@ -1787,7 +1804,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_nd_extract } + transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () } // ----- @@ -1829,7 +1846,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_nd_extract } + transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () } // ----- @@ -1873,7 +1890,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { 
vectorize_nd_extract } + transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () } // ----- @@ -1913,7 +1930,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_nd_extract } + transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () } // ----- @@ -1953,7 +1970,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_nd_extract } + transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () } // ----- @@ -1992,7 +2009,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 { vectorize_nd_extract } + transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () } @@ -2017,7 +2034,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.map"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -2036,7 +2053,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.transpose"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -2059,7 +2076,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.reduce"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- @@ -2310,7 +2327,7 @@ func.func @not_vectorizable(%arg0: tensor<1x?xf32>, %arg1: index, %arg2: index, transform.sequence failures(propagate) { ^bb0(%arg0: !pdl.operation): %0 = transform.structured.match ops{["func.func"]} in %arg0 : (!pdl.operation) -> !pdl.operation - %1 = transform.structured.vectorize %0 + transform.structured.vectorize %0 : (!pdl.operation) -> () } // ----- @@ -2345,7 +2362,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // CHECK-LABEL: @wrong_reduction_detection @@ -2374,7 +2391,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = 
get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - %2 = transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } // ----- diff --git a/mlir/test/Dialect/Transform/selective-targeting.mlir b/mlir/test/Dialect/Transform/selective-targeting.mlir index 231ff3099d175..c0a6d6b7a8dc0 100644 --- a/mlir/test/Dialect/Transform/selective-targeting.mlir +++ b/mlir/test/Dialect/Transform/selective-targeting.mlir @@ -80,7 +80,7 @@ transform.with_pdl_patterns { transform.structured.tile %0 [4, 4, 4] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation) %1 = pdl_match @pdl_target_attrC in %arg1 : (!pdl.operation) -> !pdl.operation %2 = transform.get_closest_isolated_parent %1 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %2 + transform.structured.vectorize %2 : (!pdl.operation) -> () } } @@ -125,7 +125,7 @@ transform.with_pdl_patterns { ^bb1(%arg1: !pdl.operation): %0 = pdl_match @pdl_target in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 + transform.structured.vectorize %1 : (!pdl.operation) -> () } } @@ -150,5 +150,5 @@ func.func @vectorize_all( transform.sequence failures(propagate) { ^bb0(%arg0: !pdl.operation): - transform.structured.vectorize %arg0 + transform.structured.vectorize %arg0 : (!pdl.operation) -> () } diff --git a/mlir/test/Dialect/Vector/transform-vector.mlir b/mlir/test/Dialect/Vector/transform-vector.mlir index cf3738f2e9b5e..ce920e18885d4 100644 --- a/mlir/test/Dialect/Vector/transform-vector.mlir +++ b/mlir/test/Dialect/Vector/transform-vector.mlir @@ -18,9 +18,10 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!pdl.operation) -> !pdl.operation %1, %loops:3 = transform.structured.tile %0 [8, 4, 2] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation) %2 = get_closest_isolated_parent %1 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %2 + transform.structured.vectorize %2 : (!pdl.operation) -> () transform.bufferization.one_shot_bufferize %module_op %func = transform.structured.match ops{["func.func"]} in %module_op : (!pdl.operation) -> !pdl.operation transform.vector.lower_vectors %func multireduction_lowering = "innerreduction" + : (!pdl.operation) -> () } diff --git a/mlir/test/python/dialects/transform_structured_ext.py b/mlir/test/python/dialects/transform_structured_ext.py index 9684bfb47f1b0..d88fe2cc0505a 100644 --- a/mlir/test/python/dialects/transform_structured_ext.py +++ b/mlir/test/python/dialects/transform_structured_ext.py @@ -206,5 +206,5 @@ def testVectorize(): transform.YieldOp() # CHECK-LABEL: TEST: testVectorize # CHECK: transform.sequence - # CHECK: = transform.structured.vectorize + # CHECK: transform.structured.vectorize # CHECK: {vectorize_padding} From 8d6ab7d51964333ec9e59235a033fc6dfafbecca Mon Sep 17 00:00:00 2001 From: Muhammad Omair Javaid Date: Mon, 20 Mar 2023 15:15:36 +0500 Subject: [PATCH 008/691] Revert "Revert "[SVE] Add patterns for shift intrinsics with FalseLanesZero mode"" This reverts commit 32bd1f562f835044d11b7ecfb36362a29eb00a02. 
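For context: the reapplied change below lets instruction selection fold a
"select %pg, %x, zeroinitializer" that feeds a predicated SVE shift intrinsic
into a single movprfx-prefixed instruction rather than an explicit select.
A minimal C++ sketch of the source-level pattern this serves, using the ACLE
intrinsics; this is an illustration, not code from the patch, and the
function name is made up:

    #include <arm_sve.h>

    // Zeroing predication: inactive lanes of the result must be zero, so
    // the frontend emits select(pg, a, 0) feeding the shift intrinsic.
    // With +use-experimental-zeroing-pseudos this can lower to
    //   movprfx z0.b, p0/z, z0.b
    //   asr     z0.b, p0/m, z0.b, #8
    // as checked by the new test below.
    svint8_t asr_zeroing(svbool_t pg, svint8_t a) {
      return svasr_n_s8_z(pg, a, 8);
    }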
---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   4 +
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  20 ++
 .../sve-intrinsics-int-arith-imm-zero.ll      | 176 ++++++++++++++++++
 3 files changed, 200 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm-zero.ll

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 278f74ef341f8..858b352c8c72e 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2060,6 +2060,10 @@ let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
   defm LSR_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
   defm LSL_ZPZZ  : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
   defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<AArch64asrd_m1>;
+
+  defm ASR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_asr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+  defm LSR_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsr, SVEShiftImmR8, SVEShiftImmR16, SVEShiftImmR32, SVEShiftImmR64>;
+  defm LSL_ZPZI : sve_int_bin_pred_imm_zeroing_bhsd<int_aarch64_sve_lsl, SVEShiftImmL8, SVEShiftImmL16, SVEShiftImmL32, SVEShiftImmL64>;
 } // End HasSVEorSME, UseExperimentalZeroingPseudos

 let Predicates = [HasSVEorSME] in {
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 4c97ae88f192a..1d3bf9150ca41 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -571,6 +571,12 @@ class SVE_Shift_DupImm_Any_Predicate_Pat
 ;

+class SVE_2_Op_Imm_Pat_Zero<ValueType vt, SDPatternOperator op, ValueType pt,
+                            ValueType it, ComplexPattern cpx, Instruction inst>
+: Pat<(vt (op pt:$Pg, (vselect pt:$Pg, vt:$Op1, (SVEDup0)),
+          (vt (splat_vector (it (cpx i32:$imm)))))),
+      (inst $Pg, $Op1, i32:$imm)>;
+
 class SVE_2_Op_Fp_Imm_Pat
@@ ... @@ multiclass sve_int_bin_pred_zeroing_bhsd<SDPatternOperator op> {
   def : SVE_3_Op_Pat_SelZero<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _ZERO_D)>;
 }

+multiclass sve_int_bin_pred_imm_zeroing_bhsd<SDPatternOperator op,
+                                             ComplexPattern imm_b, ComplexPattern imm_h,
+                                             ComplexPattern imm_s, ComplexPattern imm_d> {
+  def _ZERO_B : PredTwoOpImmPseudo<NAME # _B, ZPR8,  Operand<i32>, FalseLanesZero>;
+  def _ZERO_H : PredTwoOpImmPseudo<NAME # _H, ZPR16, Operand<i32>, FalseLanesZero>;
+  def _ZERO_S : PredTwoOpImmPseudo<NAME # _S, ZPR32, Operand<i32>, FalseLanesZero>;
+  def _ZERO_D : PredTwoOpImmPseudo<NAME # _D, ZPR64, Operand<i32>, FalseLanesZero>;
+
+  def : SVE_2_Op_Imm_Pat_Zero<nxv16i8, op, nxv16i1, i32, imm_b, !cast<Pseudo>(NAME # _ZERO_B)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv8i16, op, nxv8i1,  i32, imm_h, !cast<Pseudo>(NAME # _ZERO_H)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv4i32, op, nxv4i1,  i32, imm_s, !cast<Pseudo>(NAME # _ZERO_S)>;
+  def : SVE_2_Op_Imm_Pat_Zero<nxv2i64, op, nxv2i1,  i64, imm_d, !cast<Pseudo>(NAME # _ZERO_D)>;
+}
+
 multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
                                        SDPatternOperator op> {
   def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm-zero.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm-zero.ll
new file mode 100644
index 0000000000000..6593978b03d41
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm-zero.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-experimental-zeroing-pseudos < %s | FileCheck %s
+
+;; ASR
+define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: asr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    asr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 8, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: asr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    asr z0.h, p0/m, z0.h, #16
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 16, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: asr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    asr z0.s, p0/m, z0.s, #32
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 32, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: asr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    asr z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 64, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+;; LSL
+define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: lsl_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    lsl z0.b, p0/m, z0.b, #7
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 7, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: lsl_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    lsl z0.h, p0/m, z0.h, #15
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 15, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: lsl_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    lsl z0.s, p0/m, z0.s, #31
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 31, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: lsl_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, #63
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 63, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+;; LSR
+define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i1> %pg) {
+; CHECK-LABEL: lsr_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.b, p0/z, z0.b
+; CHECK-NEXT:    lsr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> zeroinitializer
+  %ele = insertelement <vscale x 16 x i8> poison, i8 8, i32 0
+  %shuffle = shufflevector <vscale x 16 x i8> %ele, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %vsel, <vscale x 16 x i8> %shuffle)
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg) {
+; CHECK-LABEL: lsr_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.h, p0/z, z0.h
+; CHECK-NEXT:    lsr z0.h, p0/m, z0.h, #16
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> zeroinitializer
+  %ele = insertelement <vscale x 8 x i16> poison, i16 16, i32 0
+  %shuffle = shufflevector <vscale x 8 x i16> %ele, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %vsel, <vscale x 8 x i16> %shuffle)
+  ret <vscale x 8 x i16> %res
+}
+
+define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg) local_unnamed_addr #0 {
+; CHECK-LABEL: lsr_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.s, p0/z, z0.s
+; CHECK-NEXT:    lsr z0.s, p0/m, z0.s, #32
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> zeroinitializer
+  %ele = insertelement <vscale x 4 x i32> poison, i32 32, i32 0
+  %shuffle = shufflevector <vscale x 4 x i32> %ele, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %vsel, <vscale x 4 x i32> %shuffle)
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %pg) {
+; CHECK-LABEL: lsr_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movprfx z0.d, p0/z, z0.d
+; CHECK-NEXT:    lsr z0.d, p0/m, z0.d, #64
+; CHECK-NEXT:    ret
+  %vsel = select <vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> zeroinitializer
+  %ele = insertelement <vscale x 2 x i64> poison, i64 64, i32 0
+  %shuffle = shufflevector <vscale x 2 x i64> %ele, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %vsel, <vscale x 2 x i64> %shuffle)
+  ret <vscale x 2 x i64> %res
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)

From f721fcb6ed0a186b8f146282467dd8420a5a36d0 Mon Sep 17 00:00:00 2001
From: Muhammad Omair Javaid
Date: Mon, 20 Mar 2023 15:32:21 +0500
Subject: [PATCH 009/691] Revert "[JITLink][ELF] Don't skip debug info
 sections by default."

This reverts commit 57aeb305460406f7b822cfe6ab9fb4d730fc3b38.

Breaks buildbot https://lab.llvm.org/buildbot/#/builders/197/builds/4272
---
 .../JITLink/ELFLinkGraphBuilder.h             |  35 ++-
 ...ELF_debug_section_lifetime_is_NoAlloc.yaml | 195 --------------
 .../JITLink/X86/ELF_skip_debug_sections.s     | 241 ++++++++++++++++++
 3 files changed, 258 insertions(+), 213 deletions(-)
 delete mode 100644 llvm/test/ExecutionEngine/JITLink/X86/ELF_debug_section_lifetime_is_NoAlloc.yaml
 create mode 100644 llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s

diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
index 26feb8ea3277b..cd046057217f1 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
@@ -62,14 +62,6 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase {
                       StringRef FileName,
                       LinkGraph::GetEdgeKindNameFunction GetEdgeKindName);

-  /// Debug sections are included in the graph by default. Use
-  /// setProcessDebugSections(false) to ignore them if debug info is not
-  /// needed.
-  ELFLinkGraphBuilder &setProcessDebugSections(bool ProcessDebugSections) {
-    this->ProcessDebugSections = ProcessDebugSections;
-    return *this;
-  }
-
   /// Attempt to construct and return the LinkGraph.
   Expected<std::unique_ptr<LinkGraph>> buildGraph();
@@ -123,7 +115,8 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase {
   ///
   template <typename RelocHandlerMethod>
   Error forEachRelaRelocation(const typename ELFT::Shdr &RelSect,
-                              RelocHandlerMethod &&Func);
+                              RelocHandlerMethod &&Func,
+                              bool ProcessDebugSections = false);

   /// Traverse all matching ELFT::Rel relocation records in the given section.
   /// The handler function Func should be callable with this signature:
   ///
   ///   Error(const typename ELFT::Rel &,
   ///         const typename ELFT::Shdr &, Section &)
   ///
   template <typename RelocHandlerMethod>
   Error forEachRelRelocation(const typename ELFT::Shdr &RelSect,
-                             RelocHandlerMethod &&Func);
+                             RelocHandlerMethod &&Func,
+                             bool ProcessDebugSections = false);

   /// Traverse all matching rela relocation records in the given section.
   /// Convenience wrapper to allow passing a member function for the handler.
   ///
   template <typename ClassT, typename RelocHandlerMethod>
   Error forEachRelaRelocation(const typename ELFT::Shdr &RelSect,
-                              ClassT *Instance, RelocHandlerMethod &&Method) {
+                              ClassT *Instance, RelocHandlerMethod &&Method,
+                              bool ProcessDebugSections = false) {
     return forEachRelaRelocation(
         RelSect,
         [Instance, Method](const auto &Rel, const auto &Target, auto &GS) {
           return (Instance->*Method)(Rel, Target, GS);
-        });
+        },
+        ProcessDebugSections);
   }

   /// Traverse all matching rel relocation records in the given section.
   /// Convenience wrapper to allow passing a member function for the handler.
   ///
@@ -152,12 +148,14 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase {
   template <typename ClassT, typename RelocHandlerMethod>
   Error forEachRelRelocation(const typename ELFT::Shdr &RelSect,
-                             ClassT *Instance, RelocHandlerMethod &&Method) {
+                             ClassT *Instance, RelocHandlerMethod &&Method,
+                             bool ProcessDebugSections = false) {
     return forEachRelRelocation(
         RelSect,
         [Instance, Method](const auto &Rel, const auto &Target, auto &GS) {
           return (Instance->*Method)(Rel, Target, GS);
-        });
+        },
+        ProcessDebugSections);
   }

   const ELFFile<ELFT> &Obj;
@@ -165,7 +163,6 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase {
   typename ELFFile<ELFT>::Elf_Shdr_Range Sections;
   const typename ELFFile<ELFT>::Elf_Shdr *SymTabSec = nullptr;
   StringRef SectionStringTab;
-  bool ProcessDebugSections = true;

   // Maps ELF section indexes to LinkGraph Blocks.
   // Only SHF_ALLOC sections will have graph blocks.
@@ -321,7 +318,7 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySections() {

     // If the name indicates that it's a debug section then skip it: We don't
     // support those yet.
-    if (!ProcessDebugSections && isDwarfSection(*Name)) {
+    if (isDwarfSection(*Name)) {
       LLVM_DEBUG({
         dbgs() << "    " << SecIndex << ": \"" << *Name
                << "\" is a debug section: "
@@ -525,7 +522,8 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
 template <typename ELFT>
 template <typename RelocHandlerFunction>
 Error ELFLinkGraphBuilder<ELFT>::forEachRelaRelocation(
-    const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func) {
+    const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func,
+    bool ProcessDebugSections) {
   // Only look into sections that store relocation entries.
   if (RelSect.sh_type != ELF::SHT_RELA)
     return Error::success();
@@ -571,7 +569,8 @@ Error ELFLinkGraphBuilder<ELFT>::forEachRelaRelocation(
 template <typename ELFT>
 template <typename RelocHandlerFunction>
 Error ELFLinkGraphBuilder<ELFT>::forEachRelRelocation(
-    const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func) {
+    const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func,
+    bool ProcessDebugSections) {
   // Only look into sections that store relocation entries.
   if (RelSect.sh_type != ELF::SHT_REL)
     return Error::success();
diff --git a/llvm/test/ExecutionEngine/JITLink/X86/ELF_debug_section_lifetime_is_NoAlloc.yaml b/llvm/test/ExecutionEngine/JITLink/X86/ELF_debug_section_lifetime_is_NoAlloc.yaml
deleted file mode 100644
index 0afcda467c326..0000000000000
--- a/llvm/test/ExecutionEngine/JITLink/X86/ELF_debug_section_lifetime_is_NoAlloc.yaml
+++ /dev/null
@@ -1,195 +0,0 @@
-# REQUIRES: asserts
-# RUN: yaml2obj -o %t.o %s
-# RUN: llvm-jitlink -debug-only=jitlink -noexec %t.o 2>&1 | FileCheck %s
-#
-# Check that debug sections get NoAlloc lifetimes.
-#
-# CHECK: ".debug_str" is not a SHF_ALLOC section. Using NoAlloc lifetime.
-# CHECK: ".debug_abbrev" is not a SHF_ALLOC section. Using NoAlloc lifetime.
-# CHECK: ".debug_info" is not a SHF_ALLOC section. Using NoAlloc lifetime.
-# CHECK: ".debug_line" is not a SHF_ALLOC section. Using NoAlloc lifetime.
- - ---- !ELF -FileHeader: - Class: ELFCLASS64 - Data: ELFDATA2LSB - Type: ET_REL - Machine: EM_X86_64 - SectionHeaderStringTable: .strtab -Sections: - - Name: .text - Type: SHT_PROGBITS - Flags: [ SHF_ALLOC, SHF_EXECINSTR ] - AddressAlign: 0x10 - Content: B82A000000C3662E0F1F840000000000B82A000000C3 - - Name: .debug_abbrev - Type: SHT_PROGBITS - AddressAlign: 0x1 - Content: 011101250E1305030E10171B0E110112060000022E00110112064018974219030E3A0B3B0B271949133F190000032E01110112064018974219030E3A0B3B0B271949133F1900000405000218030E3A0B3B0B49130000052400030E3E0B0B0B0000060F004913000000 - - Name: .debug_info - Type: SHT_PROGBITS - AddressAlign: 0x1 - Content: 8C0000000400000000000801000000000C000000000000000000000000000000000000000000160000000200000000000000000600000001570000000001017700000003000000000000000006000000015700000000010577000000040155000000000105770000000401540000000001057E0000000005000000000504068300000006880000000500000000060100 - - Name: .comment - Type: SHT_PROGBITS - Flags: [ SHF_MERGE, SHF_STRINGS ] - AddressAlign: 0x1 - EntSize: 0x1 - Content: 00636C616E672076657273696F6E2031302E302E302D347562756E7475312000 - - Name: .note.GNU-stack - Type: SHT_PROGBITS - AddressAlign: 0x1 - - Name: .debug_line - Type: SHT_PROGBITS - AddressAlign: 0x1 - Content: 58000000040036000000010101FB0E0D0001010101000000010000012F746D700000454C465F736B69705F64656275675F73656374696F6E732E63000100000000090200000000000000000105030A130500F505030A130206000101 - - Name: .eh_frame - Type: SHT_X86_64_UNWIND - Flags: [ SHF_ALLOC ] - AddressAlign: 0x8 - Content: 1400000000000000017A5200017810011B0C070890010000100000001C0000000000000006000000000000001000000030000000000000000600000000000000 - - Name: .rela.debug_info - Type: SHT_RELA - Flags: [ SHF_INFO_LINK ] - Link: .symtab - AddressAlign: 0x8 - Info: .debug_info - Relocations: - - Offset: 0x6 - Symbol: .debug_abbrev - Type: R_X86_64_32 - - Offset: 0xC - Symbol: .debug_str - Type: R_X86_64_32 - - Offset: 0x12 - Symbol: .debug_str - Type: R_X86_64_32 - Addend: 31 - - Offset: 0x16 - Symbol: .debug_line - Type: R_X86_64_32 - - Offset: 0x1A - Symbol: .debug_str - Type: R_X86_64_32 - Addend: 57 - - Offset: 0x1E - Symbol: .text - Type: R_X86_64_64 - - Offset: 0x2B - Symbol: .text - Type: R_X86_64_64 - - Offset: 0x39 - Symbol: .debug_str - Type: R_X86_64_32 - Addend: 62 - - Offset: 0x44 - Symbol: .text - Type: R_X86_64_64 - Addend: 16 - - Offset: 0x52 - Symbol: .debug_str - Type: R_X86_64_32 - Addend: 70 - - Offset: 0x5F - Symbol: .debug_str - Type: R_X86_64_32 - Addend: 75 - - Offset: 0x6C - Symbol: .debug_str - Type: R_X86_64_32 - Addend: 80 - - Offset: 0x78 - Symbol: .debug_str - Type: R_X86_64_32 - Addend: 66 - - Offset: 0x89 - Symbol: .debug_str - Type: R_X86_64_32 - Addend: 85 - - Name: .rela.debug_line - Type: SHT_RELA - Flags: [ SHF_INFO_LINK ] - Link: .symtab - AddressAlign: 0x8 - Info: .debug_line - Relocations: - - Offset: 0x43 - Symbol: .text - Type: R_X86_64_64 - - Name: .rela.eh_frame - Type: SHT_RELA - Flags: [ SHF_INFO_LINK ] - Link: .symtab - AddressAlign: 0x8 - Info: .eh_frame - Relocations: - - Offset: 0x20 - Symbol: .text - Type: R_X86_64_PC32 - - Offset: 0x34 - Symbol: .text - Type: R_X86_64_PC32 - Addend: 16 - - Name: .llvm_addrsig - Type: SHT_LLVM_ADDRSIG - Flags: [ SHF_EXCLUDE ] - Link: .symtab - AddressAlign: 0x1 - Offset: 0x4C0 - Symbols: [ ] - - Type: SectionHeaderTable - Sections: - - Name: .strtab - - Name: .text - - Name: .debug_str - - Name: .debug_abbrev - - Name: .debug_info - - Name: .rela.debug_info - - Name: .comment - - Name: 
.note.GNU-stack - - Name: .debug_line - - Name: .rela.debug_line - - Name: .eh_frame - - Name: .rela.eh_frame - - Name: .llvm_addrsig - - Name: .symtab -Symbols: - - Name: ELF_skip_debug_sections.c - Type: STT_FILE - Index: SHN_ABS - - Name: .text - Type: STT_SECTION - Section: .text - - Name: .debug_str - Type: STT_SECTION - Section: .debug_str - - Name: .debug_abbrev - Type: STT_SECTION - Section: .debug_abbrev - - Name: .debug_line - Type: STT_SECTION - Section: .debug_line - - Name: foo - Type: STT_FUNC - Section: .text - Binding: STB_GLOBAL - Size: 0x6 - - Name: main - Type: STT_FUNC - Section: .text - Binding: STB_GLOBAL - Value: 0x10 - Size: 0x6 -DWARF: - debug_str: - - 'clang version 10.0.0-4ubuntu1 ' - - ELF_skip_debug_sections.c - - '/tmp' - - foo - - int - - main - - argc - - argv - - char -... diff --git a/llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s b/llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s new file mode 100644 index 0000000000000..acd3ae7ad8b1f --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s @@ -0,0 +1,241 @@ +# REQUIRES: asserts +# RUN: llvm-mc -triple=x86_64-pc-linux-gnu -filetype=obj -o %t %s +# RUN: llvm-jitlink -debug-only=jitlink -noexec %t 2>&1 | FileCheck %s +# +# Check that debug sections are not emitted. +# +# CHECK: ".debug_info" is a debug section: No graph section will be created. + + .text + .file "ELF_skip_debug_sections.c" + .globl foo + .p2align 4, 0x90 + .type foo,@function +foo: +.Lfunc_begin0: + .file 1 "/tmp" "ELF_skip_debug_sections.c" + .loc 1 1 0 + .cfi_startproc + + .loc 1 2 3 prologue_end + movl $42, %eax + retq +.Ltmp0: +.Lfunc_end0: + .size foo, .Lfunc_end0-foo + .cfi_endproc + + .globl main + .p2align 4, 0x90 + .type main,@function +main: +.Lfunc_begin1: + .loc 1 5 0 + .cfi_startproc + + + + .loc 1 6 3 prologue_end + movl $42, %eax + retq +.Ltmp1: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 10.0.0-4ubuntu1 " +.Linfo_string1: + .asciz "ELF_skip_debug_sections.c" +.Linfo_string2: + .asciz "/tmp" +.Linfo_string3: + .asciz "foo" +.Linfo_string4: + .asciz "int" +.Linfo_string5: + .asciz "main" +.Linfo_string6: + .asciz "argc" +.Linfo_string7: + .asciz "argv" +.Linfo_string8: + .asciz "char" + .section .debug_abbrev,"",@progbits + .byte 1 + .byte 17 + .byte 1 + .byte 37 + .byte 14 + .byte 19 + .byte 5 + .byte 3 + .byte 14 + .byte 16 + .byte 23 + .byte 27 + .byte 14 + .byte 17 + .byte 1 + .byte 18 + .byte 6 + .byte 0 + .byte 0 + .byte 2 + .byte 46 + .byte 0 + .byte 17 + .byte 1 + .byte 18 + .byte 6 + .byte 64 + .byte 24 + .ascii "\227B" + .byte 25 + .byte 3 + .byte 14 + .byte 58 + .byte 11 + .byte 59 + .byte 11 + .byte 39 + .byte 25 + .byte 73 + .byte 19 + .byte 63 + .byte 25 + .byte 0 + .byte 0 + .byte 3 + .byte 46 + .byte 1 + .byte 17 + .byte 1 + .byte 18 + .byte 6 + .byte 64 + .byte 24 + .ascii "\227B" + .byte 25 + .byte 3 + .byte 14 + .byte 58 + .byte 11 + .byte 59 + .byte 11 + .byte 39 + .byte 25 + .byte 73 + .byte 19 + .byte 63 + .byte 25 + .byte 0 + .byte 0 + .byte 4 + .byte 5 + .byte 0 + .byte 2 + .byte 24 + .byte 3 + .byte 14 + .byte 58 + .byte 11 + .byte 59 + .byte 11 + .byte 73 + .byte 19 + .byte 0 + .byte 0 + .byte 5 + .byte 36 + .byte 0 + .byte 3 + .byte 14 + .byte 62 + .byte 11 + .byte 11 + .byte 11 + .byte 0 + .byte 0 + .byte 6 + .byte 15 + .byte 0 + .byte 73 + .byte 19 + .byte 0 + .byte 0 + .byte 0 + .section .debug_info,"",@progbits +.Lcu_begin0: + .long 
.Ldebug_info_end0-.Ldebug_info_start0 +.Ldebug_info_start0: + .short 4 + .long .debug_abbrev + .byte 8 + .byte 1 + .long .Linfo_string0 + .short 12 + .long .Linfo_string1 + .long .Lline_table_start0 + .long .Linfo_string2 + .quad .Lfunc_begin0 + .long .Lfunc_end1-.Lfunc_begin0 + .byte 2 + .quad .Lfunc_begin0 + .long .Lfunc_end0-.Lfunc_begin0 + .byte 1 + .byte 87 + + .long .Linfo_string3 + .byte 1 + .byte 1 + + .long 119 + + .byte 3 + .quad .Lfunc_begin1 + .long .Lfunc_end1-.Lfunc_begin1 + .byte 1 + .byte 87 + + .long .Linfo_string5 + .byte 1 + .byte 5 + + .long 119 + + .byte 4 + .byte 1 + .byte 85 + .long .Linfo_string6 + .byte 1 + .byte 5 + .long 119 + .byte 4 + .byte 1 + .byte 84 + .long .Linfo_string7 + .byte 1 + .byte 5 + .long 126 + .byte 0 + .byte 5 + .long .Linfo_string4 + .byte 5 + .byte 4 + .byte 6 + .long 131 + .byte 6 + .long 136 + .byte 5 + .long .Linfo_string8 + .byte 6 + .byte 1 + .byte 0 +.Ldebug_info_end0: + .ident "clang version 10.0.0-4ubuntu1 " + .section ".note.GNU-stack","",@progbits + .addrsig + .section .debug_line,"",@progbits +.Lline_table_start0: From e4dd7ec39f8f833a7020e9fc0f5bb152de6dc07d Mon Sep 17 00:00:00 2001 From: Mark Goncharov Date: Mon, 20 Mar 2023 13:42:24 +0300 Subject: [PATCH 010/691] [LSR] Fold terminating condition not only for eq and ne. Add opportunity to fold any icmp instruction. --- .../Transforms/Scalar/LoopStrengthReduce.cpp | 15 +- .../lsr-term-fold-negative-testcase.ll | 38 +-- .../LoopStrengthReduce/lsr-term-fold.ll | 233 +++++++++++++++++- 3 files changed, 250 insertions(+), 36 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 805e4c0ec4b57..5ff7c1027108b 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6707,9 +6707,9 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, if (BI->isUnconditional()) return std::nullopt; auto *TermCond = dyn_cast(BI->getCondition()); - if (!TermCond || !TermCond->isEquality()) { - LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not an " - "ICmpInst::eq / ICmpInst::ne\n"); + if (!TermCond) { + LLVM_DEBUG( + dbgs() << "Cannot fold on branching condition that is not an ICmpInst"); return std::nullopt; } if (!TermCond->hasOneUse()) { @@ -6934,9 +6934,12 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, IRBuilder<> LatchBuilder(LoopLatch->getTerminator()); // FIXME: We are adding a use of an IV here without account for poison safety. // This is incorrect. 
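  // The rewrite below intentionally drops the original predicate: once the
  // exact value of the IV at loop exit is known, any monotonic terminating
  // condition (slt, sle, sgt, ...) is equivalent to an equality test, so the
  // replacement compare is always ICMP_EQ against TermValue. If the original
  // branch kept looping on its true edge, the successors are swapped so that
  // the equality compare still exits the loop.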
- Value *NewTermCond = LatchBuilder.CreateICmp( - OldTermCond->getPredicate(), LoopValue, TermValue, - "lsr_fold_term_cond.replaced_term_cond"); + Value *NewTermCond = + LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue, + "lsr_fold_term_cond.replaced_term_cond"); + // Swap successors to exit loop body if IV equals to new TermValue + if (BI->getSuccessor(0) == L->getHeader()) + BI->swapSuccessors(); LLVM_DEBUG(dbgs() << "Old term-cond:\n" << *OldTermCond << "\n" diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll index 895ce1c6a6965..cb6c63ad39712 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll @@ -144,7 +144,7 @@ define void @NonSCEVableIV(float %init, float* %A, i32 %N) { ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @fp_inc, align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[ENTRY]] ] ; CHECK-NEXT: [[X_05:%.*]] = phi float [ [[INIT]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: store float [[X_05]], ptr [[LSR_IV1]], align 4 @@ -152,7 +152,7 @@ define void @NonSCEVableIV(float %init, float* %A, i32 %N) { ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[LSR_IV]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] ; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1 -; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -176,20 +176,21 @@ for.end: ; preds = %for.end ret void } -define void @NonIcmpEqNe(ptr %a) { -; CHECK-LABEL: define void @NonIcmpEqNe +define void @NonIcmp(ptr %a) { +; CHECK-LABEL: define void @NonIcmp ; CHECK-SAME: (ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A]], i64 84 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 379, [[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[FOR_BODY]] ], [ 378, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 -; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], -1 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp sle i64 [[LSR_IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp sle i64 [[LSR_IV2]], 0 +; CHECK-NEXT: [[FIND_COND:%.*]] = and i1 [[EXITCOND_NOT]], true +; CHECK-NEXT: [[LSR_IV_NEXT3]] = add nsw i64 [[LSR_IV2]], -1 +; CHECK-NEXT: br i1 [[FIND_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -204,7 +205,8 @@ for.body: ; preds = %for.body, %entry %lsr.iv.next 
= add nsw i64 %lsr.iv, -1 %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4 %exitcond.not = icmp sle i64 %lsr.iv.next, 0 - br i1 %exitcond.not, label %for.end, label %for.body + %find.cond = and i1 %exitcond.not, 1 + br i1 %find.cond, label %for.end, label %for.body for.end: ; preds = %for.body ret void @@ -215,7 +217,7 @@ define void @TermCondMoreThanOneUse(ptr %a) { ; CHECK-SAME: (ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A]], i64 84 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; CHECK-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, ptr [[A]], i64 1600 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[FOR_BODY]] ], [ -378, [[ENTRY:%.*]] ] @@ -225,7 +227,7 @@ define void @TermCondMoreThanOneUse(ptr %a) { ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV2]], 0 ; CHECK-NEXT: [[DUMMY:%.*]] = select i1 [[EXITCOND_NOT]], i8 0, i8 1 ; CHECK-NEXT: [[LSR_IV_NEXT3]] = add nsw i64 [[LSR_IV2]], 1 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP6]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -278,16 +280,16 @@ define void @ebur128_calc_gating_block(ptr %st, ptr %optional_output) { ; CHECK-NEXT: br i1 [[CMP525_NOT]], label [[FOR_INC11]], label [[FOR_BODY7_LR_PH:%.*]] ; CHECK: for.body7.lr.ph: ; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[AUDIO_DATA]], align 8 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[LSR_IV1]] +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[LSR_IV1]] ; CHECK-NEXT: br label [[FOR_BODY7:%.*]] ; CHECK: for.body7: -; CHECK-NEXT: [[LSR_IV3:%.*]] = phi ptr [ [[SCEVGEP4:%.*]], [[FOR_BODY7]] ], [ [[SCEVGEP]], [[FOR_BODY7_LR_PH]] ] +; CHECK-NEXT: [[LSR_IV3:%.*]] = phi ptr [ [[UGLYGEP4:%.*]], [[FOR_BODY7]] ], [ [[UGLYGEP]], [[FOR_BODY7_LR_PH]] ] ; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY7]] ], [ [[UMAX]], [[FOR_BODY7_LR_PH]] ] ; CHECK-NEXT: [[CHANNEL_SUM_127:%.*]] = phi double [ [[CHANNEL_SUM_030]], [[FOR_BODY7_LR_PH]] ], [ [[ADD10:%.*]], [[FOR_BODY7]] ] ; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[LSR_IV3]], align 8 ; CHECK-NEXT: [[ADD10]] = fadd double [[CHANNEL_SUM_127]], [[TMP4]] ; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 -; CHECK-NEXT: [[SCEVGEP4]] = getelementptr i8, ptr [[LSR_IV3]], i64 [[TMP2]] +; CHECK-NEXT: [[UGLYGEP4]] = getelementptr i8, ptr [[LSR_IV3]], i64 [[TMP2]] ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_INC11_LOOPEXIT:%.*]], label [[FOR_BODY7]] ; CHECK: for.inc11.loopexit: @@ -367,14 +369,14 @@ define i64 @alac_seek(ptr %0) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DIV:%.*]] = udiv i64 1, 0 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[DIV]], 1 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 12 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 12 ; CHECK-NEXT: br label [[FOR_BODY_I:%.*]] ; CHECK: for.body.i: -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], [[FOR_BODY_I]] ], [ [[SCEVGEP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY_I]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ 
[[LSR_IV_NEXT:%.*]], [[FOR_BODY_I]] ], [ [[TMP1]], [[ENTRY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 -; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label [[ALAC_PAKT_BLOCK_OFFSET_EXIT:%.*]], label [[FOR_BODY_I]] ; CHECK: alac_pakt_block_offset.exit: diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll index 0755b5ee33589..762b8b15a671e 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll @@ -7,13 +7,13 @@ define void @const_tripcount(ptr %a) { ; CHECK-LABEL: @const_tripcount( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[A]], i64 1600 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP4]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -42,13 +42,13 @@ define void @runtime_tripcount(ptr %a, i32 %N) { ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 84 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -79,14 +79,14 @@ define void @ptr_of_ptr_addrec(ptr %ptrptr, i32 %length) { ; CHECK-NEXT: [[START_PTRPTR:%.*]] = getelementptr ptr, ptr [[PTRPTR:%.*]] ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP1]] +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP1]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IT_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START_PTRPTR]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[IT_04]], align 8 ; CHECK-NEXT: tail call void @foo(ptr [[TMP2]]) ; 
CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[IT_04]], i64 1 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[UGLYGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -119,7 +119,7 @@ define void @iv_start_non_preheader(ptr %mark, i32 signext %length) { ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP1]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -130,7 +130,7 @@ define void @iv_start_non_preheader(ptr %mark, i32 signext %length) { ; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DST_04]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = call ptr @foo(ptr [[TMP2]]) ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[UGLYGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] ; entry: @@ -162,13 +162,13 @@ for.body: ; preds = %entry, %for.body define void @inbounds_poison_use(ptr %a) { ; CHECK-LABEL: @inbounds_poison_use( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 16 +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 16 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i8 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr inbounds i8, ptr [[LSR_IV1]], i64 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -198,13 +198,13 @@ define void @iv_size(ptr %a, i128 %N) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[N:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -224,3 +224,212 @@ for.body: ; preds = %for.body, 
%entry for.end: ; preds = %for.body ret void } + +; To check correct folding not equality terminating condition +; Due to SLE offset must be - 1600 +define void @IcmpSle(ptr %a) { +; CHECK-LABEL: @IcmpSle( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %uglygep = getelementptr i8, ptr %a, i32 84 + br label %for.body + +for.body: ; preds = %for.body, %entry + %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ] + %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ] + store i32 1, ptr %lsr.iv1, align 4 + %lsr.iv.next = add nsw i32 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4 + %exitcond.not = icmp sle i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; Due to SLT offset must be - 1604 +define void @IcmpSlt(ptr %a) { +; CHECK-LABEL: @IcmpSlt( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 1604 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %uglygep = getelementptr i8, ptr %a, i32 84 + br label %for.body + +for.body: ; preds = %for.body, %entry + %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ] + %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ] + store i32 1, ptr %lsr.iv1, align 4 + %lsr.iv.next = add nsw i32 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4 + %exitcond.not = icmp slt i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + +; Invert predicate and branches +define void @IcmpSgt(ptr %a) { +; CHECK-LABEL: @IcmpSgt( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 88 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: 
for.end: +; CHECK-NEXT: ret void +; +entry: + %uglygep = getelementptr i8, ptr %a, i32 84 + br label %for.body + +for.body: ; preds = %for.body, %entry + %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ] + %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ] + store i32 1, ptr %lsr.iv1, align 4 + %lsr.iv.next = add nsw i32 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4 + %exitcond.not = icmp sgt i32 0, %lsr.iv.next + br i1 %exitcond.not, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +; Invert predicate and branches +define void @IcmpSgt2(ptr %a) { +; CHECK-LABEL: @IcmpSgt2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %uglygep = getelementptr i8, ptr %a, i32 84 + br label %for.body + +for.body: ; preds = %for.body, %entry + %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ] + %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ] + store i32 1, ptr %lsr.iv1, align 4 + %lsr.iv.next = add nsw i32 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4 + %exitcond.not = icmp sgt i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +; Invert predicate and branches +define void @SeveralLoopLatch(ptr %a) { +; CHECK-LABEL: @SeveralLoopLatch( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[ANOTHER_BRANCH:%.*]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 +; CHECK-NEXT: br label [[ANOTHER_BRANCH]] +; CHECK: another.branch: +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %uglygep = getelementptr i8, ptr %a, i32 84 + br label %for.body + +for.body: ; preds = %another.branch, %entry + %lsr.iv1 = phi ptr [ %uglygep2, %another.branch ], [ %uglygep, %entry ] + %lsr.iv = phi i32 [ %lsr.iv.next, %another.branch ], [ 379, %entry ] + store i32 1, ptr %lsr.iv1, align 4 + %lsr.iv.next = add nsw i32 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4 + br label %another.branch + +another.branch: + %exitcond.not = icmp sgt i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +; Invert branch in SeveralLoopLatch +define void @SeveralLoopLatch2(ptr %a) { +; CHECK-LABEL: @SeveralLoopLatch2( +; CHECK-NEXT: entry: +; CHECK-NEXT: 
[[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 +; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[ANOTHER_BRANCH:%.*]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 +; CHECK-NEXT: br label [[ANOTHER_BRANCH]] +; CHECK: another.branch: +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + %uglygep = getelementptr i8, ptr %a, i32 84 + br label %for.body + +for.body: ; preds = %another.branch, %entry + %lsr.iv1 = phi ptr [ %uglygep2, %another.branch ], [ %uglygep, %entry ] + %lsr.iv = phi i32 [ %lsr.iv.next, %another.branch ], [ 379, %entry ] + store i32 1, ptr %lsr.iv1, align 4 + %lsr.iv.next = add nsw i32 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4 + br label %another.branch + +another.branch: + %exitcond.not = icmp sle i32 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} From b0c854b016a7af54f5c4249c5154a81c9726210b Mon Sep 17 00:00:00 2001 From: luxufan Date: Mon, 20 Mar 2023 19:03:18 +0800 Subject: [PATCH 011/691] [NFC] Regenerate test file InstCombine/bit-checks.ll --- .../test/Transforms/InstCombine/bit-checks.ll | 300 +++++++++--------- 1 file changed, 150 insertions(+), 150 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/bit-checks.ll b/llvm/test/Transforms/InstCombine/bit-checks.ll index 5df3ce90b76cd..aea8d3465268a 100644 --- a/llvm/test/Transforms/InstCombine/bit-checks.ll +++ b/llvm/test/Transforms/InstCombine/bit-checks.ll @@ -4,8 +4,8 @@ define i32 @main1(i32 %argc) { ; CHECK-LABEL: @main1( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], i32 2, i32 1 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 1 @@ -20,8 +20,8 @@ define i32 @main1(i32 %argc) { define i32 @main1_logical(i32 %argc) { ; CHECK-LABEL: @main1_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 3 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 3 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], i32 2, i32 1 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 1 @@ -36,8 +36,8 @@ define i32 @main1_logical(i32 %argc) { define i32 @main2(i32 %argc) { ; CHECK-LABEL: @main2( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 3 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 3 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 1 @@ -52,8 +52,8 @@ define i32 @main2(i32 %argc) { define i32 @main2_logical(i32 %argc) { ; CHECK-LABEL: @main2_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 3 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 3 -; 
CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 1 @@ -73,8 +73,8 @@ define i32 @main2_logical(i32 %argc) { define i32 @main3(i32 %argc) { ; CHECK-LABEL: @main3( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -89,8 +89,8 @@ define i32 @main3(i32 %argc) { define i32 @main3_logical(i32 %argc) { ; CHECK-LABEL: @main3_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -105,8 +105,8 @@ define i32 @main3_logical(i32 %argc) { define i32 @main3b(i32 %argc) { ; CHECK-LABEL: @main3b( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -121,8 +121,8 @@ define i32 @main3b(i32 %argc) { define i32 @main3b_logical(i32 %argc) { ; CHECK-LABEL: @main3b_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -138,8 +138,8 @@ define i32 @main3e_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main3e_like( ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -157,8 +157,8 @@ define i32 @main3e_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 0 ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[AND2]], 0 -; CHECK-NEXT: [[AND_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 +; CHECK-NEXT: [[AND_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -174,8 +174,8 @@ define i32 @main3e_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { define i32 @main3c(i32 %argc) { ; CHECK-LABEL: @main3c( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 -; 
CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -190,8 +190,8 @@ define i32 @main3c(i32 %argc) { define i32 @main3c_logical(i32 %argc) { ; CHECK-LABEL: @main3c_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -206,8 +206,8 @@ define i32 @main3c_logical(i32 %argc) { define i32 @main3d(i32 %argc) { ; CHECK-LABEL: @main3d( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -222,8 +222,8 @@ define i32 @main3d(i32 %argc) { define i32 @main3d_logical(i32 %argc) { ; CHECK-LABEL: @main3d_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -239,8 +239,8 @@ define i32 @main3f_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main3f_like( ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 0 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP2]], 0 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -258,8 +258,8 @@ define i32 @main3f_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp eq i32 [[AND2]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL3]], i1 false -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL3]], i1 false +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -275,8 +275,8 @@ define i32 @main3f_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { define i32 @main4(i32 %argc) { ; CHECK-LABEL: @main4( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 55 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 55 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -291,8 +291,8 @@ define i32 @main4(i32 %argc) { define <2 x i32> @main4_splat(<2 x i32> %argc) { ; CHECK-LABEL: @main4_splat( ; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> 
[[ARGC:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i32> [[TMP1]], -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext <2 x i1> [[TMP2]] to <2 x i32> +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext <2 x i1> [[AND_COND]] to <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[STOREMERGE]] ; %and = and <2 x i32> %argc, @@ -307,8 +307,8 @@ define <2 x i32> @main4_splat(<2 x i32> %argc) { define i32 @main4_logical(i32 %argc) { ; CHECK-LABEL: @main4_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 55 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 55 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -323,8 +323,8 @@ define i32 @main4_logical(i32 %argc) { define i32 @main4b(i32 %argc) { ; CHECK-LABEL: @main4b( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 23 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 23 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -339,8 +339,8 @@ define i32 @main4b(i32 %argc) { define i32 @main4b_logical(i32 %argc) { ; CHECK-LABEL: @main4b_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 23 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 23 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -356,8 +356,8 @@ define i32 @main4e_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main4e_like( ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -375,8 +375,8 @@ define i32 @main4e_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], [[ARGC2]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[AND2]], [[ARGC3]] -; CHECK-NEXT: [[AND_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 +; CHECK-NEXT: [[AND_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -392,8 +392,8 @@ define i32 @main4e_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { define i32 @main4c(i32 %argc) { ; CHECK-LABEL: @main4c( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 55 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 55 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -408,8 +408,8 @@ define i32 @main4c(i32 %argc) { 
define i32 @main4c_logical(i32 %argc) { ; CHECK-LABEL: @main4c_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 55 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 55 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -424,8 +424,8 @@ define i32 @main4c_logical(i32 %argc) { define i32 @main4d(i32 %argc) { ; CHECK-LABEL: @main4d( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 23 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 23 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -440,8 +440,8 @@ define i32 @main4d(i32 %argc) { define i32 @main4d_logical(i32 %argc) { ; CHECK-LABEL: @main4d_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 23 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 23 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -457,8 +457,8 @@ define i32 @main4f_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main4f_like( ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -476,8 +476,8 @@ define i32 @main4f_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], [[ARGC2]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp eq i32 [[AND2]], [[ARGC3]] -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL3]], i1 false -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL3]], i1 false +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -494,8 +494,8 @@ define i32 @main5_like(i32 %argc, i32 %argc2) { ; CHECK-LABEL: @main5_like( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], [[ARGC2:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 7 -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 7 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], 7 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -513,8 +513,8 @@ define i32 @main5_like_logical(i32 %argc, i32 %argc2) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 7 ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC2:%.*]], 7 ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[AND2]], 7 -; CHECK-NEXT: [[AND_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 +; CHECK-NEXT: [[AND_COND_NOT:%.*]] = select i1 
[[TOBOOL]], i1 true, i1 [[TOBOOL3]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -530,8 +530,8 @@ define i32 @main5e_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main5e_like( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[ARGC]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], [[ARGC]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -549,8 +549,8 @@ define i32 @main5e_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], [[ARGC]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[AND2]], [[ARGC]] -; CHECK-NEXT: [[AND_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 +; CHECK-NEXT: [[AND_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -567,8 +567,8 @@ define i32 @main5c_like(i32 %argc, i32 %argc2) { ; CHECK-LABEL: @main5c_like( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], [[ARGC2:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 7 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], 7 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP2]], 7 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -586,8 +586,8 @@ define i32 @main5c_like_logical(i32 %argc, i32 %argc2) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 7 ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC2:%.*]], 7 ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp eq i32 [[AND2]], 7 -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL3]], i1 false -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL3]], i1 false +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -603,8 +603,8 @@ define i32 @main5f_like(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main5f_like( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC2:%.*]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP2]], [[ARGC]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP2]], [[ARGC]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -622,8 +622,8 @@ define i32 @main5f_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], [[ARGC]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp eq i32 [[AND2]], [[ARGC]] -; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL3]], i1 false -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 [[TOBOOL3]], i1 false +; 
CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, %argc2 @@ -640,8 +640,8 @@ define i32 @main5f_like_logical(i32 %argc, i32 %argc2, i32 %argc3) { define i32 @main6(i32 %argc) { ; CHECK-LABEL: @main6( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -656,8 +656,8 @@ define i32 @main6(i32 %argc) { define i32 @main6_logical(i32 %argc) { ; CHECK-LABEL: @main6_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -672,8 +672,8 @@ define i32 @main6_logical(i32 %argc) { define i32 @main6b(i32 %argc) { ; CHECK-LABEL: @main6b( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -688,8 +688,8 @@ define i32 @main6b(i32 %argc) { define i32 @main6b_logical(i32 %argc) { ; CHECK-LABEL: @main6b_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP2]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -706,8 +706,8 @@ define i32 @main6b_logical(i32 %argc) { define i32 @main6c(i32 %argc) { ; CHECK-LABEL: @main6c( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -722,8 +722,8 @@ define i32 @main6c(i32 %argc) { define i32 @main6c_logical(i32 %argc) { ; CHECK-LABEL: @main6c_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 55 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -738,8 +738,8 @@ define i32 @main6c_logical(i32 %argc) { define i32 @main6d(i32 %argc) { ; CHECK-LABEL: @main6d( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -754,8 +754,8 @@ define i32 @main6d(i32 
%argc) { define i32 @main6d_logical(i32 %argc) { ; CHECK-LABEL: @main6d_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 23 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 19 -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[DOTNOT]] to i32 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 19 +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[OR_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and = and i32 %argc, 7 @@ -773,8 +773,8 @@ define i32 @main7a(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main7a( ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and1 = and i32 %argc2, %argc @@ -792,8 +792,8 @@ define i32 @main7a_logical(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND1]], [[ARGC2]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[AND2]], [[ARGC3]] -; CHECK-NEXT: [[AND_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 +; CHECK-NEXT: [[AND_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and1 = and i32 %argc2, %argc @@ -810,8 +810,8 @@ define i32 @main7b(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main7b( ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and1 = and i32 %argc, %argc2 @@ -829,8 +829,8 @@ define i32 @main7b_logical(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND1]], [[ARGC2]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[AND2]], [[ARGC3]] -; CHECK-NEXT: [[AND_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 +; CHECK-NEXT: [[AND_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and1 = and i32 %argc, %argc2 @@ -847,8 +847,8 @@ define i32 @main7c(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-LABEL: @main7c( ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[ARGC2:%.*]], [[ARGC3:%.*]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and1 = and i32 %argc2, %argc @@ -866,8 +866,8 @@ define i32 @main7c_logical(i32 %argc, i32 %argc2, i32 %argc3) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND1]], [[ARGC2]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[ARGC3:%.*]], 
[[ARGC]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[AND2]], [[ARGC3]] -; CHECK-NEXT: [[AND_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 +; CHECK-NEXT: [[AND_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %and1 = and i32 %argc2, %argc @@ -886,8 +886,8 @@ define i32 @main7d(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { ; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -909,8 +909,8 @@ define i32 @main7d_logical(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %a ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND1]], [[BC]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[DE]], [[ARGC]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[AND2]], [[DE]] -; CHECK-NEXT: [[AND_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 +; CHECK-NEXT: [[AND_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -931,8 +931,8 @@ define i32 @main7e(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { ; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -954,8 +954,8 @@ define i32 @main7e_logical(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %a ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND1]], [[BC]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[DE]], [[ARGC]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[AND2]], [[DE]] -; CHECK-NEXT: [[AND_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 +; CHECK-NEXT: [[AND_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -976,8 +976,8 @@ define i32 @main7f(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { ; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -999,8 +999,8 @@ define i32 @main7f_logical(i32 %argc, 
i32 %argc2, i32 %argc3, i32 %argc4, i32 %a ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[BC]], [[AND1]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[DE]], [[ARGC]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[DE]], [[AND2]] -; CHECK-NEXT: [[AND_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 +; CHECK-NEXT: [[AND_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -1021,8 +1021,8 @@ define i32 @main7g(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %argc5) { ; CHECK-NEXT: [[DE:%.*]] = and i32 [[ARGC3:%.*]], [[ARGC5:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[BC]], [[DE]] ; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], [[ARGC:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[TMP3]] to i32 +; CHECK-NEXT: [[AND_COND:%.*]] = icmp ne i32 [[TMP2]], [[TMP1]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -1044,8 +1044,8 @@ define i32 @main7g_logical(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %a ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[BC]], [[AND1]] ; CHECK-NEXT: [[AND2:%.*]] = and i32 [[DE]], [[ARGC]] ; CHECK-NEXT: [[TOBOOL3:%.*]] = icmp ne i32 [[DE]], [[AND2]] -; CHECK-NEXT: [[AND_COND:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] -; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND]] to i32 +; CHECK-NEXT: [[AND_COND_NOT:%.*]] = select i1 [[TOBOOL]], i1 true, i1 [[TOBOOL3]] +; CHECK-NEXT: [[STOREMERGE:%.*]] = zext i1 [[AND_COND_NOT]] to i32 ; CHECK-NEXT: ret i32 [[STOREMERGE]] ; %bc = and i32 %argc2, %argc4 @@ -1062,8 +1062,8 @@ define i32 @main7g_logical(i32 %argc, i32 %argc2, i32 %argc3, i32 %argc4, i32 %a define i32 @main8(i32 %argc) { ; CHECK-LABEL: @main8( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND_NOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 64 @@ -1078,8 +1078,8 @@ define i32 @main8(i32 %argc) { define i32 @main8_logical(i32 %argc) { ; CHECK-LABEL: @main8_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND_NOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 64 @@ -1094,8 +1094,8 @@ define i32 @main8_logical(i32 %argc) { define i32 @main9(i32 %argc) { ; CHECK-LABEL: @main9( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 192 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp eq i32 [[TMP1]], 192 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], i32 2, i32 1 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 64 @@ -1110,8 +1110,8 @@ define i32 @main9(i32 %argc) { define i32 @main9_logical(i32 %argc) { ; CHECK-LABEL: @main9_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq 
i32 [[TMP1]], 192 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp eq i32 [[TMP1]], 192 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], i32 2, i32 1 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 64 @@ -1126,8 +1126,8 @@ define i32 @main9_logical(i32 %argc) { define i32 @main10(i32 %argc) { ; CHECK-LABEL: @main10( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], i32 2, i32 1 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 64 @@ -1142,8 +1142,8 @@ define i32 @main10(i32 %argc) { define i32 @main10_logical(i32 %argc) { ; CHECK-LABEL: @main10_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], i32 2, i32 1 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 64 @@ -1158,8 +1158,8 @@ define i32 @main10_logical(i32 %argc) { define i32 @main11(i32 %argc) { ; CHECK-LABEL: @main11( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 192 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 192 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND_NOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 64 @@ -1174,8 +1174,8 @@ define i32 @main11(i32 %argc) { define i32 @main11_logical(i32 %argc) { ; CHECK-LABEL: @main11_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 192 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 192 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 192 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND_NOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %and = and i32 %argc, 64 @@ -1190,8 +1190,8 @@ define i32 @main11_logical(i32 %argc) { define i32 @main12(i32 %argc) { ; CHECK-LABEL: @main12( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND_NOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %trunc = trunc i32 %argc to i16 @@ -1206,8 +1206,8 @@ define i32 @main12(i32 %argc) { define i32 @main12_logical(i32 %argc) { ; CHECK-LABEL: @main12_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND_NOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %trunc = trunc i32 %argc to i16 @@ -1222,8 +1222,8 @@ define i32 @main12_logical(i32 %argc) { define i32 @main13(i32 %argc) { ; CHECK-LABEL: @main13( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 32896 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 
[[TMP2]], i32 2, i32 1 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp eq i32 [[TMP1]], 32896 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], i32 2, i32 1 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %trunc = trunc i32 %argc to i16 @@ -1238,8 +1238,8 @@ define i32 @main13(i32 %argc) { define i32 @main13_logical(i32 %argc) { ; CHECK-LABEL: @main13_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 32896 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp eq i32 [[TMP1]], 32896 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], i32 2, i32 1 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %trunc = trunc i32 %argc to i16 @@ -1254,8 +1254,8 @@ define i32 @main13_logical(i32 %argc) { define i32 @main14(i32 %argc) { ; CHECK-LABEL: @main14( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], i32 2, i32 1 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %trunc = trunc i32 %argc to i16 @@ -1270,8 +1270,8 @@ define i32 @main14(i32 %argc) { define i32 @main14_logical(i32 %argc) { ; CHECK-LABEL: @main14_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[TMP2]], i32 2, i32 1 +; CHECK-NEXT: [[OR_COND:%.*]] = icmp eq i32 [[TMP1]], 0 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND]], i32 2, i32 1 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %trunc = trunc i32 %argc to i16 @@ -1286,8 +1286,8 @@ define i32 @main14_logical(i32 %argc) { define i32 @main15(i32 %argc) { ; CHECK-LABEL: @main15( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 32896 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 32896 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND_NOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %trunc = trunc i32 %argc to i16 @@ -1302,8 +1302,8 @@ define i32 @main15(i32 %argc) { define i32 @main15_logical(i32 %argc) { ; CHECK-LABEL: @main15_logical( ; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[ARGC:%.*]], 32896 -; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP1]], 32896 -; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[DOTNOT]], i32 1, i32 2 +; CHECK-NEXT: [[OR_COND_NOT:%.*]] = icmp eq i32 [[TMP1]], 32896 +; CHECK-NEXT: [[RETVAL_0:%.*]] = select i1 [[OR_COND_NOT]], i32 1, i32 2 ; CHECK-NEXT: ret i32 [[RETVAL_0]] ; %trunc = trunc i32 %argc to i16 From e62d0e1520e9eb3974b5d766100bf2be39a934d8 Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Mon, 20 Mar 2023 11:05:55 +0000 Subject: [PATCH 012/691] [llvm][Demangle][NFC] Fix demangle to work with LIBCPP_REMOVE_TRANSITIVE_INCLUDES --- llvm/include/llvm/Demangle/ItaniumDemangle.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h index acdf570e1a6ba..21fd95b7988fb 100644 --- a/llvm/include/llvm/Demangle/ItaniumDemangle.h +++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h @@ -27,6 +27,7 @@ #include #include #include +#include #include DEMANGLE_NAMESPACE_BEGIN From e9a86b7813aa324f282d66ff711d71f1d0d31492 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 20 Mar 2023 10:20:44 +0000 
Subject: [PATCH 013/691] [DAG] foldBinOpIntoSelect - remove !CanFoldNonConst
 check. NFC.

These checks are in an if-else chain where CanFoldNonConst is already
guaranteed to be false.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7667b38c744ca..548da756c01a1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -2485,15 +2485,13 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
     //   CBO, CF + CBO
     NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                     : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
-    if (!CanFoldNonConst && !NewCT.isUndef() &&
-        !isConstantOrConstantVector(NewCT, true) &&
+    if (!NewCT.isUndef() && !isConstantOrConstantVector(NewCT, true) &&
         !DAG.isConstantFPBuildVectorOrConstantFP(NewCT))
       return SDValue();

     NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                     : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
-    if (!CanFoldNonConst && !NewCF.isUndef() &&
-        !isConstantOrConstantVector(NewCF, true) &&
+    if (!NewCF.isUndef() && !isConstantOrConstantVector(NewCF, true) &&
         !DAG.isConstantFPBuildVectorOrConstantFP(NewCF))
       return SDValue();
   }

From 2d4042f4b78ebd4303f558c01b67f8ecabfe47e6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 20 Mar 2023 11:06:47 +0000
Subject: [PATCH 014/691] [DAG] visitTRUNCATE - use FoldConstantArithmetic to
 perform constant folding.

Avoid needing to perform extra isConstantIntBuildVectorOrConstantInt checks
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 548da756c01a1..43cb2fde1fe9f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14225,11 +14225,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));

   // fold (truncate c1) -> c1
-  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
-    SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
-    if (C.getNode() != N)
-      return C;
-  }
+  if (SDValue C = DAG.FoldConstantArithmetic(ISD::TRUNCATE, SDLoc(N), VT, {N0}))
+    return C;

   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
   if (N0.getOpcode() == ISD::ZERO_EXTEND ||

From 0107513fe79da7670e37c29c0862794a2213a89c Mon Sep 17 00:00:00 2001
From: David Spickett
Date: Tue, 14 Mar 2023 11:52:36 +0000
Subject: [PATCH 015/691] [lldb] Implement CrashReason using UnixSignals

By adding signal codes to UnixSignals and adding a new function where you
can get a string with optional address and bounds.

Added signal codes to the Linux, FreeBSD and NetBSD signal sets. I've
checked the numbers against the relevant sources.

Each signal code has a code number, description and printing options. By
default you just get the description; you can opt into adding either a
fault address or bounds information.

For bounds signals we'll use the description, unless we have the bounds
values, in which case we say whether it is an upper or lower bound issue.

GetCrashReasonString remains in CrashReason because we need it to be
compiled only for platforms with siginfo_t. Ideally it would move into
NativeProcessProtocol, but that is also used by
NativeRegisterContextWindows, where there would be no siginfo_t.
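For illustration, a caller might use the new API like this (a rough sketch,
not code from this patch: DescribeSegfault and the 0xdead address are
invented; signo 11 / code 1 match the SIGSEGV / SEGV_MAPERR entries added
for Linux):

  #include "lldb/Target/UnixSignals.h"

  std::string DescribeSegfault() {
    // Signal set for the host, including the codes added by this patch.
    lldb::UnixSignalsSP signals =
        lldb_private::UnixSignals::CreateForHost();
    // With the Linux set this returns:
    //   "SIGSEGV: address not mapped to object (fault address: 0xdead)"
    // and CrashReason prepends "signal " to it.
    return signals->GetSignalDescription(/*signo=*/11, /*code=*/1,
                                         /*addr=*/0xdead);
  }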
Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D146044 --- lldb/include/lldb/Target/UnixSignals.h | 19 ++ .../Plugins/Process/POSIX/CrashReason.cpp | 289 +----------------- .../Process/Utility/FreeBSDSignals.cpp | 171 ++++++----- .../Plugins/Process/Utility/LinuxSignals.cpp | 34 +++ .../Plugins/Process/Utility/NetBSDSignals.cpp | 102 ++++--- lldb/source/Target/UnixSignals.cpp | 63 ++++ lldb/unittests/Signals/UnixSignalsTest.cpp | 48 +++ 7 files changed, 343 insertions(+), 383 deletions(-) diff --git a/lldb/include/lldb/Target/UnixSignals.h b/lldb/include/lldb/Target/UnixSignals.h index ff9f1ac9be196..ac47a2769b927 100644 --- a/lldb/include/lldb/Target/UnixSignals.h +++ b/lldb/include/lldb/Target/UnixSignals.h @@ -32,6 +32,13 @@ class UnixSignals { const char *GetSignalAsCString(int32_t signo) const; + std::string + GetSignalDescription(int32_t signo, + std::optional code = std::nullopt, + std::optional addr = std::nullopt, + std::optional lower = std::nullopt, + std::optional upper = std::nullopt) const; + bool SignalIsValid(int32_t signo) const; int32_t GetSignalNumberFromName(const char *name) const; @@ -82,6 +89,12 @@ class UnixSignals { bool default_stop, bool default_notify, const char *description, const char *alias = nullptr); + enum SignalCodePrintOption { None, Address, Bounds }; + + void AddSignalCode( + int signo, int code, const char *description, + SignalCodePrintOption print_option = SignalCodePrintOption::None); + void RemoveSignal(int signo); /// Track how many times signals are hit as stop reasons. @@ -111,10 +124,16 @@ class UnixSignals { protected: // Classes that inherit from UnixSignals can see and modify these + struct SignalCode { + ConstString m_description; + SignalCodePrintOption m_print_option; + }; + struct Signal { ConstString m_name; ConstString m_alias; std::string m_description; + std::map m_codes; uint32_t m_hit_count = 0; bool m_suppress : 1, m_stop : 1, m_notify : 1; bool m_default_suppress : 1, m_default_stop : 1, m_default_notify : 1; diff --git a/lldb/source/Plugins/Process/POSIX/CrashReason.cpp b/lldb/source/Plugins/Process/POSIX/CrashReason.cpp index 2ae7f8390575d..d93b7cd0ce562 100644 --- a/lldb/source/Plugins/Process/POSIX/CrashReason.cpp +++ b/lldb/source/Plugins/Process/POSIX/CrashReason.cpp @@ -8,283 +8,7 @@ #include "CrashReason.h" -#include "llvm/Support/raw_ostream.h" - -#include - -enum class CrashReason { - eInvalidCrashReason, - - // SIGSEGV crash reasons. - eInvalidAddress, - ePrivilegedAddress, - eBoundViolation, - eAsyncTagCheckFault, - eSyncTagCheckFault, - - // SIGILL crash reasons. 
- eIllegalOpcode, - eIllegalOperand, - eIllegalAddressingMode, - eIllegalTrap, - ePrivilegedOpcode, - ePrivilegedRegister, - eCoprocessorError, - eInternalStackError, - - // SIGBUS crash reasons, - eIllegalAlignment, - eIllegalAddress, - eHardwareError, - - // SIGFPE crash reasons, - eIntegerDivideByZero, - eIntegerOverflow, - eFloatDivideByZero, - eFloatOverflow, - eFloatUnderflow, - eFloatInexactResult, - eFloatInvalidOperation, - eFloatSubscriptRange -}; - -static void AppendFaultAddr(std::string &str, lldb::addr_t addr) { - std::stringstream ss; - ss << " (fault address: 0x" << std::hex << addr << ")"; - str += ss.str(); -} - -static void AppendBounds(std::string &str, lldb::addr_t lower_bound, - lldb::addr_t upper_bound, lldb::addr_t addr) { - llvm::raw_string_ostream stream(str); - if ((unsigned long)addr < lower_bound) - stream << ": lower bound violation "; - else - stream << ": upper bound violation "; - stream << "(fault address: 0x"; - stream.write_hex(addr); - stream << ", lower bound: 0x"; - stream.write_hex(lower_bound); - stream << ", upper bound: 0x"; - stream.write_hex(upper_bound); - stream << ")"; - stream.flush(); -} - -static CrashReason GetCrashReasonForSIGSEGV(int code) { - switch (code) { -#ifdef SI_KERNEL - case SI_KERNEL: - // Some platforms will occasionally send nonstandard spurious SI_KERNEL - // codes. One way to get this is via unaligned SIMD loads. - return CrashReason::eInvalidAddress; // for lack of anything better -#endif - case SEGV_MAPERR: - return CrashReason::eInvalidAddress; - case SEGV_ACCERR: - return CrashReason::ePrivilegedAddress; -#ifndef SEGV_BNDERR -#define SEGV_BNDERR 3 -#endif - case SEGV_BNDERR: - return CrashReason::eBoundViolation; -#ifdef __linux__ -#ifndef SEGV_MTEAERR -#define SEGV_MTEAERR 8 -#endif - case SEGV_MTEAERR: - return CrashReason::eAsyncTagCheckFault; -#ifndef SEGV_MTESERR -#define SEGV_MTESERR 9 -#endif - case SEGV_MTESERR: - return CrashReason::eSyncTagCheckFault; -#endif // __linux__ - } - - return CrashReason::eInvalidCrashReason; -} - -static CrashReason GetCrashReasonForSIGILL(int code) { - switch (code) { - case ILL_ILLOPC: - return CrashReason::eIllegalOpcode; - case ILL_ILLOPN: - return CrashReason::eIllegalOperand; - case ILL_ILLADR: - return CrashReason::eIllegalAddressingMode; - case ILL_ILLTRP: - return CrashReason::eIllegalTrap; - case ILL_PRVOPC: - return CrashReason::ePrivilegedOpcode; - case ILL_PRVREG: - return CrashReason::ePrivilegedRegister; - case ILL_COPROC: - return CrashReason::eCoprocessorError; - case ILL_BADSTK: - return CrashReason::eInternalStackError; - } - - return CrashReason::eInvalidCrashReason; -} - -static CrashReason GetCrashReasonForSIGFPE(int code) { - switch (code) { - case FPE_INTDIV: - return CrashReason::eIntegerDivideByZero; - case FPE_INTOVF: - return CrashReason::eIntegerOverflow; - case FPE_FLTDIV: - return CrashReason::eFloatDivideByZero; - case FPE_FLTOVF: - return CrashReason::eFloatOverflow; - case FPE_FLTUND: - return CrashReason::eFloatUnderflow; - case FPE_FLTRES: - return CrashReason::eFloatInexactResult; - case FPE_FLTINV: - return CrashReason::eFloatInvalidOperation; - case FPE_FLTSUB: - return CrashReason::eFloatSubscriptRange; - } - - return CrashReason::eInvalidCrashReason; -} - -static CrashReason GetCrashReasonForSIGBUS(int code) { - switch (code) { - case BUS_ADRALN: - return CrashReason::eIllegalAlignment; - case BUS_ADRERR: - return CrashReason::eIllegalAddress; - case BUS_OBJERR: - return CrashReason::eHardwareError; - } - - return 
CrashReason::eInvalidCrashReason; -} - -static std::string GetCrashReasonString(CrashReason reason, - lldb::addr_t fault_addr) { - std::string str; - - switch (reason) { - default: - str = "unknown crash reason"; - break; - - case CrashReason::eInvalidAddress: - str = "signal SIGSEGV: invalid address"; - AppendFaultAddr(str, fault_addr); - break; - case CrashReason::ePrivilegedAddress: - str = "signal SIGSEGV: address access protected"; - AppendFaultAddr(str, fault_addr); - break; - case CrashReason::eBoundViolation: - str = "signal SIGSEGV: bound violation"; - break; - case CrashReason::eAsyncTagCheckFault: - str = "signal SIGSEGV: async tag check fault"; - break; - case CrashReason::eSyncTagCheckFault: - str = "signal SIGSEGV: sync tag check fault"; - AppendFaultAddr(str, fault_addr); - break; - case CrashReason::eIllegalOpcode: - str = "signal SIGILL: illegal instruction"; - break; - case CrashReason::eIllegalOperand: - str = "signal SIGILL: illegal instruction operand"; - break; - case CrashReason::eIllegalAddressingMode: - str = "signal SIGILL: illegal addressing mode"; - break; - case CrashReason::eIllegalTrap: - str = "signal SIGILL: illegal trap"; - break; - case CrashReason::ePrivilegedOpcode: - str = "signal SIGILL: privileged instruction"; - break; - case CrashReason::ePrivilegedRegister: - str = "signal SIGILL: privileged register"; - break; - case CrashReason::eCoprocessorError: - str = "signal SIGILL: coprocessor error"; - break; - case CrashReason::eInternalStackError: - str = "signal SIGILL: internal stack error"; - break; - case CrashReason::eIllegalAlignment: - str = "signal SIGBUS: illegal alignment"; - break; - case CrashReason::eIllegalAddress: - str = "signal SIGBUS: illegal address"; - break; - case CrashReason::eHardwareError: - str = "signal SIGBUS: hardware error"; - break; - case CrashReason::eIntegerDivideByZero: - str = "signal SIGFPE: integer divide by zero"; - break; - case CrashReason::eIntegerOverflow: - str = "signal SIGFPE: integer overflow"; - break; - case CrashReason::eFloatDivideByZero: - str = "signal SIGFPE: floating point divide by zero"; - break; - case CrashReason::eFloatOverflow: - str = "signal SIGFPE: floating point overflow"; - break; - case CrashReason::eFloatUnderflow: - str = "signal SIGFPE: floating point underflow"; - break; - case CrashReason::eFloatInexactResult: - str = "signal SIGFPE: inexact floating point result"; - break; - case CrashReason::eFloatInvalidOperation: - str = "signal SIGFPE: invalid floating point operation"; - break; - case CrashReason::eFloatSubscriptRange: - str = "signal SIGFPE: invalid floating point subscript range"; - break; - } - - return str; -} - -static CrashReason GetCrashReason(int signo, int code) { - switch (signo) { - case SIGSEGV: - return GetCrashReasonForSIGSEGV(code); - case SIGBUS: - return GetCrashReasonForSIGBUS(code); - case SIGFPE: - return GetCrashReasonForSIGFPE(code); - case SIGILL: - return GetCrashReasonForSIGILL(code); - } - - assert(false && "unexpected signal"); - return CrashReason::eInvalidCrashReason; -} - -static std::string GetCrashReasonString(int signo, int code, lldb::addr_t addr, - std::optional lower, - std::optional upper) { - CrashReason reason = GetCrashReason(signo, code); - - if (lower && upper) { - std::string str; - if (reason == CrashReason::eBoundViolation) { - str = "signal SIGSEGV"; - AppendBounds(str, *lower, *upper, addr); - return str; - } - } - - return GetCrashReasonString(reason, addr); -} +#include "lldb/Target/UnixSignals.h" std::string 
GetCrashReasonString(const siginfo_t &info) { #if defined(si_lower) && defined(si_upper) @@ -296,7 +20,12 @@ std::string GetCrashReasonString(const siginfo_t &info) { std::optional lower; std::optional upper; #endif - return GetCrashReasonString(info.si_signo, info.si_code, - reinterpret_cast(info.si_addr), lower, - upper); + + std::string description = + lldb_private::UnixSignals::CreateForHost()->GetSignalDescription( + info.si_signo, info.si_code, + reinterpret_cast(info.si_addr), lower, upper); + assert(description.size() && "unexpected signal"); + + return "signal " + description; } diff --git a/lldb/source/Plugins/Process/Utility/FreeBSDSignals.cpp b/lldb/source/Plugins/Process/Utility/FreeBSDSignals.cpp index 0a4bdc72b364e..c2997e0249219 100644 --- a/lldb/source/Plugins/Process/Utility/FreeBSDSignals.cpp +++ b/lldb/source/Plugins/Process/Utility/FreeBSDSignals.cpp @@ -15,72 +15,107 @@ FreeBSDSignals::FreeBSDSignals() : UnixSignals() { Reset(); } void FreeBSDSignals::Reset() { UnixSignals::Reset(); - // SIGNO NAME SUPPRESS STOP NOTIFY DESCRIPTION - // ====== ============ ======== ====== ====== - // =================================================== - AddSignal(32, "SIGTHR", false, false, false, "thread interrupt"); - AddSignal(33, "SIGLIBRT", false, false, false, - "reserved by real-time library"); - AddSignal(65, "SIGRTMIN", false, false, false, "real time signal 0"); - AddSignal(66, "SIGRTMIN+1", false, false, false, "real time signal 1"); - AddSignal(67, "SIGRTMIN+2", false, false, false, "real time signal 2"); - AddSignal(68, "SIGRTMIN+3", false, false, false, "real time signal 3"); - AddSignal(69, "SIGRTMIN+4", false, false, false, "real time signal 4"); - AddSignal(70, "SIGRTMIN+5", false, false, false, "real time signal 5"); - AddSignal(71, "SIGRTMIN+6", false, false, false, "real time signal 6"); - AddSignal(72, "SIGRTMIN+7", false, false, false, "real time signal 7"); - AddSignal(73, "SIGRTMIN+8", false, false, false, "real time signal 8"); - AddSignal(74, "SIGRTMIN+9", false, false, false, "real time signal 9"); - AddSignal(75, "SIGRTMIN+10", false, false, false, "real time signal 10"); - AddSignal(76, "SIGRTMIN+11", false, false, false, "real time signal 11"); - AddSignal(77, "SIGRTMIN+12", false, false, false, "real time signal 12"); - AddSignal(78, "SIGRTMIN+13", false, false, false, "real time signal 13"); - AddSignal(79, "SIGRTMIN+14", false, false, false, "real time signal 14"); - AddSignal(80, "SIGRTMIN+15", false, false, false, "real time signal 15"); - AddSignal(81, "SIGRTMIN+16", false, false, false, "real time signal 16"); - AddSignal(82, "SIGRTMIN+17", false, false, false, "real time signal 17"); - AddSignal(83, "SIGRTMIN+18", false, false, false, "real time signal 18"); - AddSignal(84, "SIGRTMIN+19", false, false, false, "real time signal 19"); - AddSignal(85, "SIGRTMIN+20", false, false, false, "real time signal 20"); - AddSignal(86, "SIGRTMIN+21", false, false, false, "real time signal 21"); - AddSignal(87, "SIGRTMIN+22", false, false, false, "real time signal 22"); - AddSignal(88, "SIGRTMIN+23", false, false, false, "real time signal 23"); - AddSignal(89, "SIGRTMIN+24", false, false, false, "real time signal 24"); - AddSignal(90, "SIGRTMIN+25", false, false, false, "real time signal 25"); - AddSignal(91, "SIGRTMIN+26", false, false, false, "real time signal 26"); - AddSignal(92, "SIGRTMIN+27", false, false, false, "real time signal 27"); - AddSignal(93, "SIGRTMIN+28", false, false, false, "real time signal 28"); - AddSignal(94, "SIGRTMIN+29", false, false, 
false, "real time signal 29"); - AddSignal(95, "SIGRTMIN+30", false, false, false, "real time signal 30"); - AddSignal(96, "SIGRTMAX-30", false, false, false, "real time signal 31"); - AddSignal(97, "SIGRTMAX-29", false, false, false, "real time signal 32"); - AddSignal(98, "SIGRTMAX-28", false, false, false, "real time signal 33"); - AddSignal(99, "SIGRTMAX-27", false, false, false, "real time signal 34"); - AddSignal(100, "SIGRTMAX-26", false, false, false, "real time signal 35"); - AddSignal(101, "SIGRTMAX-25", false, false, false, "real time signal 36"); - AddSignal(102, "SIGRTMAX-24", false, false, false, "real time signal 37"); - AddSignal(103, "SIGRTMAX-23", false, false, false, "real time signal 38"); - AddSignal(104, "SIGRTMAX-22", false, false, false, "real time signal 39"); - AddSignal(105, "SIGRTMAX-21", false, false, false, "real time signal 40"); - AddSignal(106, "SIGRTMAX-20", false, false, false, "real time signal 41"); - AddSignal(107, "SIGRTMAX-19", false, false, false, "real time signal 42"); - AddSignal(108, "SIGRTMAX-18", false, false, false, "real time signal 43"); - AddSignal(109, "SIGRTMAX-17", false, false, false, "real time signal 44"); - AddSignal(110, "SIGRTMAX-16", false, false, false, "real time signal 45"); - AddSignal(111, "SIGRTMAX-15", false, false, false, "real time signal 46"); - AddSignal(112, "SIGRTMAX-14", false, false, false, "real time signal 47"); - AddSignal(113, "SIGRTMAX-13", false, false, false, "real time signal 48"); - AddSignal(114, "SIGRTMAX-12", false, false, false, "real time signal 49"); - AddSignal(115, "SIGRTMAX-11", false, false, false, "real time signal 50"); - AddSignal(116, "SIGRTMAX-10", false, false, false, "real time signal 51"); - AddSignal(117, "SIGRTMAX-9", false, false, false, "real time signal 52"); - AddSignal(118, "SIGRTMAX-8", false, false, false, "real time signal 53"); - AddSignal(119, "SIGRTMAX-7", false, false, false, "real time signal 54"); - AddSignal(120, "SIGRTMAX-6", false, false, false, "real time signal 55"); - AddSignal(121, "SIGRTMAX-5", false, false, false, "real time signal 56"); - AddSignal(122, "SIGRTMAX-4", false, false, false, "real time signal 57"); - AddSignal(123, "SIGRTMAX-3", false, false, false, "real time signal 58"); - AddSignal(124, "SIGRTMAX-2", false, false, false, "real time signal 59"); - AddSignal(125, "SIGRTMAX-1", false, false, false, "real time signal 60"); - AddSignal(126, "SIGRTMAX", false, false, false, "real time signal 61"); + // clang-format off + // SIGILL + AddSignalCode(4, 1 /*ILL_ILLOPC*/, "illegal opcode"); + AddSignalCode(4, 2 /*ILL_ILLOPN*/, "illegal operand"); + AddSignalCode(4, 3 /*ILL_ILLADR*/, "illegal addressing mode"); + AddSignalCode(4, 4 /*ILL_ILLTRP*/, "illegal trap"); + AddSignalCode(4, 5 /*ILL_PRVOPC*/, "privileged opcode"); + AddSignalCode(4, 6 /*ILL_PRVREG*/, "privileged register"); + AddSignalCode(4, 7 /*ILL_COPROC*/, "coprocessor error"); + AddSignalCode(4, 8 /*ILL_BADSTK*/, "internal stack error"); + + // SIGFPE + AddSignalCode(8, 1 /*FPE_INTOVF*/, "integer overflow"); + AddSignalCode(8, 2 /*FPE_INTDIV*/, "integer divide by zero"); + AddSignalCode(8, 3 /*FPE_FLTDIV*/, "floating point divide by zero"); + AddSignalCode(8, 4 /*FPE_FLTOVF*/, "floating point overflow"); + AddSignalCode(8, 5 /*FPE_FLTUND*/, "floating point underflow"); + AddSignalCode(8, 6 /*FPE_FLTRES*/, "floating point inexact result"); + AddSignalCode(8, 7 /*FPE_FLTINV*/, "invalid floating point operation"); + AddSignalCode(8, 8 /*FPE_FLTSUB*/, "subscript out of range"); + AddSignalCode(8, 9 
/*FPE_FLTIDO*/, "input denormal operation"); + + // SIGBUS + AddSignalCode(10, 1 /*BUS_ADRALN*/, "invalid address alignment"); + AddSignalCode(10, 2 /*BUS_ADRERR*/, "nonexistent physical address"); + AddSignalCode(10, 3 /*BUS_OBJERR*/, "object-specific hardware error"); + AddSignalCode(10, 100 /*BUS_OOMERR*/, "no memory"); + + // SIGSEGV + AddSignalCode(11, 1 /*SEGV_MAPERR*/, "address not mapped to object", + SignalCodePrintOption::Address); + AddSignalCode(11, 2 /*SEGV_ACCERR*/, "invalid permissions for mapped object", + SignalCodePrintOption::Address); + AddSignalCode(11, 100 /*SEGV_PKUERR*/, "PKU violation", + SignalCodePrintOption::Address); + + // SIGNO NAME SUPPRESS STOP NOTIFY DESCRIPTION + // ===== ============== ======== ====== ====== ======================== + AddSignal(32, "SIGTHR", false, false, false, "thread interrupt"); + AddSignal(33, "SIGLIBRT", false, false, false, "reserved by real-time library"); + AddSignal(65, "SIGRTMIN", false, false, false, "real time signal 0"); + AddSignal(66, "SIGRTMIN+1", false, false, false, "real time signal 1"); + AddSignal(67, "SIGRTMIN+2", false, false, false, "real time signal 2"); + AddSignal(68, "SIGRTMIN+3", false, false, false, "real time signal 3"); + AddSignal(69, "SIGRTMIN+4", false, false, false, "real time signal 4"); + AddSignal(70, "SIGRTMIN+5", false, false, false, "real time signal 5"); + AddSignal(71, "SIGRTMIN+6", false, false, false, "real time signal 6"); + AddSignal(72, "SIGRTMIN+7", false, false, false, "real time signal 7"); + AddSignal(73, "SIGRTMIN+8", false, false, false, "real time signal 8"); + AddSignal(74, "SIGRTMIN+9", false, false, false, "real time signal 9"); + AddSignal(75, "SIGRTMIN+10", false, false, false, "real time signal 10"); + AddSignal(76, "SIGRTMIN+11", false, false, false, "real time signal 11"); + AddSignal(77, "SIGRTMIN+12", false, false, false, "real time signal 12"); + AddSignal(78, "SIGRTMIN+13", false, false, false, "real time signal 13"); + AddSignal(79, "SIGRTMIN+14", false, false, false, "real time signal 14"); + AddSignal(80, "SIGRTMIN+15", false, false, false, "real time signal 15"); + AddSignal(81, "SIGRTMIN+16", false, false, false, "real time signal 16"); + AddSignal(82, "SIGRTMIN+17", false, false, false, "real time signal 17"); + AddSignal(83, "SIGRTMIN+18", false, false, false, "real time signal 18"); + AddSignal(84, "SIGRTMIN+19", false, false, false, "real time signal 19"); + AddSignal(85, "SIGRTMIN+20", false, false, false, "real time signal 20"); + AddSignal(86, "SIGRTMIN+21", false, false, false, "real time signal 21"); + AddSignal(87, "SIGRTMIN+22", false, false, false, "real time signal 22"); + AddSignal(88, "SIGRTMIN+23", false, false, false, "real time signal 23"); + AddSignal(89, "SIGRTMIN+24", false, false, false, "real time signal 24"); + AddSignal(90, "SIGRTMIN+25", false, false, false, "real time signal 25"); + AddSignal(91, "SIGRTMIN+26", false, false, false, "real time signal 26"); + AddSignal(92, "SIGRTMIN+27", false, false, false, "real time signal 27"); + AddSignal(93, "SIGRTMIN+28", false, false, false, "real time signal 28"); + AddSignal(94, "SIGRTMIN+29", false, false, false, "real time signal 29"); + AddSignal(95, "SIGRTMIN+30", false, false, false, "real time signal 30"); + AddSignal(96, "SIGRTMAX-30", false, false, false, "real time signal 31"); + AddSignal(97, "SIGRTMAX-29", false, false, false, "real time signal 32"); + AddSignal(98, "SIGRTMAX-28", false, false, false, "real time signal 33"); + AddSignal(99, "SIGRTMAX-27", false, false, false, "real time 
signal 34"); + AddSignal(100, "SIGRTMAX-26", false, false, false, "real time signal 35"); + AddSignal(101, "SIGRTMAX-25", false, false, false, "real time signal 36"); + AddSignal(102, "SIGRTMAX-24", false, false, false, "real time signal 37"); + AddSignal(103, "SIGRTMAX-23", false, false, false, "real time signal 38"); + AddSignal(104, "SIGRTMAX-22", false, false, false, "real time signal 39"); + AddSignal(105, "SIGRTMAX-21", false, false, false, "real time signal 40"); + AddSignal(106, "SIGRTMAX-20", false, false, false, "real time signal 41"); + AddSignal(107, "SIGRTMAX-19", false, false, false, "real time signal 42"); + AddSignal(108, "SIGRTMAX-18", false, false, false, "real time signal 43"); + AddSignal(109, "SIGRTMAX-17", false, false, false, "real time signal 44"); + AddSignal(110, "SIGRTMAX-16", false, false, false, "real time signal 45"); + AddSignal(111, "SIGRTMAX-15", false, false, false, "real time signal 46"); + AddSignal(112, "SIGRTMAX-14", false, false, false, "real time signal 47"); + AddSignal(113, "SIGRTMAX-13", false, false, false, "real time signal 48"); + AddSignal(114, "SIGRTMAX-12", false, false, false, "real time signal 49"); + AddSignal(115, "SIGRTMAX-11", false, false, false, "real time signal 50"); + AddSignal(116, "SIGRTMAX-10", false, false, false, "real time signal 51"); + AddSignal(117, "SIGRTMAX-9", false, false, false, "real time signal 52"); + AddSignal(118, "SIGRTMAX-8", false, false, false, "real time signal 53"); + AddSignal(119, "SIGRTMAX-7", false, false, false, "real time signal 54"); + AddSignal(120, "SIGRTMAX-6", false, false, false, "real time signal 55"); + AddSignal(121, "SIGRTMAX-5", false, false, false, "real time signal 56"); + AddSignal(122, "SIGRTMAX-4", false, false, false, "real time signal 57"); + AddSignal(123, "SIGRTMAX-3", false, false, false, "real time signal 58"); + AddSignal(124, "SIGRTMAX-2", false, false, false, "real time signal 59"); + AddSignal(125, "SIGRTMAX-1", false, false, false, "real time signal 60"); + AddSignal(126, "SIGRTMAX", false, false, false, "real time signal 61"); + // clang-format on } diff --git a/lldb/source/Plugins/Process/Utility/LinuxSignals.cpp b/lldb/source/Plugins/Process/Utility/LinuxSignals.cpp index d4b0f4039da95..fee7b89ef8371 100644 --- a/lldb/source/Plugins/Process/Utility/LinuxSignals.cpp +++ b/lldb/source/Plugins/Process/Utility/LinuxSignals.cpp @@ -20,14 +20,48 @@ void LinuxSignals::Reset() { AddSignal(1, "SIGHUP", false, true, true, "hangup"); AddSignal(2, "SIGINT", true, true, true, "interrupt"); AddSignal(3, "SIGQUIT", false, true, true, "quit"); + AddSignal(4, "SIGILL", false, true, true, "illegal instruction"); + AddSignalCode(4, 1 /*ILL_ILLOPC*/, "illegal opcode"); + AddSignalCode(4, 2 /*ILL_ILLOPN*/, "illegal operand"); + AddSignalCode(4, 3 /*ILL_ILLADR*/, "illegal addressing mode"); + AddSignalCode(4, 4 /*ILL_ILLTRP*/, "illegal trap"); + AddSignalCode(4, 5 /*ILL_PRVOPC*/, "privileged opcode"); + AddSignalCode(4, 6 /*ILL_PRVREG*/, "privileged register"); + AddSignalCode(4, 7 /*ILL_COPROC*/, "coprocessor error"); + AddSignalCode(4, 8 /*ILL_BADSTK*/, "internal stack error"); + AddSignal(5, "SIGTRAP", true, true, true, "trace trap (not reset when caught)"); AddSignal(6, "SIGABRT", false, true, true, "abort()/IOT trap", "SIGIOT"); + AddSignal(7, "SIGBUS", false, true, true, "bus error"); + AddSignalCode(7, 1 /*BUS_ADRALN*/, "illegal alignment"); + AddSignalCode(7, 2 /*BUS_ADRERR*/, "illegal address"); + AddSignalCode(7, 3 /*BUS_OBJERR*/, "hardware error"); + AddSignal(8, "SIGFPE", false, 
true, true, "floating point exception"); + AddSignalCode(8, 1 /*FPE_INTDIV*/, "integer divide by zero"); + AddSignalCode(8, 2 /*FPE_INTOVF*/, "integer overflow"); + AddSignalCode(8, 3 /*FPE_FLTDIV*/, "floating point divide by zero"); + AddSignalCode(8, 4 /*FPE_FLTOVF*/, "floating point overflow"); + AddSignalCode(8, 5 /*FPE_FLTUND*/, "floating point underflow"); + AddSignalCode(8, 6 /*FPE_FLTRES*/, "floating point inexact result"); + AddSignalCode(8, 7 /*FPE_FLTINV*/, "floating point invalid operation"); + AddSignalCode(8, 8 /*FPE_FLTSUB*/, "subscript out of range"); + AddSignal(9, "SIGKILL", false, true, true, "kill"); AddSignal(10, "SIGUSR1", false, true, true, "user defined signal 1"); + AddSignal(11, "SIGSEGV", false, true, true, "segmentation violation"); + AddSignalCode(11, 1 /*SEGV_MAPERR*/, "address not mapped to object", SignalCodePrintOption::Address); + AddSignalCode(11, 2 /*SEGV_ACCERR*/, "invalid permissions for mapped object", SignalCodePrintOption::Address); + AddSignalCode(11, 3 /*SEGV_BNDERR*/, "failed address bounds checks", SignalCodePrintOption::Bounds); + AddSignalCode(11, 8 /*SEGV_MTEAERR*/, "async tag check fault"); + AddSignalCode(11, 9 /*SEGV_MTESERR*/, "sync tag check fault", SignalCodePrintOption::Address); + // Some platforms will occasionally send nonstandard spurious SI_KERNEL + // codes. One way to get this is via unaligned SIMD loads. Treat it as invalid address. + AddSignalCode(11, 0x80 /*SI_KERNEL*/, "invalid address", SignalCodePrintOption::Address); + AddSignal(12, "SIGUSR2", false, true, true, "user defined signal 2"); AddSignal(13, "SIGPIPE", false, true, true, "write to pipe with reading end closed"); AddSignal(14, "SIGALRM", false, false, false, "alarm"); diff --git a/lldb/source/Plugins/Process/Utility/NetBSDSignals.cpp b/lldb/source/Plugins/Process/Utility/NetBSDSignals.cpp index ffdfd19b4efe2..84a664c05558c 100644 --- a/lldb/source/Plugins/Process/Utility/NetBSDSignals.cpp +++ b/lldb/source/Plugins/Process/Utility/NetBSDSignals.cpp @@ -14,40 +14,72 @@ NetBSDSignals::NetBSDSignals() : UnixSignals() { Reset(); } void NetBSDSignals::Reset() { UnixSignals::Reset(); + + // clang-format off + // SIGILL + AddSignalCode(4, 1 /*ILL_ILLOPC*/, "illegal opcode"); + AddSignalCode(4, 2 /*ILL_ILLOPN*/, "illegal operand"); + AddSignalCode(4, 3 /*ILL_ILLADR*/, "illegal addressing mode"); + AddSignalCode(4, 4 /*ILL_ILLTRP*/, "illegal trap"); + AddSignalCode(4, 5 /*ILL_PRVOPC*/, "privileged opcode"); + AddSignalCode(4, 6 /*ILL_PRVREG*/, "privileged register"); + AddSignalCode(4, 7 /*ILL_COPROC*/, "coprocessor error"); + AddSignalCode(4, 8 /*ILL_BADSTK*/, "internal stack error"); + + // SIGFPE + AddSignalCode(8, 1 /*FPE_INTDIV*/, "integer divide by zero"); + AddSignalCode(8, 2 /*FPE_INTOVF*/, "integer overflow"); + AddSignalCode(8, 3 /*FPE_FLTDIV*/, "floating point divide by zero"); + AddSignalCode(8, 4 /*FPE_FLTOVF*/, "floating point overflow"); + AddSignalCode(8, 5 /*FPE_FLTUND*/, "floating point underflow"); + AddSignalCode(8, 6 /*FPE_FLTRES*/, "floating point inexact result"); + AddSignalCode(8, 7 /*FPE_FLTINV*/, "invalid floating point operation"); + AddSignalCode(8, 8 /*FPE_FLTSUB*/, "subscript out of range"); + + // SIGBUS + AddSignalCode(10, 1 /*BUS_ADRALN*/, "invalid address alignment"); + AddSignalCode(10, 2 /*BUS_ADRERR*/, "non-existent physical address"); + AddSignalCode(10, 3 /*BUS_OBJERR*/, "object specific hardware error"); + + // SIGSEGV + AddSignalCode(11, 1 /*SEGV_MAPERR*/, "address not mapped to object", + SignalCodePrintOption::Address); + 
AddSignalCode(11, 2 /*SEGV_ACCERR*/, "invalid permissions for mapped object", + SignalCodePrintOption::Address); + // SIGNO NAME SUPPRESS STOP NOTIFY DESCRIPTION - // ====== ============ ======== ====== ====== - // =================================================== - AddSignal(32, "SIGPWR", false, true, true, - "power fail/restart (not reset when caught)"); - AddSignal(33, "SIGRTMIN", false, false, false, "real time signal 0"); - AddSignal(34, "SIGRTMIN+1", false, false, false, "real time signal 1"); - AddSignal(35, "SIGRTMIN+2", false, false, false, "real time signal 2"); - AddSignal(36, "SIGRTMIN+3", false, false, false, "real time signal 3"); - AddSignal(37, "SIGRTMIN+4", false, false, false, "real time signal 4"); - AddSignal(38, "SIGRTMIN+5", false, false, false, "real time signal 5"); - AddSignal(39, "SIGRTMIN+6", false, false, false, "real time signal 6"); - AddSignal(40, "SIGRTMIN+7", false, false, false, "real time signal 7"); - AddSignal(41, "SIGRTMIN+8", false, false, false, "real time signal 8"); - AddSignal(42, "SIGRTMIN+9", false, false, false, "real time signal 9"); - AddSignal(43, "SIGRTMIN+10", false, false, false, "real time signal 10"); - AddSignal(44, "SIGRTMIN+11", false, false, false, "real time signal 11"); - AddSignal(45, "SIGRTMIN+12", false, false, false, "real time signal 12"); - AddSignal(46, "SIGRTMIN+13", false, false, false, "real time signal 13"); - AddSignal(47, "SIGRTMIN+14", false, false, false, "real time signal 14"); - AddSignal(48, "SIGRTMIN+15", false, false, false, "real time signal 15"); - AddSignal(49, "SIGRTMIN-14", false, false, false, "real time signal 16"); - AddSignal(50, "SIGRTMAX-13", false, false, false, "real time signal 17"); - AddSignal(51, "SIGRTMAX-12", false, false, false, "real time signal 18"); - AddSignal(52, "SIGRTMAX-11", false, false, false, "real time signal 19"); - AddSignal(53, "SIGRTMAX-10", false, false, false, "real time signal 20"); - AddSignal(54, "SIGRTMAX-9", false, false, false, "real time signal 21"); - AddSignal(55, "SIGRTMAX-8", false, false, false, "real time signal 22"); - AddSignal(56, "SIGRTMAX-7", false, false, false, "real time signal 23"); - AddSignal(57, "SIGRTMAX-6", false, false, false, "real time signal 24"); - AddSignal(58, "SIGRTMAX-5", false, false, false, "real time signal 25"); - AddSignal(59, "SIGRTMAX-4", false, false, false, "real time signal 26"); - AddSignal(60, "SIGRTMAX-3", false, false, false, "real time signal 27"); - AddSignal(61, "SIGRTMAX-2", false, false, false, "real time signal 28"); - AddSignal(62, "SIGRTMAX-1", false, false, false, "real time signal 29"); - AddSignal(63, "SIGRTMAX", false, false, false, "real time signal 30"); + // ===== ============== ======== ====== ====== ======================== + AddSignal(32, "SIGPWR", false, true, true, "power fail/restart (not reset when caught)"); + AddSignal(33, "SIGRTMIN", false, false, false, "real time signal 0"); + AddSignal(34, "SIGRTMIN+1", false, false, false, "real time signal 1"); + AddSignal(35, "SIGRTMIN+2", false, false, false, "real time signal 2"); + AddSignal(36, "SIGRTMIN+3", false, false, false, "real time signal 3"); + AddSignal(37, "SIGRTMIN+4", false, false, false, "real time signal 4"); + AddSignal(38, "SIGRTMIN+5", false, false, false, "real time signal 5"); + AddSignal(39, "SIGRTMIN+6", false, false, false, "real time signal 6"); + AddSignal(40, "SIGRTMIN+7", false, false, false, "real time signal 7"); + AddSignal(41, "SIGRTMIN+8", false, false, false, "real time signal 8"); + AddSignal(42, "SIGRTMIN+9", false, 
false, false, "real time signal 9"); + AddSignal(43, "SIGRTMIN+10", false, false, false, "real time signal 10"); + AddSignal(44, "SIGRTMIN+11", false, false, false, "real time signal 11"); + AddSignal(45, "SIGRTMIN+12", false, false, false, "real time signal 12"); + AddSignal(46, "SIGRTMIN+13", false, false, false, "real time signal 13"); + AddSignal(47, "SIGRTMIN+14", false, false, false, "real time signal 14"); + AddSignal(48, "SIGRTMIN+15", false, false, false, "real time signal 15"); + AddSignal(49, "SIGRTMIN-14", false, false, false, "real time signal 16"); + AddSignal(50, "SIGRTMAX-13", false, false, false, "real time signal 17"); + AddSignal(51, "SIGRTMAX-12", false, false, false, "real time signal 18"); + AddSignal(52, "SIGRTMAX-11", false, false, false, "real time signal 19"); + AddSignal(53, "SIGRTMAX-10", false, false, false, "real time signal 20"); + AddSignal(54, "SIGRTMAX-9", false, false, false, "real time signal 21"); + AddSignal(55, "SIGRTMAX-8", false, false, false, "real time signal 22"); + AddSignal(56, "SIGRTMAX-7", false, false, false, "real time signal 23"); + AddSignal(57, "SIGRTMAX-6", false, false, false, "real time signal 24"); + AddSignal(58, "SIGRTMAX-5", false, false, false, "real time signal 25"); + AddSignal(59, "SIGRTMAX-4", false, false, false, "real time signal 26"); + AddSignal(60, "SIGRTMAX-3", false, false, false, "real time signal 27"); + AddSignal(61, "SIGRTMAX-2", false, false, false, "real time signal 28"); + AddSignal(62, "SIGRTMAX-1", false, false, false, "real time signal 29"); + AddSignal(63, "SIGRTMAX", false, false, false, "real time signal 30"); + // clang-format on } diff --git a/lldb/source/Target/UnixSignals.cpp b/lldb/source/Target/UnixSignals.cpp index 02354be44cfe8..d754537cc4cf4 100644 --- a/lldb/source/Target/UnixSignals.cpp +++ b/lldb/source/Target/UnixSignals.cpp @@ -13,6 +13,7 @@ #include "lldb/Host/HostInfo.h" #include "lldb/Utility/ArchSpec.h" #include +#include using namespace lldb_private; using namespace llvm; @@ -112,6 +113,16 @@ void UnixSignals::AddSignal(int signo, const char *name, bool default_suppress, ++m_version; } +void UnixSignals::AddSignalCode(int signo, int code, const char *description, + SignalCodePrintOption print_option) { + collection::iterator signal = m_signals.find(signo); + assert(signal != m_signals.end() && + "Tried to add code to signal that does not exist."); + signal->second.m_codes.insert( + std::pair{code, SignalCode{ConstString(description), print_option}}); + ++m_version; +} + void UnixSignals::RemoveSignal(int signo) { collection::iterator pos = m_signals.find(signo); if (pos != m_signals.end()) @@ -127,6 +138,58 @@ const char *UnixSignals::GetSignalAsCString(int signo) const { return pos->second.m_name.GetCString(); } +std::string +UnixSignals::GetSignalDescription(int32_t signo, std::optional code, + std::optional addr, + std::optional lower, + std::optional upper) const { + std::string str; + + collection::const_iterator pos = m_signals.find(signo); + if (pos != m_signals.end()) { + str = pos->second.m_name.GetCString(); + + if (code) { + std::map::const_iterator cpos = + pos->second.m_codes.find(*code); + if (cpos != pos->second.m_codes.end()) { + const SignalCode &sc = cpos->second; + str += ": "; + if (sc.m_print_option != SignalCodePrintOption::Bounds) + str += sc.m_description.GetCString(); + + std::stringstream strm; + switch (sc.m_print_option) { + case SignalCodePrintOption::None: + break; + case SignalCodePrintOption::Address: + if (addr) + strm << " (fault address: 0x" << std::hex 
<< *addr << ")"; + break; + case SignalCodePrintOption::Bounds: + if (lower && upper && addr) { + if ((unsigned long)(*addr) < *lower) + strm << "lower bound violation "; + else + strm << "upper bound violation "; + + strm << "(fault address: 0x" << std::hex << *addr; + strm << ", lower bound: 0x" << std::hex << *lower; + strm << ", upper bound: 0x" << std::hex << *upper; + strm << ")"; + } else + strm << sc.m_description.GetCString(); + + break; + } + str += strm.str(); + } + } + } + + return str; +} + bool UnixSignals::SignalIsValid(int32_t signo) const { return m_signals.find(signo) != m_signals.end(); } diff --git a/lldb/unittests/Signals/UnixSignalsTest.cpp b/lldb/unittests/Signals/UnixSignalsTest.cpp index e4c4634862449..2ae1b4ee5138f 100644 --- a/lldb/unittests/Signals/UnixSignalsTest.cpp +++ b/lldb/unittests/Signals/UnixSignalsTest.cpp @@ -23,6 +23,10 @@ class TestSignals : public UnixSignals { AddSignal(4, "SIG4", true, false, true, "DESC4"); AddSignal(8, "SIG8", true, true, true, "DESC8"); AddSignal(16, "SIG16", true, false, false, "DESC16"); + AddSignalCode(16, 1, "a specific type of SIG16"); + AddSignalCode(16, 2, "SIG16 with a fault address", + SignalCodePrintOption::Address); + AddSignalCode(16, 3, "bounds violation", SignalCodePrintOption::Bounds); } }; @@ -93,6 +97,50 @@ TEST(UnixSignalsTest, GetInfo) { EXPECT_EQ(name, signals.GetSignalAsCString(signo)); } +TEST(UnixSignalsTest, GetAsCString) { + TestSignals signals; + + ASSERT_EQ(nullptr, signals.GetSignalAsCString(100)); + std::string name = signals.GetSignalAsCString(16); + ASSERT_EQ("SIG16", name); +} + +TEST(UnixSignalsTest, GetAsString) { + TestSignals signals; + + ASSERT_EQ("", signals.GetSignalDescription(100, std::nullopt)); + ASSERT_EQ("SIG16", signals.GetSignalDescription(16, std::nullopt)); + ASSERT_EQ("", signals.GetSignalDescription(100, 100)); + ASSERT_EQ("SIG16", signals.GetSignalDescription(16, 100)); + ASSERT_EQ("SIG16: a specific type of SIG16", + signals.GetSignalDescription(16, 1)); + + // Unknown code, won't use the address. + ASSERT_EQ("SIG16", signals.GetSignalDescription(16, 100, 0xCAFEF00D)); + // Known code, that shouldn't print fault address. + ASSERT_EQ("SIG16: a specific type of SIG16", + signals.GetSignalDescription(16, 1, 0xCAFEF00D)); + // Known code that should. + ASSERT_EQ("SIG16: SIG16 with a fault address (fault address: 0xcafef00d)", + signals.GetSignalDescription(16, 2, 0xCAFEF00D)); + // No address given just print the code description. + ASSERT_EQ("SIG16: SIG16 with a fault address", + signals.GetSignalDescription(16, 2)); + + const char *expected = "SIG16: bounds violation"; + // Must pass all needed info to get full output. 
+  ASSERT_EQ(expected, signals.GetSignalDescription(16, 3));
+  ASSERT_EQ(expected, signals.GetSignalDescription(16, 3, 0xcafef00d));
+  ASSERT_EQ(expected, signals.GetSignalDescription(16, 3, 0xcafef00d, 0x1234));
+
+  ASSERT_EQ("SIG16: upper bound violation (fault address: 0x5679, lower bound: "
+            "0x1234, upper bound: 0x5678)",
+            signals.GetSignalDescription(16, 3, 0x5679, 0x1234, 0x5678));
+  ASSERT_EQ("SIG16: lower bound violation (fault address: 0x1233, lower bound: "
+            "0x1234, upper bound: 0x5678)",
+            signals.GetSignalDescription(16, 3, 0x1233, 0x1234, 0x5678));
+}
+
 TEST(UnixSignalsTest, VersionChange) {
   TestSignals signals;

From ba7f3e1d1e50212bdc8cc438185519fd7257aa44 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache
Date: Mon, 20 Mar 2023 05:17:39 -0700
Subject: [PATCH 016/691] [mlir][Transform] Fix support for mapping to GPU
 warps and to linear ids

c59465e1203dd78d06e15f7ddf62141807dbd5a7 introduced mapping to warps and
linear GPU ids. In the implementation, the delinearization basis is
reversed from [x, y, z] to [z, y, x] order to properly compute the strides
and allow delinearization. Prior to this commit, we forgot to reverse it
back to [x, y, z] order before materializing the indices.

Fix this oversight.
---
 .../GPU/TransformOps/GPUTransformOps.cpp | 10 ++++++++--
 mlir/test/Dialect/GPU/transform-gpu.mlir | 18 ++++++++++--------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index 0f566e4bdea5d..550d8c1f7e44d 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -140,12 +140,15 @@ struct GpuWarpIdBuilder : public GpuIdBuilder {
       OpFoldResult warpIdOfr = makeComposedFoldedAffineApply(
          rewriter, loc, d0.floorDiv(kWarpSize), {linearId});
       Value warpId = getValueOrCreateConstantIndexOp(rewriter, loc, warpIdOfr);
+      // Sizes in [x, y, z] -> [z, y, x] order to properly compute strides in
+      // "row-major" order.
       SmallVector<int64_t> reverseBasisSizes(
          llvm::reverse(this->availableMappingSizes));
       SmallVector<int64_t> strides = computeStrides(reverseBasisSizes);
       SmallVector<AffineExpr> delinearizingExprs = delinearize(d0, strides);
       SmallVector<Value> ids;
-      for (AffineExpr e : delinearizingExprs)
+      // Reverse back to be in [x, y, z] order.
+      for (AffineExpr e : llvm::reverse(delinearizingExprs))
         ids.push_back(makeComposedAffineApply(rewriter, loc, e, warpId));

       // clang-format off
@@ -191,13 +194,16 @@ struct GpuLinearIdBuilder : public GpuIdBuilder {
       // Build the linear thread id and decompose it in the basis of
       // `forallMappingSizes`.
       Value linearId = buildLinearThreadId(rewriter, loc, this->blockDimsOfr);
+      // Sizes in [x, y, z] -> [z, y, x] order to properly compute strides in
+      // "row-major" order.
       SmallVector<int64_t> reverseBasisSizes(llvm::reverse(forallMappingSizes));
       SmallVector<int64_t> strides = computeStrides(reverseBasisSizes);
       AffineExpr d0;
       bindDims(rewriter.getContext(), d0);
       SmallVector<AffineExpr> delinearizingExprs = delinearize(d0, strides);
       SmallVector<Value> ids;
-      for (AffineExpr e : delinearizingExprs)
+      // Reverse back to be in [x, y, z] order.
+ for (AffineExpr e : llvm::reverse(delinearizingExprs)) ids.push_back(makeComposedAffineApply(rewriter, loc, e, linearId)); // clang-format off diff --git a/mlir/test/Dialect/GPU/transform-gpu.mlir b/mlir/test/Dialect/GPU/transform-gpu.mlir index e54af051c3445..e485d4107a645 100644 --- a/mlir/test/Dialect/GPU/transform-gpu.mlir +++ b/mlir/test/Dialect/GPU/transform-gpu.mlir @@ -241,12 +241,12 @@ transform.sequence failures(propagate) { !type = memref<2 x 32 x f32> !type1d = memref<32 x f32> -// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 12) floordiv 32) floordiv 4)> -// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1) -> ((((d0 + d1 * 12) floordiv 32) mod 4) floordiv 2)> +// CHECK-DAG: #[[$MAPWX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 12) floordiv 32) mod 3)> +// CHECK-DAG: #[[$MAPWY:.*]] = affine_map<(d0, d1) -> ((((d0 + d1 * 12) floordiv 32) mod 6) floordiv 3)> // CHECK-DAG: #[[$MAPLIN:.*]] = affine_map<(d0, d1) -> (d0 + d1 * 12)> -// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 12) floordiv 20)> -// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 12) mod 20) floordiv 10)> +// CHECK-DAG: #[[$MAPLX:.*]] = affine_map<(d0, d1) -> ((d0 + d1 * 12) mod 10)> +// CHECK-DAG: #[[$MAPLY:.*]] = affine_map<(d0, d1) -> (((d0 + d1 * 12) mod 20) floordiv 10)> // CHECK-LABEL: func.func @map_multi_level( func.func @map_multi_level(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %stream : !gpu.async.token) -> !type { @@ -277,11 +277,11 @@ func.func @map_multi_level(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %str memref.store %6, %y[%i, %j] : !type } { mapping = [#gpu.thread, #gpu.thread]} - // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]]) // CHECK-DAG: %[[WIDX:.*]] = affine.apply #[[$MAPWX]](%[[TIDX]], %[[TIDY]]) + // CHECK-DAG: %[[WIDY:.*]] = affine.apply #[[$MAPWY]](%[[TIDX]], %[[TIDY]]) // CHECK-DAG: %[[CMPX:.*]] = arith.cmpi ult, %[[WIDX]], %[[C1]] : index // CHECK-DAG: %[[CMPY:.*]] = arith.cmpi ult, %[[WIDY]], %[[C1]] : index - // CHECK: %[[COND:.*]] = arith.andi %[[CMPY]], %[[CMPX]] : i1 + // CHECK: %[[COND:.*]] = arith.andi %[[CMPX]], %[[CMPY]] : i1 // CHECK: scf.if %[[COND]] scf.forall (%i) in (%c1) { %7 = memref.load %t[%i] : !type1d @@ -290,10 +290,12 @@ func.func @map_multi_level(%x: !type, %y: !type, %t: !type1d, %alpha : f32, %str } {mapping = [#gpu.warp] } // CHECK-DAG: %[[LIN:.*]] = affine.apply #[[$MAPLIN]](%[[TIDX]], %[[TIDY]]) + // CHECK-DAG: %[[LIDX:.*]] = affine.apply #[[$MAPLX]](%[[TIDX]], %[[TIDY]]) // CHECK-DAG: %[[LIDY:.*]] = affine.apply #[[$MAPLY]](%[[TIDX]], %[[TIDY]]) - // CHECK-DAG: %[[LIDZ:.*]] = affine.apply #[[$MAPLX]](%[[TIDX]], %[[TIDY]]) // CHECK-DAG: %[[COND:.*]] = arith.cmpi ult, %[[LIN]], %[[C20]] : index // CHECK: scf.if %[[COND]] + // CHECK: memref.load %{{.*}}[%[[LIDX]]] : memref<32xf32> + // CHECK: memref.store %{{.*}}[%[[LIDY]]] : memref<32xf32> scf.forall (%i, %j) in (%c10, %c2) { %7 = memref.load %t[%i] : !type1d %8 = arith.addf %alpha, %7 : f32 @@ -308,6 +310,6 @@ transform.sequence failures(propagate) { ^bb1(%arg0: !pdl.operation): %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation transform.gpu.map_nested_forall_to_threads %funcop - block_dims = [12, 11, 1] warp_dims = [2, 2, 1] + block_dims = [12, 11, 1] warp_dims = [3, 2, 1] : (!pdl.operation) -> () } From 930744fcdad7b326dd0337622b6e8dc99efcfa60 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 20 Mar 2023 12:56:30 +0100 Subject: [PATCH 017/691] Add an Observer 
 for logging action execution to a stream

Integrate the `tracing::ExecutionContext()` into mlir-opt with a new
--log-actions-to= option to demonstrate the feature.

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D144813
---
 mlir/include/mlir/Debug/Counter.h             |  2 +
 .../mlir/Debug/Observers/ActionLogging.h      | 42 ++++
 .../include/mlir/Tools/mlir-opt/MlirOptMain.h | 11 +++++
 mlir/lib/Debug/CMakeLists.txt                 |  3 ++
 mlir/lib/Debug/DebugCounter.cpp               |  5 ++
 mlir/lib/Debug/Observers/ActionLogging.cpp    | 49 +++++++++++++++++++
 mlir/lib/Debug/Observers/CMakeLists.txt       | 10 ++++
 mlir/lib/Pass/Pass.cpp                        | 26 +++++++---
 mlir/lib/Pass/PassDetail.h                    | 16 ++++++
 mlir/lib/Tools/mlir-opt/CMakeLists.txt        |  1 +
 mlir/lib/Tools/mlir-opt/MlirOptMain.cpp       | 46 ++++++++++++++++-
 mlir/test/Pass/action-logging.mlir            |  6 +++
 12 files changed, 210 insertions(+), 7 deletions(-)
 create mode 100644 mlir/include/mlir/Debug/Observers/ActionLogging.h
 create mode 100644 mlir/lib/Debug/Observers/ActionLogging.cpp
 create mode 100644 mlir/lib/Debug/Observers/CMakeLists.txt
 create mode 100644 mlir/test/Pass/action-logging.mlir

diff --git a/mlir/include/mlir/Debug/Counter.h b/mlir/include/mlir/Debug/Counter.h
index 622a87c9f6066..e158cc648b4ee 100644
--- a/mlir/include/mlir/Debug/Counter.h
+++ b/mlir/include/mlir/Debug/Counter.h
@@ -47,6 +47,8 @@ class DebugCounter {
   /// Register the command line options for debug counters.
   static void registerCLOptions();

+  /// Returns true if any of the CL options are activated.
+  static bool isActivated();

 private:
   // Returns true if the next action matching this tag should be executed.
diff --git a/mlir/include/mlir/Debug/Observers/ActionLogging.h b/mlir/include/mlir/Debug/Observers/ActionLogging.h
new file mode 100644
index 0000000000000..ff280c59da9ce
--- /dev/null
+++ b/mlir/include/mlir/Debug/Observers/ActionLogging.h
@@ -0,0 +1,42 @@
+//===- ActionLogging.h - Logging Actions ------------------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TRACING_OBSERVERS_ACTIONLOGGING_H
+#define MLIR_TRACING_OBSERVERS_ACTIONLOGGING_H
+
+#include "mlir/Debug/BreakpointManager.h"
+#include "mlir/Debug/ExecutionContext.h"
+#include "mlir/IR/Action.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace mlir {
+namespace tracing {
+
+/// This class defines an observer that prints Actions before and after execution
+/// on the provided stream.
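+///
+/// A minimal usage sketch (hypothetical wiring; MlirOptMain.cpp below does the
+/// real integration behind the --log-actions-to= option):
+///
+///   tracing::ExecutionContext executionContext;
+///   ActionLogger logger(llvm::errs());
+///   executionContext.registerObserver(&logger);
+///   mlirContext.registerActionHandler(executionContext);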
+struct ActionLogger : public ExecutionContext::Observer { + ActionLogger(raw_ostream &os, bool printActions = true, + bool printBreakpoints = true) + : os(os), printActions(printActions), printBreakpoints(printBreakpoints) { + } + + void beforeExecute(const ActionActiveStack *action, Breakpoint *breakpoint, + bool willExecute) override; + void afterExecute(const ActionActiveStack *action) override; + +private: + raw_ostream &os; + bool printActions; + bool printBreakpoints; +}; + +} // namespace tracing +} // namespace mlir + +#endif // MLIR_TRACING_OBSERVERS_ACTIONLOGGING_H diff --git a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h index 7cfcff129e100..bcefc4f4c79aa 100644 --- a/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h +++ b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h @@ -74,6 +74,14 @@ class MlirOptMainConfig { } bool shouldEmitBytecode() const { return emitBytecodeFlag; } + /// Set the filename to use for logging actions, use "-" for stdout. + MlirOptMainConfig &logActionsTo(StringRef filename) { + logActionsToFlag = filename; + return *this; + } + /// Get the filename to use for logging actions. + StringRef getLogActionsTo() const { return logActionsToFlag; } + /// Set the callback to populate the pass manager. MlirOptMainConfig & setPassPipelineSetupFn(std::function callback) { @@ -149,6 +157,9 @@ class MlirOptMainConfig { /// Emit bytecode instead of textual assembly when generating output. bool emitBytecodeFlag = false; + /// Log action execution to the given file (or "-" for stdout) + std::string logActionsToFlag; + /// The callback to populate the pass manager. std::function passPipelineCallback; diff --git a/mlir/lib/Debug/CMakeLists.txt b/mlir/lib/Debug/CMakeLists.txt index 336749078dae4..481db88983cc3 100644 --- a/mlir/lib/Debug/CMakeLists.txt +++ b/mlir/lib/Debug/CMakeLists.txt @@ -1,3 +1,5 @@ +add_subdirectory(Observers) + add_mlir_library(MLIRDebug DebugCounter.cpp ExecutionContext.cpp @@ -10,3 +12,4 @@ add_mlir_library(MLIRDebug MLIRIR MLIRSupport ) + diff --git a/mlir/lib/Debug/DebugCounter.cpp b/mlir/lib/Debug/DebugCounter.cpp index a8747f9137c9d..263fc8c80ebb7 100644 --- a/mlir/lib/Debug/DebugCounter.cpp +++ b/mlir/lib/Debug/DebugCounter.cpp @@ -116,6 +116,11 @@ void DebugCounter::registerCLOptions() { #endif } +bool DebugCounter::isActivated() { + return clOptions->counters.getNumOccurrences() || + clOptions->printCounterInfo.getNumOccurrences(); +} + // This is called by the command line parser when it sees a value for the // debug-counter option defined above. void DebugCounter::applyCLOptions() { diff --git a/mlir/lib/Debug/Observers/ActionLogging.cpp b/mlir/lib/Debug/Observers/ActionLogging.cpp new file mode 100644 index 0000000000000..9826adf33ee16 --- /dev/null +++ b/mlir/lib/Debug/Observers/ActionLogging.cpp @@ -0,0 +1,49 @@ +//===- ActionLogging.cpp - Logging Actions *- C++ -*-========================// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Debug/Observers/ActionLogging.h" +#include "llvm/Support/Threading.h" +#include +#include + +using namespace mlir; +using namespace mlir::tracing; + +//===----------------------------------------------------------------------===// +// ActionLogger +//===----------------------------------------------------------------------===// + +void ActionLogger::beforeExecute(const ActionActiveStack *action, + Breakpoint *breakpoint, bool willExecute) { + SmallVector name; + llvm::get_thread_name(name); + os << "[thread " << name << "] "; + if (willExecute) + os << "begins "; + else + os << "skipping "; + if (printBreakpoints) { + if (breakpoint) + os << " (on breakpoint: " << *breakpoint << ") "; + else + os << " (no breakpoint) "; + } + os << "Action "; + if (printActions) + action->getAction().print(os); + else + os << action->getAction().getTag(); + os << "`\n"; +} + +void ActionLogger::afterExecute(const ActionActiveStack *action) { + SmallVector name; + llvm::get_thread_name(name); + os << "[thread " << name << "] completed `" << action->getAction().getTag() + << "`\n"; +} diff --git a/mlir/lib/Debug/Observers/CMakeLists.txt b/mlir/lib/Debug/Observers/CMakeLists.txt new file mode 100644 index 0000000000000..2d7e4b15835df --- /dev/null +++ b/mlir/lib/Debug/Observers/CMakeLists.txt @@ -0,0 +1,10 @@ +add_mlir_library(MLIRObservers + ActionLogging.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Debug/Observers + + LINK_LIBS PUBLIC + ${LLVM_PTHREAD_LIB} + MLIRSupport +) diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index 194ddac32579a..2b07898c8200f 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -31,6 +31,15 @@ using namespace mlir; using namespace mlir::detail; +//===----------------------------------------------------------------------===// +// PassExecutionAction +//===----------------------------------------------------------------------===// + +void PassExecutionAction::print(raw_ostream &os) const { + os << llvm::formatv("`{0}` running `{1}` on Operation `{2}`", tag, + pass.getName(), getOp()->getName()); +} + //===----------------------------------------------------------------------===// // Pass //===----------------------------------------------------------------------===// @@ -463,12 +472,17 @@ LogicalResult OpToOpPassAdaptor::run(Pass *pass, Operation *op, if (pi) pi->runBeforePass(pass, op); - // Invoke the virtual runOnOperation method. - if (auto *adaptor = dyn_cast(pass)) - adaptor->runOnOperation(verifyPasses); - else - pass->runOnOperation(); - bool passFailed = pass->passState->irAndPassFailed.getInt(); + bool passFailed = false; + op->getContext()->executeAction( + [&]() { + // Invoke the virtual runOnOperation method. + if (auto *adaptor = dyn_cast(pass)) + adaptor->runOnOperation(verifyPasses); + else + pass->runOnOperation(); + passFailed = pass->passState->irAndPassFailed.getInt(); + }, + *pass, op); // Invalidate any non preserved analyses. 
am.invalidate(pass->passState->preservedAnalyses); diff --git a/mlir/lib/Pass/PassDetail.h b/mlir/lib/Pass/PassDetail.h index cced03b83b3b4..6fc46aff35835 100644 --- a/mlir/lib/Pass/PassDetail.h +++ b/mlir/lib/Pass/PassDetail.h @@ -8,10 +8,26 @@ #ifndef MLIR_PASS_PASSDETAIL_H_ #define MLIR_PASS_PASSDETAIL_H_ +#include "mlir/IR/Action.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" +#include "llvm/Support/FormatVariadic.h" namespace mlir { +/// Encapsulate the "action" of executing a single pass, used for the MLIR +/// tracing infrastructure. +struct PassExecutionAction : public tracing::ActionImpl { + PassExecutionAction(const Pass &pass, Operation *op) : pass(pass), op(op) {} + static constexpr StringLiteral tag = "pass-execution-action"; + void print(raw_ostream &os) const override; + const Pass &getPass() const { return pass; } + Operation *getOp() const { return op; } + +public: + const Pass &pass; + Operation *op; +}; + namespace detail { //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Tools/mlir-opt/CMakeLists.txt b/mlir/lib/Tools/mlir-opt/CMakeLists.txt index 64e74e21270ef..983e855a8d635 100644 --- a/mlir/lib/Tools/mlir-opt/CMakeLists.txt +++ b/mlir/lib/Tools/mlir-opt/CMakeLists.txt @@ -7,6 +7,7 @@ add_mlir_library(MLIROptLib LINK_LIBS PUBLIC MLIRBytecodeWriter MLIRDebug + MLIRObservers MLIRPass MLIRParser MLIRSupport diff --git a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp index c9b1ae09ccbcd..14f702fc999cc 100644 --- a/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp +++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp @@ -14,6 +14,8 @@ #include "mlir/Tools/mlir-opt/MlirOptMain.h" #include "mlir/Bytecode/BytecodeWriter.h" #include "mlir/Debug/Counter.h" +#include "mlir/Debug/ExecutionContext.h" +#include "mlir/Debug/Observers/ActionLogging.h" #include "mlir/IR/AsmState.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/BuiltinOps.h" @@ -71,6 +73,12 @@ struct MlirOptMainConfigCLOptions : public MlirOptMainConfig { "parsing"), cl::location(useExplicitModuleFlag), cl::init(false)); + static cl::opt logActionsTo{ + "log-actions-to", + cl::desc("Log action execution to a file, or stderr if " + " '-' is passed"), + cl::location(logActionsToFlag)}; + static cl::opt showDialects( "show-dialects", cl::desc("Print the list of registered dialects and exit"), @@ -126,6 +134,41 @@ MlirOptMainConfig &MlirOptMainConfig::setPassPipelineParser( return *this; } +/// Set the ExecutionContext on the context and handle the observers. 
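+/// For example, a (hypothetical) invocation such as
+///   mlir-opt foo.mlir --log-actions-to=- -canonicalize
+/// streams a begin/completed line pair to stdout for each executed action.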
+class InstallDebugHandler { +public: + InstallDebugHandler(MLIRContext &context, const MlirOptMainConfig &config) { + if (config.getLogActionsTo().empty()) { + if (tracing::DebugCounter::isActivated()) + context.registerActionHandler(tracing::DebugCounter()); + return; + } + if (tracing::DebugCounter::isActivated()) + emitError(UnknownLoc::get(&context), + "Debug counters are incompatible with --log-actions-to option " + "and are disabled"); + std::string errorMessage; + logActionsFile = openOutputFile(config.getLogActionsTo(), &errorMessage); + if (!logActionsFile) { + emitError(UnknownLoc::get(&context), + "Opening file for --log-actions-to failed: ") + << errorMessage << "\n"; + return; + } + logActionsFile->keep(); + raw_fd_ostream &logActionsStream = logActionsFile->os(); + actionLogger = std::make_unique(logActionsStream); + + executionContext.registerObserver(actionLogger.get()); + context.registerActionHandler(executionContext); + } + +private: + std::unique_ptr logActionsFile; + std::unique_ptr actionLogger; + tracing::ExecutionContext executionContext; +}; + /// Perform the actions on the input file indicated by the command line flags /// within the specified context. /// @@ -213,7 +256,8 @@ static LogicalResult processBuffer(raw_ostream &os, context.allowUnregisteredDialects(config.shouldAllowUnregisteredDialects()); if (config.shouldVerifyDiagnostics()) context.printOpOnDiagnostic(false); - context.registerActionHandler(tracing::DebugCounter()); + + InstallDebugHandler installDebugHandler(context, config); // If we are in verify diagnostics mode then we have a lot of work to do, // otherwise just perform the actions without worrying about it. diff --git a/mlir/test/Pass/action-logging.mlir b/mlir/test/Pass/action-logging.mlir new file mode 100644 index 0000000000000..943f05a2968fe --- /dev/null +++ b/mlir/test/Pass/action-logging.mlir @@ -0,0 +1,6 @@ +// RUN: mlir-opt %s --log-actions-to=- -canonicalize -test-module-pass | FileCheck %s + +// CHECK: [thread {{.*}}] begins (no breakpoint) Action `pass-execution-action` running `Canonicalizer` on Operation `builtin.module` +// CHECK: [thread {{.*}}] completed `pass-execution-action` +// CHECK: [thread {{.*}}] begins (no breakpoint) Action `pass-execution-action` running `(anonymous namespace)::TestModulePass` on Operation `builtin.module` +// CHECK: [thread {{.*}}] completed `pass-execution-action` From 5736a8a2dab438c8266c79b49d5d5c90f78fdcb4 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 13 Mar 2023 14:33:31 +0100 Subject: [PATCH 018/691] Add a `skipRegion()` feature to the OpPrintingFlags for MLIR ASM printer This is a convenient flag for context where we intend to summarize a top-level operation without the full-blown regions it may hold. Reviewed By: rriddle Differential Revision: https://reviews.llvm.org/D145889 --- mlir/include/mlir/IR/OperationSupport.h | 9 ++++++ mlir/lib/IR/AsmPrinter.cpp | 30 +++++++++++++++---- mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp | 19 +++--------- mlir/test/mlir-lsp-server/hover.test | 4 +-- 4 files changed, 40 insertions(+), 22 deletions(-) diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h index ebeb0a96523bd..ba8f0a8bd79de 100644 --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -834,6 +834,9 @@ class OpPrintingFlags { /// Always print operations in the generic form. OpPrintingFlags &printGenericOpForm(); + /// Skip printing regions. 
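+  /// When set, region bodies are elided and printed as "{...}" instead.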
+ OpPrintingFlags &skipRegions(); + /// Do not verify the operation when using custom operation printers. OpPrintingFlags &assumeVerified(); @@ -861,6 +864,9 @@ class OpPrintingFlags { /// Return if operations should be printed in the generic form. bool shouldPrintGenericOpForm() const; + /// Return if regions should be skipped. + bool shouldSkipRegions() const; + /// Return if operation verification should be skipped. bool shouldAssumeVerified() const; @@ -882,6 +888,9 @@ class OpPrintingFlags { /// Print operations in the generic form. bool printGenericOpFormFlag : 1; + /// Always skip Regions. + bool skipRegionsFlag : 1; + /// Skip operation verification. bool assumeVerifiedFlag : 1; diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp index 18c35f0ed8d7e..dd3112516fc51 100644 --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -183,8 +183,9 @@ void mlir::registerAsmPrinterCLOptions() { /// Initialize the printing flags with default supplied by the cl::opts above. OpPrintingFlags::OpPrintingFlags() : printDebugInfoFlag(false), printDebugInfoPrettyFormFlag(false), - printGenericOpFormFlag(false), assumeVerifiedFlag(false), - printLocalScope(false), printValueUsersFlag(false) { + printGenericOpFormFlag(false), skipRegionsFlag(false), + assumeVerifiedFlag(false), printLocalScope(false), + printValueUsersFlag(false) { // Initialize based upon command line options, if they are available. if (!clOptions.isConstructed()) return; @@ -223,6 +224,12 @@ OpPrintingFlags &OpPrintingFlags::printGenericOpForm() { return *this; } +/// Always skip Regions. +OpPrintingFlags &OpPrintingFlags::skipRegions() { + skipRegionsFlag = true; + return *this; +} + /// Do not verify the operation when using custom operation printers. OpPrintingFlags &OpPrintingFlags::assumeVerified() { assumeVerifiedFlag = true; @@ -270,6 +277,9 @@ bool OpPrintingFlags::shouldPrintGenericOpForm() const { return printGenericOpFormFlag; } +/// Return if Region should be skipped. +bool OpPrintingFlags::shouldSkipRegions() const { return skipRegionsFlag; } + /// Return if operation verification should be skipped. bool OpPrintingFlags::shouldAssumeVerified() const { return assumeVerifiedFlag; @@ -614,9 +624,11 @@ class DummyAliasOperationPrinter : private OpAsmPrinter { /// Print the given operation in the generic form. void printGenericOp(Operation *op, bool printOpName = true) override { // Consider nested operations for aliases. - for (Region ®ion : op->getRegions()) - printRegion(region, /*printEntryBlockArgs=*/true, - /*printBlockTerminators=*/true); + if (!printerFlags.shouldSkipRegions()) { + for (Region ®ion : op->getRegions()) + printRegion(region, /*printEntryBlockArgs=*/true, + /*printBlockTerminators=*/true); + } // Visit all the types used in the operation. 
for (Type type : op->getOperandTypes()) @@ -665,6 +677,10 @@ class DummyAliasOperationPrinter : private OpAsmPrinter { bool printEmptyBlock = false) override { if (region.empty()) return; + if (printerFlags.shouldSkipRegions()) { + os << "{...}"; + return; + } auto *entryBlock = ®ion.front(); print(entryBlock, printEntryBlockArgs, printBlockTerminators); @@ -3463,6 +3479,10 @@ void OperationPrinter::printSuccessorAndUseList(Block *successor, void OperationPrinter::printRegion(Region ®ion, bool printEntryBlockArgs, bool printBlockTerminators, bool printEmptyBlock) { + if (printerFlags.shouldSkipRegions()) { + os << "{...}"; + return; + } os << "{" << newLine; if (!region.empty()) { auto restoreDefaultDialect = diff --git a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp index 397f2598ef73a..8597423946c79 100644 --- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp +++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp @@ -516,23 +516,12 @@ std::optional MLIRDocument::buildHoverForOperation( os << "Generic Form:\n\n```mlir\n"; - // Temporary drop the regions of this operation so that they don't get - // printed in the output. This helps keeps the size of the output hover - // small. - SmallVector> regions; - for (Region ®ion : op.op->getRegions()) { - regions.emplace_back(std::make_unique()); - regions.back()->takeBody(region); - } - - op.op->print( - os, OpPrintingFlags().printGenericOpForm().elideLargeElementsAttrs()); + op.op->print(os, OpPrintingFlags() + .printGenericOpForm() + .elideLargeElementsAttrs() + .skipRegions()); os << "\n```\n"; - // Move the regions back to the current operation. - for (Region ®ion : op.op->getRegions()) - region.takeBody(*regions.back()); - return hover; } diff --git a/mlir/test/mlir-lsp-server/hover.test b/mlir/test/mlir-lsp-server/hover.test index df0fe205c24a2..3a8a4437bc533 100644 --- a/mlir/test/mlir-lsp-server/hover.test +++ b/mlir/test/mlir-lsp-server/hover.test @@ -114,7 +114,7 @@ // CHECK-NEXT: "result": { // CHECK-NEXT: "contents": { // CHECK-NEXT: "kind": "markdown", -// CHECK-NEXT: "value": "\"func.func\" : public @foo\n\nGeneric Form:\n\n```mlir\n\"func.func\"() ({\n}) {function_type = (i1) -> (), sym_name = \"foo\"} : () -> ()\n```\n" +// CHECK-NEXT: "value": "\"func.func\" : public @foo\n\nGeneric Form:\n\n```mlir\n\"func.func\"() ({...}) {function_type = (i1) -> (), sym_name = \"foo\"} : () -> ()\n```\n" // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "end": { @@ -138,7 +138,7 @@ // CHECK-NEXT: "result": { // CHECK-NEXT: "contents": { // CHECK-NEXT: "kind": "markdown", -// CHECK-NEXT: "value": "\"func.func\" : public @foo\n\nGeneric Form:\n\n```mlir\n\"func.func\"() ({\n}) {function_type = (i1) -> (), sym_name = \"foo\"} : () -> ()\n```\n" +// CHECK-NEXT: "value": "\"func.func\" : public @foo\n\nGeneric Form:\n\n```mlir\n\"func.func\"() ({...}) {function_type = (i1) -> (), sym_name = \"foo\"} : () -> ()\n```\n" // CHECK-NEXT: }, // CHECK-NEXT: "range": { // CHECK-NEXT: "end": { From f406adf134c2f81747bbc653b1399656268fe17a Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 20 Mar 2023 13:40:37 +0100 Subject: [PATCH 019/691] Add capture of "IRUnits" as context for an MLIR Action IRUnit is defined as: using IRUnit = PointerUnion; The tracing::Action is extended to take an ArrayRef as context to describe an Action. It is demonstrated in the "ActionLogging" observer. 
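A minimal sketch of the resulting API, using a hypothetical action (the
in-tree user is `PassExecutionAction` in PassDetail.h; IRUnit is the
PointerUnion<Operation *, Region *, Block *> defined in the new Unit.h):

  struct MyAction : public tracing::ActionImpl<MyAction> {
    using Base = tracing::ActionImpl<MyAction>;
    MyAction(ArrayRef<IRUnit> irUnits) : Base(irUnits) {}
    static constexpr StringLiteral tag = "my-action";
  };

  // The IR units travel with the action and are printed by observers
  // such as the ActionLogger.
  context.executeAction<MyAction>([&] { /* apply the transformation */ },
                                  /*irUnits=*/{op});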
Reviewed By: rriddle, Mogball Differential Revision: https://reviews.llvm.org/D144814 --- .../mlir/Debug/Observers/ActionLogging.h | 7 ++- mlir/include/mlir/IR/Action.h | 14 ++++- mlir/include/mlir/IR/MLIRContext.h | 12 ++-- mlir/include/mlir/IR/Unit.h | 42 +++++++++++++ mlir/lib/Debug/Observers/ActionLogging.cpp | 21 +++++-- mlir/lib/IR/CMakeLists.txt | 1 + mlir/lib/IR/Unit.cpp | 63 +++++++++++++++++++ mlir/lib/Pass/Pass.cpp | 2 +- mlir/lib/Pass/PassDetail.h | 10 ++- mlir/test/Pass/action-logging.mlir | 9 +-- 10 files changed, 161 insertions(+), 20 deletions(-) create mode 100644 mlir/include/mlir/IR/Unit.h create mode 100644 mlir/lib/IR/Unit.cpp diff --git a/mlir/include/mlir/Debug/Observers/ActionLogging.h b/mlir/include/mlir/Debug/Observers/ActionLogging.h index ff280c59da9ce..bd1d56538906a 100644 --- a/mlir/include/mlir/Debug/Observers/ActionLogging.h +++ b/mlir/include/mlir/Debug/Observers/ActionLogging.h @@ -22,9 +22,9 @@ namespace tracing { /// on the provided stream. struct ActionLogger : public ExecutionContext::Observer { ActionLogger(raw_ostream &os, bool printActions = true, - bool printBreakpoints = true) - : os(os), printActions(printActions), printBreakpoints(printBreakpoints) { - } + bool printBreakpoints = true, bool printIRUnits = true) + : os(os), printActions(printActions), printBreakpoints(printBreakpoints), + printIRUnits(printIRUnits) {} void beforeExecute(const ActionActiveStack *action, Breakpoint *breakpoint, bool willExecute) override; @@ -34,6 +34,7 @@ struct ActionLogger : public ExecutionContext::Observer { raw_ostream &os; bool printActions; bool printBreakpoints; + bool printIRUnits; }; } // namespace tracing diff --git a/mlir/include/mlir/IR/Action.h b/mlir/include/mlir/IR/Action.h index 569d4288f2086..9359324dd6090 100644 --- a/mlir/include/mlir/IR/Action.h +++ b/mlir/include/mlir/IR/Action.h @@ -15,6 +15,7 @@ #ifndef MLIR_IR_ACTION_H #define MLIR_IR_ACTION_H +#include "mlir/IR/Unit.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Support/TypeID.h" #include "llvm/ADT/ArrayRef.h" @@ -51,11 +52,19 @@ class Action { os << "Action \"" << getTag() << "\""; } + /// Return the set of IR units that are associated with this action. + virtual ArrayRef getContextIRUnits() const { return irUnits; } + protected: - Action(TypeID actionID) : actionID(actionID) {} + Action(TypeID actionID, ArrayRef irUnits) + : actionID(actionID), irUnits(irUnits) {} /// The type of the derived action class, used for `isa`/`dyn_cast`. TypeID actionID; + + /// Set of IR units (operations, regions, blocks, values) that are associated + /// with this action. + ArrayRef irUnits; }; /// CRTP Implementation of an action. This class provides a base class for @@ -67,7 +76,8 @@ class Action { template class ActionImpl : public Action { public: - ActionImpl() : Action(TypeID::get()) {} + ActionImpl(ArrayRef irUnits = {}) + : Action(TypeID::get(), irUnits) {} /// Provide classof to allow casting between action types. static bool classof(const Action *action) { diff --git a/mlir/include/mlir/IR/MLIRContext.h b/mlir/include/mlir/IR/MLIRContext.h index cc13447d9d584..d9e140bd75f72 100644 --- a/mlir/include/mlir/IR/MLIRContext.h +++ b/mlir/include/mlir/IR/MLIRContext.h @@ -11,6 +11,7 @@ #include "mlir/Support/LLVM.h" #include "mlir/Support/TypeID.h" +#include "llvm/ADT/ArrayRef.h" #include #include #include @@ -265,9 +266,10 @@ class MLIRContext { /// Dispatch the provided action to the handler if any, or just execute it. 
   template <typename ActionTy, typename... Args>
-  void executeAction(function_ref<void()> actionFn, Args &&...args) {
+  void executeAction(function_ref<void()> actionFn, ArrayRef<IRUnit> irUnits,
+                     Args &&...args) {
     if (LLVM_UNLIKELY(hasActionHandler()))
-      executeActionInternal<ActionTy, Args...>(actionFn,
+      executeActionInternal<ActionTy, Args...>(actionFn, irUnits,
                                                std::forward<Args>(args)...);
     else
       actionFn();
@@ -286,8 +288,10 @@
   /// avoid calling the ctor for the Action unnecessarily.
   template <typename ActionTy, typename... Args>
   LLVM_ATTRIBUTE_NOINLINE void
-  executeActionInternal(function_ref<void()> actionFn, Args &&...args) {
-    executeActionInternal(actionFn, ActionTy(std::forward<Args>(args)...));
+  executeActionInternal(function_ref<void()> actionFn, ArrayRef<IRUnit> irUnits,
+                        Args &&...args) {
+    executeActionInternal(actionFn,
+                          ActionTy(irUnits, std::forward<Args>(args)...));
   }

   const std::unique_ptr<MLIRContextImpl> impl;
diff --git a/mlir/include/mlir/IR/Unit.h b/mlir/include/mlir/IR/Unit.h
new file mode 100644
index 0000000000000..033dab5974516
--- /dev/null
+++ b/mlir/include/mlir/IR/Unit.h
@@ -0,0 +1,42 @@
+//===- Unit.h - IR Unit definition -------------------------------*- C++ -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_IR_UNIT_H
+#define MLIR_IR_UNIT_H
+
+#include "mlir/IR/OperationSupport.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+class raw_ostream;
+} // namespace llvm
+namespace mlir {
+class Operation;
+class Region;
+class Block;
+class Value;
+
+/// IRUnit is a union of the different types of IR objects that constitute the
+/// IR structure (other than Type and Attribute), namely Operation, Region, and
+/// Block.
+class IRUnit : public PointerUnion<Operation *, Region *, Block *> {
+public:
+  using PointerUnion::PointerUnion;
+
+  /// Print the IRUnit to the given stream.
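+  /// Operations print directly; regions print as "Region #<n> for op ..." and
+  /// blocks as "Block #<n> for ..." (see the implementation in Unit.cpp).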
+ void print(raw_ostream &os, + OpPrintingFlags flags = + OpPrintingFlags().skipRegions().useLocalScope()) const; +}; + +raw_ostream &operator<<(raw_ostream &os, const IRUnit &unit); + +} // end namespace mlir + +#endif // MLIR_IR_UNIT_H diff --git a/mlir/lib/Debug/Observers/ActionLogging.cpp b/mlir/lib/Debug/Observers/ActionLogging.cpp index 9826adf33ee16..7e7c5acaaee1f 100644 --- a/mlir/lib/Debug/Observers/ActionLogging.cpp +++ b/mlir/lib/Debug/Observers/ActionLogging.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "mlir/Debug/Observers/ActionLogging.h" +#include "mlir/IR/Action.h" #include "llvm/Support/Threading.h" -#include -#include +#include "llvm/Support/raw_ostream.h" using namespace mlir; using namespace mlir::tracing; @@ -22,6 +22,10 @@ void ActionLogger::beforeExecute(const ActionActiveStack *action, Breakpoint *breakpoint, bool willExecute) { SmallVector name; llvm::get_thread_name(name); + if (name.empty()) { + llvm::raw_svector_ostream os(name); + os << llvm::get_threadid(); + } os << "[thread " << name << "] "; if (willExecute) os << "begins "; @@ -29,21 +33,30 @@ void ActionLogger::beforeExecute(const ActionActiveStack *action, os << "skipping "; if (printBreakpoints) { if (breakpoint) - os << " (on breakpoint: " << *breakpoint << ") "; + os << "(on breakpoint: " << *breakpoint << ") "; else - os << " (no breakpoint) "; + os << "(no breakpoint) "; } os << "Action "; if (printActions) action->getAction().print(os); else os << action->getAction().getTag(); + if (printIRUnits) { + os << " ("; + interleaveComma(action->getAction().getContextIRUnits(), os); + os << ")"; + } os << "`\n"; } void ActionLogger::afterExecute(const ActionActiveStack *action) { SmallVector name; llvm::get_thread_name(name); + if (name.empty()) { + llvm::raw_svector_ostream os(name); + os << llvm::get_threadid(); + } os << "[thread " << name << "] completed `" << action->getAction().getTag() << "`\n"; } diff --git a/mlir/lib/IR/CMakeLists.txt b/mlir/lib/IR/CMakeLists.txt index 8b4fb42e03eab..4377ebe160554 100644 --- a/mlir/lib/IR/CMakeLists.txt +++ b/mlir/lib/IR/CMakeLists.txt @@ -32,6 +32,7 @@ add_mlir_library(MLIRIR Types.cpp TypeRange.cpp TypeUtilities.cpp + Unit.cpp Value.cpp ValueRange.cpp Verifier.cpp diff --git a/mlir/lib/IR/Unit.cpp b/mlir/lib/IR/Unit.cpp new file mode 100644 index 0000000000000..7da714fe7d539 --- /dev/null +++ b/mlir/lib/IR/Unit.cpp @@ -0,0 +1,63 @@ +//===- Unit.cpp - Support for manipulating IR Unit ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/IR/Unit.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/Region.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace mlir; + +static void printOp(llvm::raw_ostream &os, Operation *op, + OpPrintingFlags &flags) { + if (!op) { + os << ""; + return; + } + op->print(os, flags); +} + +static void printRegion(llvm::raw_ostream &os, Region *region, + OpPrintingFlags &flags) { + if (!region) { + os << ""; + return; + } + os << "Region #" << region->getRegionNumber() << " for op "; + printOp(os, region->getParentOp(), flags); +} + +static void printBlock(llvm::raw_ostream &os, Block *block, + OpPrintingFlags &flags) { + Region *region = block->getParent(); + Block *entry = ®ion->front(); + int blockId = std::distance(entry->getIterator(), block->getIterator()); + os << "Block #" << blockId << " for "; + bool shouldSkipRegions = flags.shouldSkipRegions(); + printRegion(os, region, flags.skipRegions()); + if (!shouldSkipRegions) + block->print(os); +} + +void mlir::IRUnit::print(llvm::raw_ostream &os, OpPrintingFlags flags) const { + if (auto *op = this->dyn_cast()) + return printOp(os, op, flags); + if (auto *region = this->dyn_cast()) + return printRegion(os, region, flags); + if (auto *block = this->dyn_cast()) + return printBlock(os, block, flags); + llvm_unreachable("unknown IRUnit"); +} + +llvm::raw_ostream &mlir::operator<<(llvm::raw_ostream &os, const IRUnit &unit) { + unit.print(os); + return os; +} diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index 2b07898c8200f..e496a29e9fbe5 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -482,7 +482,7 @@ LogicalResult OpToOpPassAdaptor::run(Pass *pass, Operation *op, pass->runOnOperation(); passFailed = pass->passState->irAndPassFailed.getInt(); }, - *pass, op); + {op}, *pass); // Invalidate any non preserved analyses. am.invalidate(pass->passState->preservedAnalyses); diff --git a/mlir/lib/Pass/PassDetail.h b/mlir/lib/Pass/PassDetail.h index 6fc46aff35835..ca60cf2fa5894 100644 --- a/mlir/lib/Pass/PassDetail.h +++ b/mlir/lib/Pass/PassDetail.h @@ -11,17 +11,23 @@ #include "mlir/IR/Action.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/FormatVariadic.h" namespace mlir { /// Encapsulate the "action" of executing a single pass, used for the MLIR /// tracing infrastructure. struct PassExecutionAction : public tracing::ActionImpl { - PassExecutionAction(const Pass &pass, Operation *op) : pass(pass), op(op) {} + using Base = tracing::ActionImpl; + PassExecutionAction(ArrayRef irUnits, const Pass &pass) + : Base(irUnits), pass(pass) {} static constexpr StringLiteral tag = "pass-execution-action"; void print(raw_ostream &os) const override; const Pass &getPass() const { return pass; } - Operation *getOp() const { return op; } + Operation *getOp() const { + ArrayRef irUnits = getContextIRUnits(); + return irUnits.empty() ? 
nullptr : irUnits[0].dyn_cast(); + } public: const Pass &pass; diff --git a/mlir/test/Pass/action-logging.mlir b/mlir/test/Pass/action-logging.mlir index 943f05a2968fe..d10c64c2af2ed 100644 --- a/mlir/test/Pass/action-logging.mlir +++ b/mlir/test/Pass/action-logging.mlir @@ -1,6 +1,7 @@ // RUN: mlir-opt %s --log-actions-to=- -canonicalize -test-module-pass | FileCheck %s -// CHECK: [thread {{.*}}] begins (no breakpoint) Action `pass-execution-action` running `Canonicalizer` on Operation `builtin.module` -// CHECK: [thread {{.*}}] completed `pass-execution-action` -// CHECK: [thread {{.*}}] begins (no breakpoint) Action `pass-execution-action` running `(anonymous namespace)::TestModulePass` on Operation `builtin.module` -// CHECK: [thread {{.*}}] completed `pass-execution-action` +// CHECK: [thread {{.*}}] begins (no breakpoint) Action `pass-execution-action` running `Canonicalizer` on Operation `builtin.module` (module {...} +// CHECK-NEXT: [thread {{.*}}] completed `pass-execution-action` +// CHECK-NEXT: [thread {{.*}}] begins (no breakpoint) Action `pass-execution-action` running `{{.*}}TestModulePass` on Operation `builtin.module` (module {...} +// CHECK-NEXT: [thread {{.*}}] completed `pass-execution-action` +// CHECK-NOT: Action From 95d6365ff0c282395881c8a9ea6e2694f93051e1 Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Mon, 20 Mar 2023 12:59:05 +0000 Subject: [PATCH 020/691] [libcxxabi] Fix for c9d36bd80760db14f14b33789e6cbc6cb8c64830 Otherwise fails with LIBCPP_REMOVE_TRANSITIVE_INCLUDES --- libcxxabi/src/cxa_aux_runtime.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/libcxxabi/src/cxa_aux_runtime.cpp b/libcxxabi/src/cxa_aux_runtime.cpp index a42990c7eff53..5e6040d75a262 100644 --- a/libcxxabi/src/cxa_aux_runtime.cpp +++ b/libcxxabi/src/cxa_aux_runtime.cpp @@ -10,6 +10,7 @@ //===----------------------------------------------------------------------===// #include "cxxabi.h" +#include #include #include From 46c6166d33915159e95f4ae17998bbe13207b144 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Mon, 20 Mar 2023 14:03:32 +0100 Subject: [PATCH 021/691] Fix MLIR build with shared library enabled Broken by f406adf134c2f --- mlir/lib/Debug/Observers/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/lib/Debug/Observers/CMakeLists.txt b/mlir/lib/Debug/Observers/CMakeLists.txt index 2d7e4b15835df..0a4261b0e45cc 100644 --- a/mlir/lib/Debug/Observers/CMakeLists.txt +++ b/mlir/lib/Debug/Observers/CMakeLists.txt @@ -7,4 +7,5 @@ add_mlir_library(MLIRObservers LINK_LIBS PUBLIC ${LLVM_PTHREAD_LIB} MLIRSupport + MLIRIR ) From 6f23fee4ef98a695062aa128a177478ba7d742d4 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Mon, 20 Mar 2023 12:12:10 +0100 Subject: [PATCH 022/691] [clangd] Fix AddUsing in the face of typo-correction Fixes https://github.com/clangd/clangd/issues/559 Differential Revision: https://reviews.llvm.org/D146417 --- .../clangd/refactor/tweaks/AddUsing.cpp | 87 +++++++++++-------- .../clangd/unittests/tweaks/AddUsingTests.cpp | 40 +++++++-- 2 files changed, 85 insertions(+), 42 deletions(-) diff --git a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp index 103e13f44d060..1e51d8fb9a518 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp @@ -8,10 +8,25 @@ #include "AST.h" #include "Config.h" +#include "SourceCode.h" #include "refactor/Tweak.h" #include "support/Logger.h" #include "clang/AST/Decl.h" +#include 
"clang/AST/Expr.h" +#include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Type.h" +#include "clang/AST/TypeLoc.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include namespace clang { namespace clangd { @@ -45,8 +60,12 @@ class AddUsing : public Tweak { // All of the following are set by prepare(). // The qualifier to remove. NestedNameSpecifierLoc QualifierToRemove; - // The name following QualifierToRemove. - llvm::StringRef Name; + // Qualified name to use when spelling the using declaration. This might be + // different than SpelledQualifier in presence of error correction. + std::string QualifierToSpell; + // The name and qualifier as spelled in the code. + llvm::StringRef SpelledQualifier; + llvm::StringRef SpelledName; // If valid, the insertion point for "using" statement must come after this. // This is relevant when the type is defined in the main file, to make sure // the type/function is already defined at the point where "using" is added. @@ -56,7 +75,7 @@ REGISTER_TWEAK(AddUsing) std::string AddUsing::title() const { return std::string(llvm::formatv( - "Add using-declaration for {0} and remove qualifier", Name)); + "Add using-declaration for {0} and remove qualifier", SpelledName)); } // Locates all "using" statements relevant to SelectionDeclContext. @@ -269,36 +288,23 @@ bool AddUsing::prepare(const Selection &Inputs) { if (Node == nullptr) return false; + SourceRange SpelledNameRange; if (auto *D = Node->ASTNode.get()) { if (auto *II = D->getDecl()->getIdentifier()) { QualifierToRemove = D->getQualifierLoc(); - Name = II->getName(); + SpelledNameRange = D->getSourceRange(); MustInsertAfterLoc = D->getDecl()->getBeginLoc(); } } else if (auto *T = Node->ASTNode.get()) { if (auto E = T->getAs()) { QualifierToRemove = E.getQualifierLoc(); - if (!QualifierToRemove) - return false; - auto NameRange = E.getSourceRange(); + SpelledNameRange = E.getSourceRange(); if (auto T = E.getNamedTypeLoc().getAs()) { // Remove the template arguments from the name. - NameRange.setEnd(T.getLAngleLoc().getLocWithOffset(-1)); + SpelledNameRange.setEnd(T.getLAngleLoc().getLocWithOffset(-1)); } - auto SpelledTokens = TB.spelledForExpanded(TB.expandedTokens(NameRange)); - if (!SpelledTokens) - return false; - auto SpelledRange = syntax::Token::range(SM, SpelledTokens->front(), - SpelledTokens->back()); - Name = SpelledRange.text(SM); - - std::string QualifierToRemoveStr = getNNSLAsString( - QualifierToRemove, Inputs.AST->getASTContext().getPrintingPolicy()); - if (!Name.consume_front(QualifierToRemoveStr)) - return false; // What's spelled doesn't match the qualifier. - if (const auto *ET = E.getTypePtr()) { if (const auto *TDT = dyn_cast(ET->getNamedType().getTypePtr())) { @@ -309,19 +315,14 @@ bool AddUsing::prepare(const Selection &Inputs) { } } } - - // FIXME: This only supports removing qualifiers that are made up of just - // namespace names. If qualifier contains a type, we could take the longest - // namespace prefix and remove that. - if (!QualifierToRemove.hasQualifier() || + if (!QualifierToRemove || + // FIXME: This only supports removing qualifiers that are made up of just + // namespace names. If qualifier contains a type, we could take the + // longest namespace prefix and remove that. 
!QualifierToRemove.getNestedNameSpecifier()->getAsNamespace() || - Name.empty()) { - return false; - } - - if (isNamespaceForbidden(Inputs, *QualifierToRemove.getNestedNameSpecifier())) + // Respect user config. + isNamespaceForbidden(Inputs, *QualifierToRemove.getNestedNameSpecifier())) return false; - // Macros are difficult. We only want to offer code action when what's spelled // under the cursor is a namespace qualifier. If it's a macro that expands to // a qualifier, user would not know what code action will actually change. @@ -333,23 +334,35 @@ bool AddUsing::prepare(const Selection &Inputs) { return false; } + auto SpelledTokens = + TB.spelledForExpanded(TB.expandedTokens(SpelledNameRange)); + if (!SpelledTokens) + return false; + auto SpelledRange = + syntax::Token::range(SM, SpelledTokens->front(), SpelledTokens->back()); + // We only drop qualifiers that're namespaces, so this is safe. + std::tie(SpelledQualifier, SpelledName) = + splitQualifiedName(SpelledRange.text(SM)); + QualifierToSpell = getNNSLAsString( + QualifierToRemove, Inputs.AST->getASTContext().getPrintingPolicy()); + if (!llvm::StringRef(QualifierToSpell).endswith(SpelledQualifier) || + SpelledName.empty()) + return false; // What's spelled doesn't match the qualifier. return true; } Expected AddUsing::apply(const Selection &Inputs) { auto &SM = Inputs.AST->getSourceManager(); - std::string QualifierToRemoveStr = getNNSLAsString( - QualifierToRemove, Inputs.AST->getASTContext().getPrintingPolicy()); tooling::Replacements R; if (auto Err = R.add(tooling::Replacement( SM, SM.getSpellingLoc(QualifierToRemove.getBeginLoc()), - QualifierToRemoveStr.length(), ""))) { + SpelledQualifier.size(), ""))) { return std::move(Err); } - auto InsertionPoint = - findInsertionPoint(Inputs, QualifierToRemove, Name, MustInsertAfterLoc); + auto InsertionPoint = findInsertionPoint(Inputs, QualifierToRemove, + SpelledName, MustInsertAfterLoc); if (!InsertionPoint) { return InsertionPoint.takeError(); } @@ -362,7 +375,7 @@ Expected AddUsing::apply(const Selection &Inputs) { if (InsertionPoint->AlwaysFullyQualify && !isFullyQualified(QualifierToRemove.getNestedNameSpecifier())) UsingTextStream << "::"; - UsingTextStream << QualifierToRemoveStr << Name << ";" + UsingTextStream << QualifierToSpell << SpelledName << ";" << InsertionPoint->Suffix; assert(SM.getFileID(InsertionPoint->Loc) == SM.getMainFileID()); diff --git a/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp index adfd018f56d27..86077c17f7555 100644 --- a/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp +++ b/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp @@ -8,8 +8,11 @@ #include "Config.h" #include "TweakTesting.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include namespace clang { namespace clangd { @@ -30,7 +33,7 @@ namespace one { void oo() {} template class tt {}; namespace two { -enum ee {}; +enum ee { ee_enum_value }; void ff() {} class cc { public: @@ -64,9 +67,6 @@ class cc { EXPECT_UNAVAILABLE(Header + "void fun() { ::ban::fo^o(); }"); EXPECT_AVAILABLE(Header + "void fun() { banana::fo^o(); }"); - // Do not offer code action on typo-corrections. - EXPECT_UNAVAILABLE(Header + "/*error-ok*/c^c C;"); - // NestedNameSpecifier, but no namespace. 
EXPECT_UNAVAILABLE(Header + "class Foo {}; class F^oo foo;"); @@ -466,7 +466,37 @@ one::v^ec foo; using one::vec; vec foo; -)cpp"}}; +)cpp"}, + // Typo correction. + {R"cpp( +// error-ok +#include "test.hpp" +c^c C; +)cpp", + R"cpp( +// error-ok +#include "test.hpp" +using one::two::cc; + +cc C; +)cpp"}, + {R"cpp( +// error-ok +#include "test.hpp" +void foo() { + switch(one::two::ee{}) { case two::ee_^one:break; } +} +)cpp", + R"cpp( +// error-ok +#include "test.hpp" +using one::two::ee_one; + +void foo() { + switch(one::two::ee{}) { case ee_one:break; } +} +)cpp"}, + }; llvm::StringMap EditedFiles; for (const auto &Case : Cases) { ExtraFiles["test.hpp"] = R"cpp( From 015cd84d3cdaba1b8dd299f540cd017b61864ff2 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Mon, 20 Mar 2023 07:06:57 -0700 Subject: [PATCH 023/691] Revert "[mlir][Linalg][Transform] Avoid FunctionalStyleTransformOpTrait where unnecesseary to improve usability" This reverts commit 31aa8ea252c0b6acdcb362c1d0f01cc4b810d6d0. This is currently not in a good state as we have some footguns due to missing listeners. --- .../GPU/TransformOps/GPUTransformOps.td | 12 +- .../Linalg/TransformOps/LinalgTransformOps.td | 67 ++++--- .../Vector/TransformOps/VectorTransformOps.td | 21 +-- .../GPU/TransformOps/GPUTransformOps.cpp | 7 +- .../TransformOps/LinalgTransformOps.cpp | 72 ++++---- .../TransformOps/VectorTransformOps.cpp | 166 +++++++++--------- .../dialects/_structured_transform_ops_ext.py | 2 + .../Dialect/GPU/transform-gpu-failing.mlir | 8 +- mlir/test/Dialect/GPU/transform-gpu.mlir | 6 - mlir/test/Dialect/LLVM/transform-e2e.mlir | 5 +- mlir/test/Dialect/Linalg/hoisting.mlir | 22 +-- .../Linalg/transform-op-vectorize.mlir | 10 +- mlir/test/Dialect/Linalg/vectorization.mlir | 147 +++++++--------- .../Transform/selective-targeting.mlir | 6 +- .../test/Dialect/Vector/transform-vector.mlir | 3 +- .../dialects/transform_structured_ext.py | 2 +- 16 files changed, 254 insertions(+), 302 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td index a218db3a02ce3..c719fedc90e33 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td @@ -17,7 +17,8 @@ include "mlir/IR/OpBase.td" def MapNestedForallToThreads : Op, + [FunctionalStyleTransformOpTrait, + MemoryEffectsOpInterface, TransformEachOpTrait, TransformOpInterface]> { let description = [{ @@ -71,7 +72,9 @@ def MapNestedForallToThreads : scf.forall operations with mappings other than gpu.thread are ignored. - This operation returns nothing. + The returned handle points to the same LaunchOp operand, consuming it and + producing a new SSA value to satisfy chaining and linearity of the IR + properties. #### Example: @@ -108,11 +111,11 @@ def MapNestedForallToThreads : ``` }]; - let arguments = (ins TransformHandleTypeInterface:$target, + let arguments = (ins PDL_Operation:$target, DefaultValuedAttr:$block_dims, DefaultValuedOptionalAttr:$warp_dims, DefaultValuedAttr:$sync_after_distribute); - let results = (outs); + let results = (outs PDL_Operation:$result); let assemblyFormat = [{ $target @@ -120,7 +123,6 @@ def MapNestedForallToThreads : (`warp_dims` `=` $warp_dims^)? (`sync_after_distribute` `=` $sync_after_distribute^)? 
attr-dict - `:` functional-type(operands, results) }]; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index c16c286ece484..712abf341f460 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -1651,13 +1651,11 @@ def TileToScfForOp : Op, - TransformEachOpTrait, - TransformOpInterface]> { + [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, + TransformEachOpTrait, TransformOpInterface]> { let description = [{ Indicates that the given `target` op all the ops it contains should be vectorized with the configuration specified by the attributes of this op. - This vectorization only handles structured ops that operate on shaped types and does not vectorize loops or straight-line. Internally, it applies a set of rewrite patterns, some of which enable vectorization and some of @@ -1687,22 +1685,24 @@ def VectorizeOp : Op:$vectorizePadding, + CArg<"bool", "false">:$vectorizeNDExtract)>, + ]; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( ::mlir::Operation *target, @@ -1711,10 +1711,6 @@ def VectorizeOp : Op, TransformOpInterface]> { @@ -1769,9 +1765,8 @@ def MaskedVectorizeOp : Op, - TransformEachOpTrait, - TransformOpInterface]> { + [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, + TransformEachOpTrait, TransformOpInterface]> { let description = [{ Hoist vector.transfer_read / vector.transfer_write pairs out of immediately enclosing scf::ForOp iteratively, if the following conditions are true: @@ -1787,17 +1782,18 @@ def HoistRedundantVectorTransfersOp : #### Return modes: - The operation always succeeds and returns nothing. + The operation always succeeds and returns a handle to the transformed + function op. }]; let arguments = (ins TransformHandleTypeInterface:$target); - let results = (outs); - let assemblyFormat = [{ - $target - attr-dict - `:` functional-type(operands, results) - }]; + let results = (outs TransformHandleTypeInterface:$transformed); + + let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) "; + let builders = [ + OpBuilder<(ins "Value":$target)>, + ]; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( ::mlir::func::FuncOp target, @@ -1888,9 +1884,8 @@ def ConvertConv2DToImg2ColOp : Op, - TransformEachOpTrait, - TransformOpInterface]> { + [FunctionalStyleTransformOpTrait, MemoryEffectsOpInterface, + TransformEachOpTrait, TransformOpInterface]> { let description = [{ Hoists supported tensor subset extract/insert operation pairs out of immediately enclosing loop iteratively, if the following conditions @@ -1910,18 +1905,18 @@ def HoistRedundantTensorSubsetsOp : #### Return modes: - The operation always succeeds and returns nothing. + The operation always succeeds and returns a handle to the transformed + function op. 
}]; let arguments = (ins TransformHandleTypeInterface:$target); - let results = (outs); + let results = (outs TransformHandleTypeInterface:$transformed); - let assemblyFormat = [{ - $target - attr-dict - `:` functional-type(operands, results) - }]; + let assemblyFormat = "$target attr-dict `:` functional-type(operands, results) "; + let builders = [ + OpBuilder<(ins "Value":$target)>, + ]; let extraClassDeclaration = [{ ::mlir::DiagnosedSilenceableFailure applyToOne( ::mlir::Operation *target, diff --git a/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td b/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td index 4be84e9800d72..4533c5a8d6425 100644 --- a/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td +++ b/mlir/include/mlir/Dialect/Vector/TransformOps/VectorTransformOps.td @@ -17,9 +17,8 @@ include "mlir/Interfaces/SideEffectInterfaces.td" include "mlir/IR/OpBase.td" def LowerVectorsOp : Op, - TransformEachOpTrait, - TransformOpInterface]> { + [DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods]> { let description = [{ Indicates that the vector operations nested under the isolated from above op `target` should be lowered to finer-grained vector primitives. @@ -28,14 +27,10 @@ def LowerVectorsOp : Op:$contraction_lowering, DefaultValuedAttr:$transpose_avx2_lowering, DefaultValuedAttr:$unroll_vector_transfers ); - let results = (outs); + let results = (outs PDL_Operation:$results); let builders = [ OpBuilder<(ins "Type":$resultType, "Value":$target, @@ -71,14 +66,6 @@ def LowerVectorsOp : Op &effects) { - onlyReadsHandle(getTarget(), effects); - modifiesPayload(effects); -} - DiagnosedSilenceableFailure transform::MapNestedForallToThreads::applyToOne( Operation *target, ApplyToEachResultList &results, TransformState &state) { LaunchOp gpuLaunch = dyn_cast(target); @@ -892,6 +886,7 @@ DiagnosedSilenceableFailure transform::MapNestedForallToThreads::applyToOne( mapNestedForallToThreadsImpl(rewriter, transformOp, gpuLaunch, blockDims, getWarpDims(), getSyncAfterDistribute()); + results.push_back(gpuLaunch.getOperation()); return diag; } diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 332a9bfa680d1..407b8d213de1c 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -1786,7 +1786,7 @@ LogicalResult transform::PadOp::verify() { } //===---------------------------------------------------------------------===// -// PadOp +// HoistPadOp //===---------------------------------------------------------------------===// DiagnosedSilenceableFailure @@ -2977,6 +2977,21 @@ void transform::TileToScfForOp::getEffects( // VectorizeOp //===----------------------------------------------------------------------===// +void transform::VectorizeOp::build(OpBuilder &builder, OperationState &result, + Value target, bool vectorizePadding, + bool vectorizeExtract) { + result.addOperands(target); + if (vectorizePadding) { + result.addAttribute(VectorizeOp::getVectorizePaddingAttrName(result.name), + builder.getUnitAttr()); + } + if (vectorizeExtract) { + result.addAttribute(VectorizeOp::getVectorizeNdExtractAttrName(result.name), + builder.getUnitAttr()); + } + result.addTypes(pdl::OperationType::get(builder.getContext())); +} + namespace { /// This is an helper only to call vectorize via a pattern inside of /// VectorizeOp::applyToOne. 
@@ -3035,15 +3050,10 @@ transform::VectorizeOp::applyToOne(Operation *target, if (failed(applyPatternsAndFoldGreedily(target, std::move(patterns)))) return emitDefaultDefiniteFailure(target); + results.push_back(target); return DiagnosedSilenceableFailure::success(); } -void transform::VectorizeOp::getEffects( - SmallVectorImpl &effects) { - transform::onlyReadsHandle(getTarget(), effects); - transform::modifiesPayload(effects); -} - //===----------------------------------------------------------------------===// // MaskedVectorizeOp //===----------------------------------------------------------------------===// @@ -3123,6 +3133,22 @@ SmallVector MaskedVectorizeOp::getMixedVectorSizes() { return getMixedValues(getStaticVectorSizes(), getVectorSizes(), b); } +//===----------------------------------------------------------------------===// +// HoistRedundantVectorTransfersOp +//===----------------------------------------------------------------------===// + +DiagnosedSilenceableFailure +transform::HoistRedundantVectorTransfersOp::applyToOne( + func::FuncOp target, transform::ApplyToEachResultList &results, + transform::TransformState &state) { + // WARNING: This hoisting does not model parallelism and is generally + // incorrect when used on distributed loops with memref semantics! + // TODO: obsolete and should be retired. + linalg::hoistRedundantVectorTransfers(target); + results.push_back(target); + return DiagnosedSilenceableFailure::success(); +} + //===----------------------------------------------------------------------===// // ConvertConv2DToImg2ColOp. //===----------------------------------------------------------------------===// @@ -3167,7 +3193,9 @@ transform::HoistRedundantTensorSubsetsOp::applyToOne( IRRewriter rewriter(target->getContext()); auto forOp = dyn_cast(target); if (forOp) { - linalg::hoistRedundantSubsetExtractInsert(rewriter, forOp); + scf::ForOp newForOp = + linalg::hoistRedundantSubsetExtractInsert(rewriter, forOp); + results.push_back(newForOp); return DiagnosedSilenceableFailure::success(); } @@ -3176,36 +3204,10 @@ transform::HoistRedundantTensorSubsetsOp::applyToOne( target->walk([&](scf::ForOp forOp) { hoistRedundantSubsetExtractInsert(rewriter, forOp); }); + results.push_back(target); return DiagnosedSilenceableFailure::success(); } -void transform::HoistRedundantTensorSubsetsOp::getEffects( - SmallVectorImpl &effects) { - transform::onlyReadsHandle(getTarget(), effects); - transform::modifiesPayload(effects); -} - -//===----------------------------------------------------------------------===// -// HoistRedundantVectorTransfersOp -//===----------------------------------------------------------------------===// - -DiagnosedSilenceableFailure -transform::HoistRedundantVectorTransfersOp::applyToOne( - func::FuncOp target, transform::ApplyToEachResultList &results, - transform::TransformState &state) { - // WARNING: This hoisting does not model parallelism and is generally - // incorrect when used on distributed loops with memref semantics! - // TODO: obsolete and should be retired. 
- linalg::hoistRedundantVectorTransfers(target); - return DiagnosedSilenceableFailure::success(); -} - -void transform::HoistRedundantVectorTransfersOp::getEffects( - SmallVectorImpl &effects) { - transform::onlyReadsHandle(getTarget(), effects); - transform::modifiesPayload(effects); -} - //===----------------------------------------------------------------------===// // Transform op registration //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp index 9b2e1d7d4cfe0..60996b9add614 100644 --- a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp +++ b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp @@ -29,90 +29,98 @@ using namespace mlir::transform; void transform::LowerVectorsOp::getEffects( SmallVectorImpl &effects) { - onlyReadsHandle(getTarget(), effects); + consumesHandle(getTarget(), effects); + producesHandle(getResults(), effects); modifiesPayload(effects); } -DiagnosedSilenceableFailure transform::LowerVectorsOp::applyToOne( - ::mlir::Operation *target, - ::mlir::transform::ApplyToEachResultList &results, - ::mlir::transform::TransformState &state) { - - // This check can't be part of the verifier because payload IR is - // independent from transform IR and may not even exist. - if (!target->hasTrait()) { - return mlir::emitDefiniteFailure(target, - "applies only to isolated-from-above " - "targets because it needs to apply " - "patterns greedily"); +DiagnosedSilenceableFailure transform::LowerVectorsOp::apply( + mlir::transform::TransformResults &transformResults, + mlir::transform::TransformState &state) { + + SmallVector results; + ArrayRef payloadOps = state.getPayloadOps(getTarget()); + for (Operation *target : payloadOps) { + // This check can't be part of the verifier because payload IR is + // independent from transform IR and may not even exist. + if (!target->hasTrait()) { + return mlir::emitDefiniteFailure(target, + "applies only to isolated-from-above " + "targets because it needs to apply " + "patterns greedily"); + } + + MLIRContext *ctx = getContext(); + RewritePatternSet patterns(ctx); + vector::VectorTransposeLowering vectorTransposeLowering = + getTransposeLowering(); + vector::VectorMultiReductionLowering vectorMultiReductionLowering = + getMultireductionLowering(); + vector::VectorContractLowering vectorContractLowering = + getContractionLowering(); + vector::VectorTransferSplit vectorTransferSplit = getSplitTransfers(); + + vector::VectorTransformsOptions vectorTransformOptions; + vectorTransformOptions.setVectorTransformsOptions(vectorContractLowering) + .setVectorMultiReductionLowering(vectorMultiReductionLowering) + .setVectorTransposeLowering(vectorTransposeLowering) + .setVectorTransferSplit(vectorTransferSplit); + + VectorTransferToSCFOptions vectorTransferToSCFOptions = + VectorTransferToSCFOptions().enableFullUnroll( + getUnrollVectorTransfers()); + + int maxTransferRank = 1; + + auto avx2LoweringOptions = + x86vector::avx2::LoweringOptions().setTransposeOptions( + x86vector::avx2::TransposeLoweringOptions() + .lower4x8xf32(getTransposeAvx2Lowering()) + .lower8x8xf32(getTransposeAvx2Lowering())); + + vector::populateVectorToVectorCanonicalizationPatterns(patterns); + + // In the future we may want to more finely select particular stages. + // Stage 1: contraction lowerings. 
+ patterns.add(vectorTransformOptions, + ctx); + vector::populateVectorTransferPermutationMapLoweringPatterns(patterns); + + // Stage 2: multi-reduction lowerings. + vector::populateVectorMultiReductionLoweringPatterns( + patterns, vectorTransformOptions.vectorMultiReductionLowering); + + // Stage 3: Rewrite vector.transfer into full and partial parts. + patterns.add( + ctx, vectorTransformOptions); + + // Stage 4: Lower vector transfers. + vector::populateVectorTransferLoweringPatterns(patterns, maxTransferRank); + + // Stage 5: Vector to scf patterns. + populateVectorToSCFConversionPatterns( + patterns, vectorTransferToSCFOptions.setTargetRank(maxTransferRank)); + + // Stage 6: Lower vector.shape_cast. + vector::populateVectorShapeCastLoweringPatterns(patterns); + + // Stage 7: Lower vector.transpose. + vector::populateVectorTransposeLoweringPatterns(patterns, + vectorTransformOptions); + if (getTransposeAvx2Lowering()) + x86vector::avx2::populateSpecializedTransposeLoweringPatterns( + patterns, avx2LoweringOptions, /*benefit=*/10); + + // Apply everything. + if (failed(applyPatternsAndFoldGreedily(target, std::move(patterns)))) + return DiagnosedSilenceableFailure::definiteFailure(); + + results.push_back(target); } - MLIRContext *ctx = getContext(); - RewritePatternSet patterns(ctx); - vector::VectorTransposeLowering vectorTransposeLowering = - getTransposeLowering(); - vector::VectorMultiReductionLowering vectorMultiReductionLowering = - getMultireductionLowering(); - vector::VectorContractLowering vectorContractLowering = - getContractionLowering(); - vector::VectorTransferSplit vectorTransferSplit = getSplitTransfers(); - - vector::VectorTransformsOptions vectorTransformOptions; - vectorTransformOptions.setVectorTransformsOptions(vectorContractLowering) - .setVectorMultiReductionLowering(vectorMultiReductionLowering) - .setVectorTransposeLowering(vectorTransposeLowering) - .setVectorTransferSplit(vectorTransferSplit); - - VectorTransferToSCFOptions vectorTransferToSCFOptions = - VectorTransferToSCFOptions().enableFullUnroll(getUnrollVectorTransfers()); - - int maxTransferRank = 1; - - auto avx2LoweringOptions = - x86vector::avx2::LoweringOptions().setTransposeOptions( - x86vector::avx2::TransposeLoweringOptions() - .lower4x8xf32(getTransposeAvx2Lowering()) - .lower8x8xf32(getTransposeAvx2Lowering())); - - vector::populateVectorToVectorCanonicalizationPatterns(patterns); - - // In the future we may want to more finely select particular stages. - // Stage 1: contraction lowerings. - patterns.add(vectorTransformOptions, - ctx); - vector::populateVectorTransferPermutationMapLoweringPatterns(patterns); - - // Stage 2: multi-reduction lowerings. - vector::populateVectorMultiReductionLoweringPatterns( - patterns, vectorTransformOptions.vectorMultiReductionLowering); - - // Stage 3: Rewrite vector.transfer into full and partial parts. - patterns.add( - ctx, vectorTransformOptions); - - // Stage 4: Lower vector transfers. - vector::populateVectorTransferLoweringPatterns(patterns, maxTransferRank); - - // Stage 5: Vector to scf patterns. - populateVectorToSCFConversionPatterns( - patterns, vectorTransferToSCFOptions.setTargetRank(maxTransferRank)); - - // Stage 6: Lower vector.shape_cast. - vector::populateVectorShapeCastLoweringPatterns(patterns); - - // Stage 7: Lower vector.transpose. 
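Every numbered stage in the new apply body follows the same populate-then-apply idiom: all stages accumulate patterns into a single RewritePatternSet, and one greedy driver run applies them to a fixed point. A minimal sketch of that idiom, using only populate functions that appear in this patch; the include paths are assumptions and may differ across MLIR revisions:

    #include "mlir/Dialect/Vector/IR/VectorOps.h"
    #include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
    #include "mlir/Transforms/GreedyPatternRewriteDriver.h"

    static mlir::LogicalResult lowerVectorsOn(mlir::Operation *target) {
      mlir::MLIRContext *ctx = target->getContext();
      mlir::RewritePatternSet patterns(ctx);
      // Population order does not impose phase ordering; the greedy driver
      // interleaves all patterns until none applies anymore.
      mlir::vector::populateVectorToVectorCanonicalizationPatterns(patterns);
      mlir::vector::populateVectorShapeCastLoweringPatterns(patterns);
      // Requires `target` to be isolated from above, which the op checks
      // before reaching this point.
      return mlir::applyPatternsAndFoldGreedily(target, std::move(patterns));
    }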
- vector::populateVectorTransposeLoweringPatterns(patterns, - vectorTransformOptions); - if (getTransposeAvx2Lowering()) - x86vector::avx2::populateSpecializedTransposeLoweringPatterns( - patterns, avx2LoweringOptions, /*benefit=*/10); - - // Apply everything. - if (failed(applyPatternsAndFoldGreedily(target, std::move(patterns)))) - return DiagnosedSilenceableFailure::definiteFailure(); - + transformResults.set(getResults().cast(), results); return DiagnosedSilenceableFailure::success(); } diff --git a/mlir/python/mlir/dialects/_structured_transform_ops_ext.py b/mlir/python/mlir/dialects/_structured_transform_ops_ext.py index f314496c693f1..e2c262ca50201 100644 --- a/mlir/python/mlir/dialects/_structured_transform_ops_ext.py +++ b/mlir/python/mlir/dialects/_structured_transform_ops_ext.py @@ -325,9 +325,11 @@ def __init__(self, vectorize_padding: Union[bool, BoolAttr] = False, loc=None, ip=None): + pdl_operation_type = pdl.OperationType.get() if isinstance(vectorize_padding, bool): vectorize_padding = UnitAttr.get() super().__init__( + pdl_operation_type, _get_op_result_or_value(target), vectorize_padding=vectorize_padding, loc=loc, diff --git a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir index c9ded7d7ef193..459b800f76d35 100644 --- a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir +++ b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir @@ -8,8 +8,7 @@ transform.sequence failures(propagate) { ^bb0(%arg0: !pdl.operation): %funcop = transform.structured.match ops{["tensor.empty"]} in %arg0 : (!pdl.operation) -> !pdl.operation // expected-error @below {{Given target is not a gpu.launch}} - transform.gpu.map_nested_forall_to_threads %funcop block_dims = [1, 1, 1] - : (!pdl.operation) -> () + %1 = transform.gpu.map_nested_forall_to_threads %funcop block_dims = [1, 1, 1] } // ----- @@ -51,7 +50,6 @@ transform.sequence failures(propagate) { // expected-error @below {{Trying to launch a GPU kernel with grid_dims = (1, 1, 1) block_dims = (1200, 9, 1). It is larger than the limits.}} // expected-note @below {{"block_dims" is too large}} transform.gpu.map_nested_forall_to_threads %funcop block_dims = [1200, 9, 1] - : (!pdl.operation) -> () } // ----- @@ -93,7 +91,6 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation // expected-error @below {{Trying to map to fewer GPU threads than loop iterations but overprovisioning is not yet supported. 
Try additional tiling of the before mapping or map to more threads.}} transform.gpu.map_nested_forall_to_threads %funcop block_dims = [128, 4, 1] - : (!pdl.operation) -> () } // ----- @@ -120,7 +117,6 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation // expected-error @below {{unsupported dynamic sizes}} transform.gpu.map_nested_forall_to_threads %funcop block_dims = [128, 4, 1] - : (!pdl.operation) -> () } // ----- @@ -143,7 +139,6 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation // expected-error @below {{only bufferized scf.forall can be mapped}} transform.gpu.map_nested_forall_to_threads %funcop block_dims = [128, 4, 1] - : (!pdl.operation) -> () } // ----- @@ -277,7 +272,6 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation // expected-error @below {{duplicated attribute, cannot map different loops to the same processor}} transform.gpu.map_nested_forall_to_threads %funcop block_dims = [32, 32, 1] - : (!pdl.operation) -> () } // ----- diff --git a/mlir/test/Dialect/GPU/transform-gpu.mlir b/mlir/test/Dialect/GPU/transform-gpu.mlir index e485d4107a645..d9872a9666f62 100644 --- a/mlir/test/Dialect/GPU/transform-gpu.mlir +++ b/mlir/test/Dialect/GPU/transform-gpu.mlir @@ -88,7 +88,6 @@ transform.sequence failures(propagate) { ^bb1(%arg0: !pdl.operation): %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation transform.gpu.map_nested_forall_to_threads %funcop block_dims = [12, 9, 1] - : (!pdl.operation) -> () } // ----- @@ -129,7 +128,6 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["func.func"]} in %arg0 : (!pdl.operation) -> !pdl.operation %gpuLaunch = transform.gpu.map_forall_to_blocks %funcop { generate_gpu_launch } transform.gpu.map_nested_forall_to_threads %gpuLaunch block_dims = [32, 4, 1] - : (!pdl.operation) -> () } // ----- @@ -163,7 +161,6 @@ transform.sequence failures(propagate) { ^bb1(%arg0: !pdl.operation): %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation transform.gpu.map_nested_forall_to_threads %funcop block_dims = [12, 9, 1] sync_after_distribute = false - : (!pdl.operation) -> () } // ----- @@ -196,7 +193,6 @@ transform.sequence failures(propagate) { ^bb1(%arg0: !pdl.operation): %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation transform.gpu.map_nested_forall_to_threads %funcop block_dims = [32, 1, 1] - : (!pdl.operation) -> () } // ----- @@ -233,7 +229,6 @@ transform.sequence failures(propagate) { ^bb1(%arg0: !pdl.operation): %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation transform.gpu.map_nested_forall_to_threads %funcop block_dims = [12, 9, 1] sync_after_distribute = false - : (!pdl.operation) -> () } // ----- @@ -311,5 +306,4 @@ transform.sequence failures(propagate) { %funcop = transform.structured.match ops{["gpu.launch"]} in %arg0 : (!pdl.operation) -> !pdl.operation transform.gpu.map_nested_forall_to_threads %funcop block_dims = [12, 11, 1] warp_dims = [3, 2, 1] - : (!pdl.operation) -> () } diff --git a/mlir/test/Dialect/LLVM/transform-e2e.mlir b/mlir/test/Dialect/LLVM/transform-e2e.mlir index 7d7a2c85a379d..d091e9d18d1b5 100644 --- 
a/mlir/test/Dialect/LLVM/transform-e2e.mlir +++ b/mlir/test/Dialect/LLVM/transform-e2e.mlir @@ -17,10 +17,9 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!pdl.operation) -> !pdl.operation %1, %loops:3 = transform.structured.tile %0 [2, 2, 2] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation) %2 = get_closest_isolated_parent %1 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %2 : (!pdl.operation) -> () + transform.structured.vectorize %2 transform.bufferization.one_shot_bufferize layout{IdentityLayoutMap} %module_op {bufferize_function_boundaries = true} %func = transform.structured.match ops{["func.func"]} in %module_op : (!pdl.operation) -> !pdl.operation - transform.vector.lower_vectors %func multireduction_lowering = "innerreduction" - : (!pdl.operation) -> () + transform.vector.lower_vectors %func multireduction_lowering = "innerreduction" } diff --git a/mlir/test/Dialect/Linalg/hoisting.mlir b/mlir/test/Dialect/Linalg/hoisting.mlir index 96d809a1fd694..aeecb8cf95f89 100644 --- a/mlir/test/Dialect/Linalg/hoisting.mlir +++ b/mlir/test/Dialect/Linalg/hoisting.mlir @@ -79,10 +79,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_vector_transfers %0 - : (!pdl.operation) -> () - // Test we can call the op twice without consuming the handle. - transform.structured.hoist_redundant_vector_transfers %0 - : (!pdl.operation) -> () + : (!pdl.operation) -> !pdl.operation } // ----- @@ -171,7 +168,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_vector_transfers %0 - : (!pdl.operation) -> () + : (!pdl.operation) -> !pdl.operation } // ----- @@ -216,7 +213,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_vector_transfers %0 - : (!pdl.operation) -> () + : (!pdl.operation) -> !pdl.operation } // ----- @@ -305,10 +302,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_tensor_subsets %0 - : (!pdl.operation) -> () - // Test we can call the op twice without consuming the handle. 
- transform.structured.hoist_redundant_tensor_subsets %0 - : (!pdl.operation) -> () + : (!pdl.operation) -> !pdl.operation } // ----- @@ -403,7 +397,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_tensor_subsets %0 - : (!pdl.operation) -> () + : (!pdl.operation) -> !pdl.operation } // ----- @@ -520,7 +514,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_tensor_subsets %0 - : (!pdl.operation) -> () + : (!pdl.operation) -> !pdl.operation } // ----- @@ -567,7 +561,7 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_tensor_subsets %0 - : (!pdl.operation) -> () + : (!pdl.operation) -> !pdl.operation } // ----- @@ -680,5 +674,5 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["func.func"]} in %arg1 : (!pdl.operation) -> !pdl.operation transform.structured.hoist_redundant_tensor_subsets %0 - : (!pdl.operation) -> () + : (!pdl.operation) -> !pdl.operation } diff --git a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir index b31df69456f24..155b0785d2ec7 100644 --- a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir @@ -20,7 +20,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -66,7 +66,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -114,9 +114,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 {vectorize_padding} : (!pdl.operation) -> () - // Apply transform twice to ensure %1 is not consumed. 
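The deleted "call the op twice" checks are not lost coverage: the hoisting ops now consume their operand handle and return a fresh one, so reusing %0 after the call would be a use of an invalidated handle. On the C++ side that contract is declared through the memory-effects interface; a hedged sketch of the declaration (MyHoistOp and getTransformed are hypothetical names):

    void MyHoistOp::getEffects(
        SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
      // The operand handle is invalidated by this op...
      transform::consumesHandle(getTarget(), effects);
      // ...and a fresh handle to the transformed payload is produced.
      transform::producesHandle(getTransformed(), effects);
      transform::modifiesPayload(effects);
    }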
- transform.structured.vectorize %1 {vectorize_padding} : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 {vectorize_padding} } // ----- @@ -133,5 +131,5 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation // expected-error @below {{op requires isolated-from-above targets}} - transform.structured.vectorize %0 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %0 } diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir index 3b1b51e347d1d..26e27c108ce81 100644 --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -14,8 +14,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.dot"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } } // ----- @@ -34,8 +33,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matvec"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } } // ----- @@ -53,8 +51,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } } // ----- @@ -73,8 +70,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.batch_matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } } // ----- @@ -114,8 +110,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } } // ----- @@ -155,8 +150,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { 
disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } } // ----- @@ -183,8 +177,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } } // ----- @@ -224,8 +217,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } } // ----- @@ -245,8 +237,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns } } // ----- @@ -270,7 +261,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -294,7 +285,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -339,8 +330,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_nd_extract } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_nd_extract } } // ----- @@ -357,7 +347,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -375,7 +365,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: 
!pdl.operation): %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -392,7 +382,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -412,7 +402,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -428,7 +418,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["memref.copy"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -456,7 +446,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -485,7 +475,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -570,8 +560,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } } // ----- @@ -662,8 +651,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } } // ----- @@ -707,8 +695,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } - : (!pdl.operation) -> () + %2 = 
transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } } // ----- @@ -751,8 +738,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_transfer_permutation_map_lowering_patterns } } // ----- @@ -784,8 +770,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.matmul"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } } // ----- @@ -814,7 +799,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_padding } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_padding } } // ----- @@ -843,7 +828,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_padding } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_padding } } @@ -880,7 +865,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_padding } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_padding } } // ----- @@ -900,7 +885,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_padding } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_padding } } // ----- @@ -930,7 +915,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_padding } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_padding } } // ----- @@ -963,7 +948,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 { 
vectorize_padding } : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 { vectorize_padding } } @@ -1000,7 +985,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 { vectorize_padding } : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 { vectorize_padding } } @@ -1034,7 +1019,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 { vectorize_padding } : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 { vectorize_padding } } @@ -1062,7 +1047,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 } // ----- @@ -1099,7 +1084,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["tensor.pad"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 { vectorize_padding } : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 { vectorize_padding } } // ----- @@ -1134,7 +1119,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 } // ----- @@ -1179,8 +1164,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } - : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } } // ----- @@ -1210,7 +1194,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 { vectorize_padding } : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 { vectorize_padding } } // ----- @@ -1241,7 +1225,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 } // ----- @@ -1271,7 +1255,7 @@ transform.sequence 
failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 } // ----- @@ -1301,7 +1285,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 } // ----- @@ -1331,7 +1315,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 } // ----- @@ -1361,7 +1345,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 } // ----- @@ -1395,7 +1379,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 } // ----- @@ -1433,11 +1417,11 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.fill"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 %3 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %4 = get_closest_isolated_parent %3 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %4 : (!pdl.operation) -> () + %5 = transform.structured.vectorize %4 } // ----- @@ -1480,7 +1464,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } @@ -1511,7 +1495,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -1550,8 +1534,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - 
transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } - : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { disable_multi_reduction_to_contract_patterns, disable_transfer_permutation_map_lowering_patterns } } // ----- @@ -1587,7 +1570,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -1623,7 +1606,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_nd_extract } } // ----- @@ -1662,7 +1645,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_nd_extract } } // ----- @@ -1712,7 +1695,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_nd_extract } } // ----- @@ -1760,7 +1743,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_nd_extract } } // ----- @@ -1804,7 +1787,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_nd_extract } } // ----- @@ -1846,7 +1829,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_nd_extract } } // ----- @@ -1890,7 +1873,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - 
transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_nd_extract } } // ----- @@ -1930,7 +1913,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_nd_extract } } // ----- @@ -1970,7 +1953,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_nd_extract } } // ----- @@ -2009,7 +1992,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 { vectorize_nd_extract } : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 { vectorize_nd_extract } } @@ -2034,7 +2017,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.map"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -2053,7 +2036,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.transpose"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -2076,7 +2059,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.reduce"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- @@ -2327,7 +2310,7 @@ func.func @not_vectorizable(%arg0: tensor<1x?xf32>, %arg1: index, %arg2: index, transform.sequence failures(propagate) { ^bb0(%arg0: !pdl.operation): %0 = transform.structured.match ops{["func.func"]} in %arg0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %0 : (!pdl.operation) -> () + %1 = transform.structured.vectorize %0 } // ----- @@ -2362,7 +2345,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // CHECK-LABEL: @wrong_reduction_detection @@ -2391,7 +2374,7 @@ transform.sequence failures(propagate) { ^bb1(%arg1: !pdl.operation): %0 = transform.structured.match ops{["linalg.generic"]} in %arg1 : (!pdl.operation) -> 
!pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + %2 = transform.structured.vectorize %1 } // ----- diff --git a/mlir/test/Dialect/Transform/selective-targeting.mlir b/mlir/test/Dialect/Transform/selective-targeting.mlir index c0a6d6b7a8dc0..231ff3099d175 100644 --- a/mlir/test/Dialect/Transform/selective-targeting.mlir +++ b/mlir/test/Dialect/Transform/selective-targeting.mlir @@ -80,7 +80,7 @@ transform.with_pdl_patterns { transform.structured.tile %0 [4, 4, 4] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation) %1 = pdl_match @pdl_target_attrC in %arg1 : (!pdl.operation) -> !pdl.operation %2 = transform.get_closest_isolated_parent %1 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %2 : (!pdl.operation) -> () + transform.structured.vectorize %2 } } @@ -125,7 +125,7 @@ transform.with_pdl_patterns { ^bb1(%arg1: !pdl.operation): %0 = pdl_match @pdl_target in %arg1 : (!pdl.operation) -> !pdl.operation %1 = get_closest_isolated_parent %0 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %1 : (!pdl.operation) -> () + transform.structured.vectorize %1 } } @@ -150,5 +150,5 @@ func.func @vectorize_all( transform.sequence failures(propagate) { ^bb0(%arg0: !pdl.operation): - transform.structured.vectorize %arg0 : (!pdl.operation) -> () + transform.structured.vectorize %arg0 } diff --git a/mlir/test/Dialect/Vector/transform-vector.mlir b/mlir/test/Dialect/Vector/transform-vector.mlir index ce920e18885d4..cf3738f2e9b5e 100644 --- a/mlir/test/Dialect/Vector/transform-vector.mlir +++ b/mlir/test/Dialect/Vector/transform-vector.mlir @@ -18,10 +18,9 @@ transform.sequence failures(propagate) { %0 = transform.structured.match ops{["linalg.matmul"]} in %module_op : (!pdl.operation) -> !pdl.operation %1, %loops:3 = transform.structured.tile %0 [8, 4, 2] : (!pdl.operation) -> (!pdl.operation, !pdl.operation, !pdl.operation, !pdl.operation) %2 = get_closest_isolated_parent %1 : (!pdl.operation) -> !pdl.operation - transform.structured.vectorize %2 : (!pdl.operation) -> () + transform.structured.vectorize %2 transform.bufferization.one_shot_bufferize %module_op %func = transform.structured.match ops{["func.func"]} in %module_op : (!pdl.operation) -> !pdl.operation transform.vector.lower_vectors %func multireduction_lowering = "innerreduction" - : (!pdl.operation) -> () } diff --git a/mlir/test/python/dialects/transform_structured_ext.py b/mlir/test/python/dialects/transform_structured_ext.py index d88fe2cc0505a..9684bfb47f1b0 100644 --- a/mlir/test/python/dialects/transform_structured_ext.py +++ b/mlir/test/python/dialects/transform_structured_ext.py @@ -206,5 +206,5 @@ def testVectorize(): transform.YieldOp() # CHECK-LABEL: TEST: testVectorize # CHECK: transform.sequence - # CHECK: transform.structured.vectorize + # CHECK: = transform.structured.vectorize # CHECK: {vectorize_padding} From cc69d58d7923f57668c35c71516c1edbc160a32c Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Mon, 20 Mar 2023 15:10:12 +0100 Subject: [PATCH 024/691] [mlir][Bazel] Adjust BUILD file for 930744fcdad7b326dd0337622b6e8dc99efcfa60 --- .../bazel/llvm-project-overlay/mlir/BUILD.bazel | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index c9a68e1b27a65..3bca5fcefb519 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ 
b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -3576,6 +3576,21 @@ cc_library( ], ) +cc_library( + name = "Observers", + srcs = glob([ + "lib/Debug/Observers/*.cpp", + "lib/Debug/Observers/*.h", + ]), + hdrs = glob(["include/mlir/Debug/Observers/*.h"]), + includes = ["include"], + deps = [ + ":Debug", + ":IR", + "//llvm:Support", + ], +) + cc_library( name = "BreakpointManagers", srcs = glob([ @@ -6935,6 +6950,7 @@ cc_library( ":BytecodeWriter", ":Debug", ":IR", + ":Observers", ":Parser", ":Pass", ":Support", From ae30ae23aa4026bbc05c1de89a8d69b3cc2d9158 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 20 Mar 2023 09:29:43 -0500 Subject: [PATCH 025/691] [libc][NFC] Add some missing comments to the RPC implementation Summary: These comments were accidentally dropped from the committed version. Add them back in. --- libc/src/__support/OSUtil/gpu/io.cpp | 5 ++-- libc/src/__support/OSUtil/gpu/quick_exit.cpp | 2 +- libc/src/__support/RPC/rpc.h | 29 +++++++++++++------- libc/utils/gpu/loader/amdgpu/Loader.cpp | 2 +- 4 files changed, 24 insertions(+), 14 deletions(-) diff --git a/libc/src/__support/OSUtil/gpu/io.cpp b/libc/src/__support/OSUtil/gpu/io.cpp index 75ac83ac6909c..dcc3bf76ec068 100644 --- a/libc/src/__support/OSUtil/gpu/io.cpp +++ b/libc/src/__support/OSUtil/gpu/io.cpp @@ -16,14 +16,15 @@ namespace __llvm_libc { void write_to_stderr(const char *msg) { uint64_t length = internal::string_length(msg) + 1; uint64_t buffer_len = sizeof(rpc::Buffer) - sizeof(uint64_t); - for (uint64_t i = 0; i < length; i += buffer_len) + for (uint64_t i = 0; i < length; i += buffer_len) { rpc::client.run( [&](rpc::Buffer *buffer) { buffer->data[0] = rpc::Opcode::PRINT_TO_STDERR; inline_memcpy(reinterpret_cast<char *>(&buffer->data[1]), &msg[i], (length > buffer_len ? buffer_len : length)); }, - [](rpc::Buffer *) {}); + [](rpc::Buffer *) { /* void */ }); + } } } // namespace __llvm_libc diff --git a/libc/src/__support/OSUtil/gpu/quick_exit.cpp b/libc/src/__support/OSUtil/gpu/quick_exit.cpp index 9be709552dc5c..56f0427c8d81b 100644 --- a/libc/src/__support/OSUtil/gpu/quick_exit.cpp +++ b/libc/src/__support/OSUtil/gpu/quick_exit.cpp @@ -24,7 +24,7 @@ void quick_exit(int status) { buffer->data[0] = rpc::Opcode::EXIT; buffer->data[1] = status; }, - [](rpc::Buffer *) {}); + [](rpc::Buffer *) { /* void */ }); #if defined(LIBC_TARGET_ARCH_IS_NVPTX) asm("exit" ::: "memory"); diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h index c3df09e3f5db2..d536de49bf5ff 100644 --- a/libc/src/__support/RPC/rpc.h +++ b/libc/src/__support/RPC/rpc.h @@ -5,6 +5,15 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// +// This file implements a remote procedure call mechanism to communicate between +// heterogeneous devices that can share an address space atomically. We provide +// a client and a server to facilitate the remote call. The client makes requests +// to the server using a shared communication channel. We use separate atomic +// signals to indicate which side, the client or the server, is in ownership of +// the buffer. +// +//===----------------------------------------------------------------------===// #ifndef LLVM_LIBC_SRC_SUPPORT_RPC_RPC_H #define LLVM_LIBC_SRC_SUPPORT_RPC_RPC_H @@ -55,7 +64,7 @@ struct Client : public Process { /// The RPC server used to respond to the client.
struct Server : public Process { - template <typename W, typename C> bool run(W work, C clean); + template <typename W, typename C> bool handle(W work, C clean); }; /// Run the RPC client protocol to communicate with the server. We perform the @@ -68,27 +77,27 @@ template <typename F, typename U> void Client::run(F fill, U use) { bool in = inbox->load(cpp::MemoryOrder::RELAXED); bool out = outbox->load(cpp::MemoryOrder::RELAXED); atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); - // Write to buffer then to the outbox. + // Apply the \p fill to the buffer and signal the server. if (!in & !out) { fill(buffer); atomic_thread_fence(cpp::MemoryOrder::RELEASE); outbox->store(1, cpp::MemoryOrder::RELEASE); out = 1; } - // Wait for the result from the server. + // Wait for the server to work on the buffer and respond. if (!in & out) { while (!in) in = inbox->load(cpp::MemoryOrder::RELAXED); atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); } - // Read from the buffer and then write to outbox. + // Apply \p use to the buffer and signal the server. if (in & out) { use(buffer); atomic_thread_fence(cpp::MemoryOrder::RELEASE); outbox->store(0, cpp::MemoryOrder::RELEASE); out = 0; } - // Wait for server to complete the communication. + // Wait for the server to signal the end of the protocol. if (in & !out) { while (in) in = inbox->load(cpp::MemoryOrder::RELAXED); @@ -103,27 +112,27 @@ template <typename F, typename U> void Client::run(F fill, U use) { /// - Apply \p work to the shared buffer and write 1 to the outbox. /// - Wait until the inbox is 0. /// - Apply \p clean to the shared buffer and write 0 to the outbox. -template <typename W, typename C> bool Server::run(W work, C clean) { +template <typename W, typename C> bool Server::handle(W work, C clean) { bool in = inbox->load(cpp::MemoryOrder::RELAXED); bool out = outbox->load(cpp::MemoryOrder::RELAXED); atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); - // No work to do, exit. + // There is no work to do, exit early. if (!in & !out) return false; - // Do work then write to the outbox. + // Apply \p work to the buffer and signal the client. if (in & !out) { work(buffer); atomic_thread_fence(cpp::MemoryOrder::RELEASE); outbox->store(1, cpp::MemoryOrder::RELEASE); out = 1; } - // Wait for the client to read the result. + // Wait for the client to use the buffer and respond. if (in & out) { while (in) in = inbox->load(cpp::MemoryOrder::RELAXED); atomic_thread_fence(cpp::MemoryOrder::ACQUIRE); } - // Clean up the buffer and signal the client. + // Clean up the buffer and signal the end of the protocol. if (!in & out) { clean(buffer); atomic_thread_fence(cpp::MemoryOrder::RELEASE); diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp index 3136dc2509790..0d631e98aae43 100644 --- a/libc/utils/gpu/loader/amdgpu/Loader.cpp +++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp @@ -43,7 +43,7 @@ static __llvm_libc::rpc::Server server; /// Queries the RPC client at least once and performs server-side work if there /// are any active requests.
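Client::run and Server::handle together implement a four-phase mailbox handshake over the shared buffer, with the inbox and outbox bits encoding ownership. A usage sketch assembled from the call sites this patch touches (the EXIT request mirrors quick_exit.cpp, the polling loop mirrors Loader.cpp; the global client and server objects are the ones those files declare):

    // Device side: publish a request; there is nothing to read back.
    __llvm_libc::rpc::client.run(
        [&](__llvm_libc::rpc::Buffer *buffer) {
          buffer->data[0] = __llvm_libc::rpc::Opcode::EXIT;
          buffer->data[1] = 0; // exit status
        },
        [](__llvm_libc::rpc::Buffer *) { /* void */ });

    // Host side: handle() services at most one request and returns false
    // when the mailbox is idle, so callers drive it in a polling loop.
    while (server.handle(
        [](__llvm_libc::rpc::Buffer *buffer) {
          // Dispatch on buffer->data[0], e.g. PRINT_TO_STDERR or EXIT.
        },
        [](__llvm_libc::rpc::Buffer *) { /* reset for the next request */ }))
      ;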
void handle_server() { - while (server.run( + while (server.handle( [&](__llvm_libc::rpc::Buffer *buffer) { switch (static_cast<__llvm_libc::rpc::Opcode>(buffer->data[0])) { case __llvm_libc::rpc::Opcode::PRINT_TO_STDERR: { From 21cd04c46fe0a2bee224899f56518a09bce5306e Mon Sep 17 00:00:00 2001 From: Pavel Kosov Date: Mon, 20 Mar 2023 17:24:12 +0300 Subject: [PATCH 026/691] [clang][ASTImport] Add support for import of empty records This patch represents the clang part of the changes in D143347 Reviewed By: balazske Differential Revision: https://reviews.llvm.org/D145057 --- clang/include/clang/AST/ASTImporter.h | 1 + clang/include/clang/AST/DeclCXX.h | 4 ++++ clang/lib/AST/ASTImporter.cpp | 30 +++++++++++++++++-------- clang/unittests/AST/ASTImporterTest.cpp | 23 +++++++++++++++++++ 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/AST/ASTImporter.h b/clang/include/clang/AST/ASTImporter.h index f851decd0965c..4ffd913846575 100644 --- a/clang/include/clang/AST/ASTImporter.h +++ b/clang/include/clang/AST/ASTImporter.h @@ -258,6 +258,7 @@ class TypeSourceInfo; FoundDeclsTy findDeclsInToCtx(DeclContext *DC, DeclarationName Name); void AddToLookupTable(Decl *ToD); + llvm::Error ImportAttrs(Decl *ToD, Decl *FromD); protected: /// Can be overwritten by subclasses to implement their own import logic. diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index ff8f8a1bb12d6..dd35ef4adfd70 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -1165,6 +1165,10 @@ class CXXRecordDecl : public RecordDecl { /// /// \note This does NOT include a check for union-ness. bool isEmpty() const { return data().Empty; } + /// Marks this record as empty. This is used by DWARFASTParserClang + /// when parsing records with empty fields having the [[no_unique_address]] + /// attribute. + void markEmpty() { data().Empty = true; } void setInitMethod(bool Val) { data().HasInitMethod = Val; } bool hasInitMethod() const { return data().HasInitMethod; } diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index bd055082778df..d0da2dae3aa23 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -3895,6 +3895,12 @@ ExpectedDecl ASTNodeImporter::VisitFieldDecl(FieldDecl *D) { D->getInClassInitStyle())) return ToField; + // We need [[no_unique_address]] attributes to be added to FieldDecl, before + // we add fields in CXXRecordDecl::addedMember, otherwise the record will be + // marked as having non-zero size. + Err = Importer.ImportAttrs(ToField, D); + if (Err) + return std::move(Err); ToField->setAccess(D->getAccess()); ToField->setLexicalDeclContext(LexicalDC); if (ToInitializer) @@ -8981,6 +8987,19 @@ TranslationUnitDecl *ASTImporter::GetFromTU(Decl *ToD) { return FromDPos->second->getTranslationUnitDecl(); } +Error ASTImporter::ImportAttrs(Decl *ToD, Decl *FromD) { + if (!FromD->hasAttrs() || ToD->hasAttrs()) + return Error::success(); + for (const Attr *FromAttr : FromD->getAttrs()) { + auto ToAttrOrErr = Import(FromAttr); + if (ToAttrOrErr) + ToD->addAttr(*ToAttrOrErr); + else + return ToAttrOrErr.takeError(); + } + return Error::success(); +} + Expected<Decl *> ASTImporter::Import(Decl *FromD) { if (!FromD) return nullptr; @@ -9115,15 +9134,8 @@ Expected<Decl *> ASTImporter::Import(Decl *FromD) { // Make sure that ImportImpl registered the imported decl.
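ImportAttrs is called before the field is added to its lexical context because adding a member to a CXXRecordDecl recomputes record properties such as emptiness, and that computation consults the field's attributes. A standalone C++20 illustration of the invariant being preserved (not taken from the patch; it assumes a target where [[no_unique_address]] is honored, e.g. the Itanium ABI):

    #include <type_traits>

    struct B {};
    struct A {
      [[no_unique_address]] B b; // the attribute the importer must carry over
    };

    static_assert(std::is_empty_v<B>);
    // `b` gets no distinct storage, so A keeps the minimal size of an empty
    // class; importing the field without the attribute would change layout.
    static_assert(sizeof(A) == 1);

    int main() { return 0; }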
assert(ImportedDecls.count(FromD) != 0 && "Missing call to MapImported?"); - - if (FromD->hasAttrs()) - for (const Attr *FromAttr : FromD->getAttrs()) { - auto ToAttrOrErr = Import(FromAttr); - if (ToAttrOrErr) - ToD->addAttr(*ToAttrOrErr); - else - return ToAttrOrErr.takeError(); - } + if (auto Error = ImportAttrs(ToD, FromD)) + return std::move(Error); // Notify subclasses. Imported(FromD, ToD); diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 6300551ca4469..7dd4c81074c76 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -8478,6 +8478,29 @@ TEST_P(ASTImporterOptionSpecificTestBase, VaListCpp) { ToVaList->getUnderlyingType(), ToBuiltinVaList->getUnderlyingType())); } +TEST_P(ASTImporterOptionSpecificTestBase, + ImportDefinitionOfEmptyClassWithNoUniqueAddressField) { + Decl *FromTU = getTuDecl( + R"( + struct B {}; + struct A { B b; }; + )", + Lang_CXX20); + + CXXRecordDecl *FromD = FirstDeclMatcher<CXXRecordDecl>().match( + FromTU, cxxRecordDecl(hasName("A"))); + + for (auto *FD : FromD->fields()) + FD->addAttr(clang::NoUniqueAddressAttr::Create(FromD->getASTContext(), + clang::SourceRange())); + FromD->markEmpty(); + + CXXRecordDecl *ToD = cast<CXXRecordDecl>(Import(FromD, Lang_CXX20)); + EXPECT_EQ(true, ToD->isEmpty()); + for (auto *FD : ToD->fields()) + EXPECT_EQ(true, FD->hasAttr<NoUniqueAddressAttr>()); +} + INSTANTIATE_TEST_SUITE_P(ParameterizedTests, ASTImporterLookupTableTest, DefaultTestValuesForRunOptions); From 47f528217ed82121882bcf2722c743360237c409 Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Mon, 12 Dec 2022 23:51:11 -0600 Subject: [PATCH 027/691] [IRSim] Ensure that assignment accurately reduces potential mapping between different candidates Previous: When we do not make decisions about commutative operands, we can end up in a situation where two values have two potential canonical numbers between two regions. This ensures that an ordering is decided after the initial structure between two regions is determined. Current: Previously the outliner only checked that the assignment to a value matched what was already known; this patch makes sure that it matches what has already been found, and creates a one-to-one mapping between the two values. Reviewer: paquette Differential Revision: https://reviews.llvm.org/D139336 --- .../llvm/Analysis/IRSimilarityIdentifier.h | 18 ++++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 43 ++++++--- .../outlining-larger-size-commutative.ll | 89 +++++++++++++++++++ 3 files changed, 140 insertions(+), 10 deletions(-) create mode 100644 llvm/test/Transforms/IROutliner/outlining-larger-size-commutative.ll diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index bbc2385fb4bbc..9f9e7c59b42ba 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -768,6 +768,24 @@ class IRSimilarityCandidate { static bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B); + /// Compare the GVN of the assignment value in corresponding instructions in + /// IRSimilarityCandidates \p A and \p B and check that there exists a mapping + /// between the values and replace the mapping with a one-to-one value if + /// needed. + /// + /// \param InstValA - The assignment GVN from the first IRSimilarityCandidate. + /// \param InstValB - The assignment GVN from the second + /// IRSimilarityCandidate.
+ /// \param [in,out] ValueNumberMappingA - A mapping of value numbers from + /// candidate \p A to candidate \B. + /// \param [in,out] ValueNumberMappingB - A mapping of value numbers from + /// candidate \p B to candidate \A. + /// \returns true if the IRSimilarityCandidates assignments are compatible. + static bool compareAssignmentMapping( + const unsigned InstValA, const unsigned &InstValB, + DenseMap> &ValueNumberMappingA, + DenseMap> &ValueNumberMappingB); + /// Compare the relative locations in \p A and \p B and check that the /// distances match if both locations are contained in the region, and that /// the branches both point outside the region if they do not. diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index 930985a955456..c8007be4142cf 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -718,6 +718,34 @@ bool IRSimilarityCandidate::compareCommutativeOperandMapping( return true; } +bool IRSimilarityCandidate::compareAssignmentMapping( + const unsigned InstValA, const unsigned &InstValB, + DenseMap> &ValueNumberMappingA, + DenseMap> &ValueNumberMappingB) { + DenseMap>::iterator ValueMappingIt; + bool WasInserted; + std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( + std::make_pair(InstValA, DenseSet({InstValB}))); + if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) + return false; + else if (ValueMappingIt->second.size() != 1) { + for (unsigned OtherVal : ValueMappingIt->second) { + if (OtherVal == InstValB) + continue; + if (ValueNumberMappingA.find(OtherVal) == ValueNumberMappingA.end()) + continue; + if (!ValueNumberMappingA[OtherVal].contains(InstValA)) + continue; + ValueNumberMappingA[OtherVal].erase(InstValA); + } + ValueNumberMappingA.erase(ValueMappingIt); + std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( + std::make_pair(InstValA, DenseSet({InstValB}))); + } + + return true; +} + bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B) { // Get the basic blocks the label refers to. @@ -775,8 +803,6 @@ bool IRSimilarityCandidate::compareStructure( // in one candidate to values in the other candidate. If we create a set with // one element, and that same element maps to the original element in the // candidate we have a good mapping. - DenseMap>::iterator ValueMappingIt; - // Iterate over the instructions contained in each candidate unsigned SectionLength = A.getStartIdx() + A.getLength(); @@ -799,16 +825,13 @@ bool IRSimilarityCandidate::compareStructure( unsigned InstValA = A.ValueToNumber.find(IA)->second; unsigned InstValB = B.ValueToNumber.find(IB)->second; - bool WasInserted; // Ensure that the mappings for the instructions exists. 
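// The removed insert-and-check logic below is subsumed by the two
// compareAssignmentMapping() calls that follow. As a hedged reading of the
// new helper (the GVNs here are illustrative, not from a real run): if
// earlier instruction pairs allowed GVN 3 in candidate A to map to either
// {5, 7} in candidate B, and the current pair fixes 3 -> 5, the candidate
// set is collapsed to {5} and stale entries pointing back at 3 are dropped,
// so the relation converges toward a one-to-one correspondence and any
// later contradictory assignment is rejected.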
- std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( - std::make_pair(InstValA, DenseSet({InstValB}))); - if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) + if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA, + ValueNumberMappingB)) return false; - - std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( - std::make_pair(InstValB, DenseSet({InstValA}))); - if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) + + if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB, + ValueNumberMappingA)) return false; // We have different paths for commutative instructions and non-commutative diff --git a/llvm/test/Transforms/IROutliner/outlining-larger-size-commutative.ll b/llvm/test/Transforms/IROutliner/outlining-larger-size-commutative.ll new file mode 100644 index 0000000000000..00098e6ba407d --- /dev/null +++ b/llvm/test/Transforms/IROutliner/outlining-larger-size-commutative.ll @@ -0,0 +1,89 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -p iroutliner,verify -ir-outlining-no-cost < %s | FileCheck %s + +; This test checks that commutative instructions where the operands are +; swapped are outlined as the same function. + +; It also checks that non-commutative instructions outlined as different +; functions when the operands are swapped; + +; These are identical functions, except that in the flipped functions, +; the operands in the adds are commuted. However, since add instructions +; are commutative, we should still outline from all four as the same +; instruction. + +define void @function1(i32 %a, i32 %b) { +; CHECK-LABEL: @function1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BLOCK_1:%.*]] +; CHECK: block_0: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP4:%.*]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP0]] +; CHECK-NEXT: br i1 [[TMP3]], label [[BLOCK_1]], label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[TMP4]] = phi i32 [ [[TMP1]], [[BLOCK_0:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[B]]) +; CHECK-NEXT: br label [[BLOCK_0]] +; CHECK: block_2: +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: ret void +; +entry: + br label %block_1 + +block_0: + %0 = add i32 %a, %b + %1 = add i32 %4, 1 + %2 = add i32 %0, %0 + %3 = icmp sgt i32 %0, %0 + br i1 %3, label %block_1, label %block_2 + +block_1: + %4 = phi i32 [ %1, %block_0 ], [ 0, %entry ] + %5 = add i32 %b, %b + br label %block_0 + +block_2: + %6 = add i32 %2, %2 + ret void +} + +define void @function2(i32 %a, i32 %b) { +; CHECK-LABEL: @function2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BLOCK_1:%.*]] +; CHECK: block_0: +; CHECK-NEXT: [[TMP0:%.*]] = sub i32 [[A:%.*]], [[B:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = add i32 1, [[TMP4:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP0]], [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP0]], [[TMP0]] +; CHECK-NEXT: br i1 [[TMP3]], label [[BLOCK_1]], label [[BLOCK_2:%.*]] +; CHECK: block_1: +; CHECK-NEXT: [[TMP4]] = phi i32 [ [[TMP1]], [[BLOCK_0:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[B]]) +; CHECK-NEXT: br label [[BLOCK_0]] +; CHECK: block_2: +; CHECK-NEXT: [[TMP5:%.*]] = sub i32 [[TMP2]], [[TMP2]] +; CHECK-NEXT: ret void +; +entry: + br label %block_1 + +block_0: + %0 = sub i32 %a, %b + %1 = add i32 1, %4 + %2 = add i32 
%0, %0 + %3 = icmp sgt i32 %0, %0 + br i1 %3, label %block_1, label %block_2 + +block_1: + %4 = phi i32 [ %1, %block_0 ], [ 0, %entry ] + %5 = add i32 %b, %b + br label %block_0 + +block_2: + %6 = sub i32 %2, %2 + ret void +} From 5a222c0b82f8ade6904aa25b33fe93c2d09516f8 Mon Sep 17 00:00:00 2001 From: Paul Scoropan <1paulscoropan@gmail.com> Date: Wed, 15 Mar 2023 14:43:06 +0000 Subject: [PATCH 028/691] [Flang] [PowerPC] Implement remaining PPC math operation intrinsics that do not require semantic error checks This review implements the following PowerPC math operations that we care about: - fnabs - fre - fres - frsqrte - frsqrtes None of these intrinsics require additional error checks in semantics. The interfaces handle checking types and kinds Reviewed By: kkwli0 Differential Revision: https://reviews.llvm.org/D146139 --- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 6 +++ flang/module/__fortran_ppc_intrinsics.f90 | 37 ++++++++++++++ flang/test/Lower/ppc-intrinsics.f90 | 48 +++++++++++++++++++ 3 files changed, 91 insertions(+) diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index b933603484581..8ff86893c3fc9 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -1419,12 +1419,18 @@ static constexpr MathOperation ppcMathOperations[] = { genMathOp}, {"__ppc_fmsub", "llvm.ppc.fmsubs", genF32F32F32F32FuncType, genLibCall}, {"__ppc_fmsub", "llvm.ppc.fmsub", genF64F64F64F64FuncType, genLibCall}, + {"__ppc_fnabs", "llvm.ppc.fnabss", genF32F32FuncType, genLibCall}, + {"__ppc_fnabs", "llvm.ppc.fnabs", genF64F64FuncType, genLibCall}, {"__ppc_fnmadd", "llvm.ppc.fnmadds", genF32F32F32F32FuncType, genLibCall}, {"__ppc_fnmadd", "llvm.ppc.fnmadd", genF64F64F64F64FuncType, genLibCall}, {"__ppc_fnmsub", "llvm.ppc.fnmsub.f32", genF32F32F32F32FuncType, genLibCall}, {"__ppc_fnmsub", "llvm.ppc.fnmsub.f64", genF64F64F64F64FuncType, genLibCall}, + {"__ppc_fre", "llvm.ppc.fre", genF64F64FuncType, genLibCall}, + {"__ppc_fres", "llvm.ppc.fres", genF32F32FuncType, genLibCall}, + {"__ppc_frsqrte", "llvm.ppc.frsqrte", genF64F64FuncType, genLibCall}, + {"__ppc_frsqrtes", "llvm.ppc.frsqrtes", genF32F32FuncType, genLibCall}, }; // This helper class computes a "distance" between two function types. diff --git a/flang/module/__fortran_ppc_intrinsics.f90 b/flang/module/__fortran_ppc_intrinsics.f90 index fff18aa36d2fe..1447df1e82290 100644 --- a/flang/module/__fortran_ppc_intrinsics.f90 +++ b/flang/module/__fortran_ppc_intrinsics.f90 @@ -54,6 +54,9 @@ end function func_r8r8r8r8 ! fctid, fctidz, fctiw, fctiwz, fctudz, fctuwz abstract interface + elemental real(4) function func_r4r4x(x) + real(4), intent(in) :: x + end function func_r4r4x elemental real(8) function func_r8r8x(x) real(8), intent(in) :: x end function func_r8r8x @@ -120,4 +123,38 @@ end function func_r8r8i end interface fcfud public :: fcfud +! fnabs + procedure(func_r4r4x) :: __ppc_fnabs_r4 + procedure(func_r8r8x) :: __ppc_fnabs_r8 + interface fnabs + procedure :: __ppc_fnabs_r4 + procedure :: __ppc_fnabs_r8 + end interface fnabs + public :: fnabs + +! fre, fres + procedure(func_r8r8x) :: __ppc_fre + interface fre + procedure :: __ppc_fre + end interface fre + public :: fre + + procedure(func_r4r4x) :: __ppc_fres + interface fres + procedure :: __ppc_fres + end interface fres + public :: fres + +! 
frsqrte, frsqrtes + procedure(func_r8r8x) :: __ppc_frsqrte + interface frsqrte + procedure :: __ppc_frsqrte + end interface frsqrte + public :: frsqrte + + procedure(func_r4r4x) :: __ppc_frsqrtes + interface frsqrtes + procedure :: __ppc_frsqrtes + end interface frsqrtes + public :: frsqrtes end module __Fortran_PPC_intrinsics diff --git a/flang/test/Lower/ppc-intrinsics.f90 b/flang/test/Lower/ppc-intrinsics.f90 index 8410fbdd86aa1..c0eef7eeb36c7 100644 --- a/flang/test/Lower/ppc-intrinsics.f90 +++ b/flang/test/Lower/ppc-intrinsics.f90 @@ -137,3 +137,51 @@ subroutine fcfud_test(i) ! CHECK-FIR: fir.call @fir.__ppc_fcfud.f64.f64 ! CHECK-LLVMIR: call contract double @llvm.ppc.fcfud(double %{{[0-9]}}) end + +! CHECK-LABEL: fnabs_testr(x) +subroutine fnabs_testr(x) + real :: x, y + y = fnabs(x) +! CHECK-FIR: fir.call @fir.__ppc_fnabs.f32.f32 +! CHECK-LLVMIR: call contract float @llvm.ppc.fnabss(float %{{[0-9]}}) +end + +! CHECK-LABEL: fnabs_testd(x) +subroutine fnabs_testd(x) + real(8) :: x, y + y = fnabs(x) +! CHECK-FIR: fir.call @fir.__ppc_fnabs.f64.f64 +! CHECK-LLVMIR: call contract double @llvm.ppc.fnabs(double %{{[0-9]}}) +end + +!CHECK-LABEL: fre_test(x) +subroutine fre_test(x) + real(8) :: x, y + y = fre(x) +! CHECK-FIR: fir.call @fir.__ppc_fre.f64.f64 +! CHECK-LLVMIR: call contract double @llvm.ppc.fre(double %{{[0-9]}}) +end + +!CHECK-LABEL: fres_test(x) +subroutine fres_test(x) + real :: x, y + y = fres(x) +! CHECK-FIR: fir.call @fir.__ppc_fres.f32.f32 +! CHECK-LLVMIR: call contract float @llvm.ppc.fres(float %{{[0-9]}}) +end + +!CHECK-LABEL: frsqrte_test(x) +subroutine frsqrte_test(x) + real(8) :: x, y + y = frsqrte(x) +! CHECK-FIR: fir.call @fir.__ppc_frsqrte.f64.f64 +! CHECK-LLVMIR: call contract double @llvm.ppc.frsqrte(double %{{[0-9]}}) +end + +!CHECK-LABEL: frsqrtes_test(x) +subroutine frsqrtes_test(x) + real :: x, y + y = frsqrtes(x) +! CHECK-FIR: fir.call @fir.__ppc_frsqrtes.f32.f32 +! CHECK-LLVMIR: call contract float @llvm.ppc.frsqrtes(float %{{[0-9]}}) +end From 8bb5ca58327ec5d0788b1546844b06b1118c5cb5 Mon Sep 17 00:00:00 2001 From: Maya Amrami Date: Sun, 19 Mar 2023 16:28:23 +0200 Subject: [PATCH 029/691] [mlir] Support bufferization of arith.constant to memref.global with memory space Reviewed By: springerm Differential Revision: https://reviews.llvm.org/D146381 --- .../Dialect/Bufferization/Transforms/BufferUtils.h | 3 ++- .../Arith/Transforms/BufferizableOpInterfaceImpl.cpp | 12 +++++++----- .../Dialect/Bufferization/Transforms/BufferUtils.cpp | 8 ++++++-- .../one-shot-bufferize-memory-space-invalid.mlir | 4 ++-- 4 files changed, 17 insertions(+), 10 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h index 6c521acd0e146..85e9c47ad5302 100644 --- a/mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h +++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h @@ -125,7 +125,8 @@ class BufferPlacementTransformationBase { // Globals are created lazily at the top of the enclosing ModuleOp with pretty // names. Duplicates are avoided. 
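// As a hedged sketch of how the new memorySpace parameter is intended to be
// used (illustrative only; the authoritative change is in the
// BufferUtils.cpp hunk below): the converted memref type is rebuilt with the
// requested memory space before the memref.global is created, roughly
//
//   auto memrefType = typeConverter.convertType(type).cast<MemRefType>();
//   if (memorySpace)
//     memrefType = MemRefType::Builder(memrefType).setMemorySpace(memorySpace);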
FailureOr getGlobalFor(arith::ConstantOp constantOp, - uint64_t alignment); + uint64_t alignment, + Attribute memorySpace = {}); } // namespace bufferization } // namespace mlir diff --git a/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp index 8408aad6e3fc1..9602d530cf826 100644 --- a/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/BufferizableOpInterfaceImpl.cpp @@ -11,6 +11,7 @@ #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/Attributes.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/Operation.h" @@ -26,10 +27,11 @@ struct ConstantOpInterface const BufferizationOptions &options) const { auto constantOp = cast(op); - // TODO: Implement memory space for this op. E.g., by adding a memory_space - // attribute to ConstantOp. - if (options.defaultMemorySpace != Attribute()) - return op->emitError("memory space not implemented yet"); + Attribute memorySpace; + if (options.defaultMemorySpace.has_value()) + memorySpace = *options.defaultMemorySpace; + else + return constantOp->emitError("could not infer memory space"); // Only ranked tensors are supported. if (!constantOp.getType().isa()) @@ -43,7 +45,7 @@ struct ConstantOpInterface // Create global memory segment and replace tensor with memref pointing to // that memory segment. FailureOr globalOp = - getGlobalFor(constantOp, options.bufferAlignment); + getGlobalFor(constantOp, options.bufferAlignment, memorySpace); if (failed(globalOp)) return failure(); memref::GlobalOp globalMemref = *globalOp; diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp index 38d69194be1e8..b9776e2fb2095 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp @@ -147,7 +147,8 @@ bool BufferPlacementTransformationBase::isLoop(Operation *op) { //===----------------------------------------------------------------------===// FailureOr -bufferization::getGlobalFor(arith::ConstantOp constantOp, uint64_t alignment) { +bufferization::getGlobalFor(arith::ConstantOp constantOp, uint64_t alignment, + Attribute memorySpace) { auto type = constantOp.getType().cast(); auto moduleOp = constantOp->getParentOfType(); if (!moduleOp) @@ -184,10 +185,13 @@ bufferization::getGlobalFor(arith::ConstantOp constantOp, uint64_t alignment) { : IntegerAttr(); BufferizeTypeConverter typeConverter; + auto memrefType = typeConverter.convertType(type).cast(); + if (memorySpace) + memrefType = MemRefType::Builder(memrefType).setMemorySpace(memorySpace); auto global = globalBuilder.create( constantOp.getLoc(), (Twine("__constant_") + os.str()).str(), /*sym_visibility=*/globalBuilder.getStringAttr("private"), - /*type=*/typeConverter.convertType(type).cast(), + /*type=*/memrefType, /*initial_value=*/constantOp.getValue().cast(), /*constant=*/true, /*alignment=*/memrefAlignment); diff --git a/mlir/test/Dialect/Arith/one-shot-bufferize-memory-space-invalid.mlir b/mlir/test/Dialect/Arith/one-shot-bufferize-memory-space-invalid.mlir index 315da00a00d78..deda8bb74b323 100644 --- a/mlir/test/Dialect/Arith/one-shot-bufferize-memory-space-invalid.mlir +++ b/mlir/test/Dialect/Arith/one-shot-bufferize-memory-space-invalid.mlir @@ -13,8 +13,8 @@ func.func 
@inconsistent_memory_space_arith_select(%c: i1) -> tensor<10xf32> { // ----- -func.func @constant_memory_space(%idx: index, %v: i32) -> tensor<3xi32> { - // expected-error @+2 {{memory space not implemented yet}} +func.func @unknown_memory_space(%idx: index, %v: i32) -> tensor<3xi32> { + // expected-error @+2 {{could not infer memory space}} // expected-error @+1 {{failed to bufferize op}} %cst = arith.constant dense<[5, 1000, 20]> : tensor<3xi32> %0 = tensor.insert %v into %cst[%idx] : tensor<3xi32> From dc521b9a1033239be5663d7dcdf1b17aa07c9cf3 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Mon, 20 Mar 2023 08:51:04 -0400 Subject: [PATCH 030/691] [x86][MemFold] Fix anon namespace in header D142084 moved an enumeration inside a header from the llvm namespace into an anon namespace. Some of the bots started failing as a result. Differential Revision: https://reviews.llvm.org/D146419 --- llvm/include/llvm/Support/X86FoldTablesUtils.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Support/X86FoldTablesUtils.h b/llvm/include/llvm/Support/X86FoldTablesUtils.h index 4829700183139..c3850588c8522 100644 --- a/llvm/include/llvm/Support/X86FoldTablesUtils.h +++ b/llvm/include/llvm/Support/X86FoldTablesUtils.h @@ -9,7 +9,7 @@ #ifndef LLVM_SUPPORT_X86FOLDTABLESUTILS_H #define LLVM_SUPPORT_X86FOLDTABLESUTILS_H -namespace { +namespace llvm { enum { // Select which memory operand is being unfolded. // (stored in bits 0 - 2) @@ -55,5 +55,5 @@ enum { // Unused bits 14-15 }; -} -#endif // LLVM_SUPPORT_X86FOLDTABLESUTILS_H \ No newline at end of file +} // namespace llvm +#endif // LLVM_SUPPORT_X86FOLDTABLESUTILS_H From fa326c21919faea33b0470b7083dccbd3c1fc3f2 Mon Sep 17 00:00:00 2001 From: Pavel Kosov Date: Mon, 20 Mar 2023 18:20:41 +0300 Subject: [PATCH 031/691] Revert "[clang][ASTImport] Add support for import of empty records" This reverts commit 21cd04c46fe0a2bee224899f56518a09bce5306e. --- clang/include/clang/AST/ASTImporter.h | 1 - clang/include/clang/AST/DeclCXX.h | 4 ---- clang/lib/AST/ASTImporter.cpp | 30 ++++++++----------------- clang/unittests/AST/ASTImporterTest.cpp | 23 ------------------- 4 files changed, 9 insertions(+), 49 deletions(-) diff --git a/clang/include/clang/AST/ASTImporter.h b/clang/include/clang/AST/ASTImporter.h index 4ffd913846575..f851decd0965c 100644 --- a/clang/include/clang/AST/ASTImporter.h +++ b/clang/include/clang/AST/ASTImporter.h @@ -258,7 +258,6 @@ class TypeSourceInfo; FoundDeclsTy findDeclsInToCtx(DeclContext *DC, DeclarationName Name); void AddToLookupTable(Decl *ToD); - llvm::Error ImportAttrs(Decl *ToD, Decl *FromD); protected: /// Can be overwritten by subclasses to implement their own import logic. diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index dd35ef4adfd70..ff8f8a1bb12d6 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -1165,10 +1165,6 @@ class CXXRecordDecl : public RecordDecl { /// /// \note This does NOT include a check for union-ness. bool isEmpty() const { return data().Empty; } - /// Marks this record as empty. 
This is used by DWARFASTParserClang - /// when parsing records with empty fields having [[no_unique_address]] - /// attribute - void markEmpty() { data().Empty = true; } void setInitMethod(bool Val) { data().HasInitMethod = Val; } bool hasInitMethod() const { return data().HasInitMethod; } diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index d0da2dae3aa23..bd055082778df 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -3895,12 +3895,6 @@ ExpectedDecl ASTNodeImporter::VisitFieldDecl(FieldDecl *D) { D->getInClassInitStyle())) return ToField; - // We need [[no_unqiue_address]] attributes to be added to FieldDecl, before - // we add fields in CXXRecordDecl::addedMember, otherwise record will be - // marked as having non-zero size. - Err = Importer.ImportAttrs(ToField, D); - if (Err) - return std::move(Err); ToField->setAccess(D->getAccess()); ToField->setLexicalDeclContext(LexicalDC); if (ToInitializer) @@ -8987,19 +8981,6 @@ TranslationUnitDecl *ASTImporter::GetFromTU(Decl *ToD) { return FromDPos->second->getTranslationUnitDecl(); } -Error ASTImporter::ImportAttrs(Decl *ToD, Decl *FromD) { - if (!FromD->hasAttrs() || ToD->hasAttrs()) - return Error::success(); - for (const Attr *FromAttr : FromD->getAttrs()) { - auto ToAttrOrErr = Import(FromAttr); - if (ToAttrOrErr) - ToD->addAttr(*ToAttrOrErr); - else - return ToAttrOrErr.takeError(); - } - return Error::success(); -} - Expected ASTImporter::Import(Decl *FromD) { if (!FromD) return nullptr; @@ -9134,8 +9115,15 @@ Expected ASTImporter::Import(Decl *FromD) { // Make sure that ImportImpl registered the imported decl. assert(ImportedDecls.count(FromD) != 0 && "Missing call to MapImported?"); - if (auto Error = ImportAttrs(ToD, FromD)) - return std::move(Error); + + if (FromD->hasAttrs()) + for (const Attr *FromAttr : FromD->getAttrs()) { + auto ToAttrOrErr = Import(FromAttr); + if (ToAttrOrErr) + ToD->addAttr(*ToAttrOrErr); + else + return ToAttrOrErr.takeError(); + } // Notify subclasses. Imported(FromD, ToD); diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 7dd4c81074c76..6300551ca4469 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -8478,29 +8478,6 @@ TEST_P(ASTImporterOptionSpecificTestBase, VaListCpp) { ToVaList->getUnderlyingType(), ToBuiltinVaList->getUnderlyingType())); } -TEST_P(ASTImporterOptionSpecificTestBase, - ImportDefinitionOfEmptyClassWithNoUniqueAddressField) { - Decl *FromTU = getTuDecl( - R"( - struct B {}; - struct A { B b; }; - )", - Lang_CXX20); - - CXXRecordDecl *FromD = FirstDeclMatcher().match( - FromTU, cxxRecordDecl(hasName("A"))); - - for (auto *FD : FromD->fields()) - FD->addAttr(clang::NoUniqueAddressAttr::Create(FromD->getASTContext(), - clang::SourceRange())); - FromD->markEmpty(); - - CXXRecordDecl *ToD = cast(Import(FromD, Lang_CXX20)); - EXPECT_EQ(true, ToD->isEmpty()); - for (auto *FD : ToD->fields()) - EXPECT_EQ(true, FD->hasAttr()); -} - INSTANTIATE_TEST_SUITE_P(ParameterizedTests, ASTImporterLookupTableTest, DefaultTestValuesForRunOptions); From b33437c969f3bc45576fda4bf43872aea8f7e8c6 Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov Date: Mon, 20 Mar 2023 15:41:05 +0000 Subject: [PATCH 032/691] Reland "[LSAN][HWASAN] Turn on leak sanitizer in HWASAN for Linux" This reverts commit 35c05f04e547100d03b6359d1f66950ff83350e0. 
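This re-enables leak detection behind a platform gate: rather than
unconditionally forcing detect_leaks off, the flag is now honored on
non-Android Linux only. A minimal sketch of the effective gate (illustrative
C++ only, not the literal flag-parsing code; RequestedDetectLeaks stands in
for the parsed common flag):

    // Honor the user's detect_leaks request, but only on non-Android Linux
    // for now; other platforms keep leak detection off until tested.
    const bool EffectiveDetectLeaks =
        RequestedDetectLeaks && SANITIZER_LINUX && !SANITIZER_ANDROID;

Assuming the usual sanitizer common-flags handling, users should still be
able to opt out at run time with HWASAN_OPTIONS=detect_leaks=0.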
--- compiler-rt/lib/hwasan/hwasan.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/hwasan/hwasan.cpp b/compiler-rt/lib/hwasan/hwasan.cpp index 662cfb4e9f807..0a5999cf7f2a5 100644 --- a/compiler-rt/lib/hwasan/hwasan.cpp +++ b/compiler-rt/lib/hwasan/hwasan.cpp @@ -86,8 +86,9 @@ static void InitializeFlags() { cf.clear_shadow_mmap_threshold = 4096 * (SANITIZER_ANDROID ? 2 : 8); // Sigtrap is used in error reporting. cf.handle_sigtrap = kHandleSignalExclusive; - // FIXME: enable once all false positives have been fixed. - cf.detect_leaks = false; + // For now only tested on Linux. Other plantforms can be turned on as they + // become ready. + cf.detect_leaks = cf.detect_leaks && SANITIZER_LINUX && !SANITIZER_ANDROID; #if SANITIZER_ANDROID // Let platform handle other signals. It is better at reporting them then we From afce10c5b60fada1db369d3770f4389da7ef30ef Mon Sep 17 00:00:00 2001 From: NagaChaitanya Vellanki Date: Mon, 20 Mar 2023 15:42:04 +0000 Subject: [PATCH 033/691] [clang][ExtractAPI] Add semicolons for enum, typedef, struct declaration fragments Fixes https://github.com/llvm/llvm-project/issues/61480 Reviewed By: dang Differential Revision: https://reviews.llvm.org/D146354 --- clang/lib/ExtractAPI/DeclarationFragments.cpp | 7 +++--- .../ExtractAPI/anonymous_record_no_typedef.c | 8 +++++++ clang/test/ExtractAPI/enum.c | 16 +++++++++++++ clang/test/ExtractAPI/struct.c | 4 ++++ clang/test/ExtractAPI/typedef.c | 4 ++++ .../ExtractAPI/typedef_anonymous_record.c | 24 +++++++++++++++++++ clang/test/ExtractAPI/typedef_chain.c | 12 ++++++++++ clang/test/ExtractAPI/underscored.c | 12 ++++++++++ 8 files changed, 84 insertions(+), 3 deletions(-) diff --git a/clang/lib/ExtractAPI/DeclarationFragments.cpp b/clang/lib/ExtractAPI/DeclarationFragments.cpp index 12c91c582aa98..b8de1270b5f02 100644 --- a/clang/lib/ExtractAPI/DeclarationFragments.cpp +++ b/clang/lib/ExtractAPI/DeclarationFragments.cpp @@ -470,7 +470,7 @@ DeclarationFragmentsBuilder::getFragmentsForEnum(const EnumDecl *EnumDecl) { getFragmentsForType(IntegerType, EnumDecl->getASTContext(), After)) .append(std::move(After)); - return Fragments; + return Fragments.append(";", DeclarationFragments::FragmentKind::Text); } DeclarationFragments @@ -493,7 +493,8 @@ DeclarationFragmentsBuilder::getFragmentsForStruct(const RecordDecl *Record) { if (!Record->getName().empty()) Fragments.appendSpace().append( Record->getName(), DeclarationFragments::FragmentKind::Identifier); - return Fragments; + + return Fragments.append(";", DeclarationFragments::FragmentKind::Text); } DeclarationFragments @@ -743,7 +744,7 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForTypedef( .appendSpace() .append(Decl->getName(), DeclarationFragments::FragmentKind::Identifier); - return Fragments; + return Fragments.append(";", DeclarationFragments::FragmentKind::Text); } template diff --git a/clang/test/ExtractAPI/anonymous_record_no_typedef.c b/clang/test/ExtractAPI/anonymous_record_no_typedef.c index abb96db058dbf..880a42c30ceb8 100644 --- a/clang/test/ExtractAPI/anonymous_record_no_typedef.c +++ b/clang/test/ExtractAPI/anonymous_record_no_typedef.c @@ -94,6 +94,10 @@ struct Vehicle { "kind": "typeIdentifier", "preciseIdentifier": "c:i", "spelling": "unsigned int" + }, + { + "kind": "text", + "spelling": ";" } ], "docComment": { @@ -241,6 +245,10 @@ struct Vehicle { { "kind": "identifier", "spelling": "Vehicle" + }, + { + "kind": "text", + "spelling": ";" } ], "docComment": { diff --git 
a/clang/test/ExtractAPI/enum.c b/clang/test/ExtractAPI/enum.c index 7b345464cb982..a6c749028bd17 100644 --- a/clang/test/ExtractAPI/enum.c +++ b/clang/test/ExtractAPI/enum.c @@ -153,6 +153,10 @@ enum { "kind": "typeIdentifier", "preciseIdentifier": "c:i", "spelling": "unsigned int" + }, + { + "kind": "text", + "spelling": ";" } ], "docComment": { @@ -461,6 +465,10 @@ enum { "kind": "typeIdentifier", "preciseIdentifier": "c:c", "spelling": "unsigned char" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { @@ -684,6 +692,10 @@ enum { "kind": "typeIdentifier", "preciseIdentifier": "c:i", "spelling": "unsigned int" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { @@ -772,6 +784,10 @@ enum { "kind": "typeIdentifier", "preciseIdentifier": "c:i", "spelling": "unsigned int" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { diff --git a/clang/test/ExtractAPI/struct.c b/clang/test/ExtractAPI/struct.c index 7e93f0d7e7bfa..cd6d25d835821 100644 --- a/clang/test/ExtractAPI/struct.c +++ b/clang/test/ExtractAPI/struct.c @@ -89,6 +89,10 @@ struct Color { { "kind": "identifier", "spelling": "Color" + }, + { + "kind": "text", + "spelling": ";" } ], "docComment": { diff --git a/clang/test/ExtractAPI/typedef.c b/clang/test/ExtractAPI/typedef.c index fb9b8ef32cdc1..89df9db8b362d 100644 --- a/clang/test/ExtractAPI/typedef.c +++ b/clang/test/ExtractAPI/typedef.c @@ -66,6 +66,10 @@ typedef int MyInt; { "kind": "identifier", "spelling": "MyInt" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { diff --git a/clang/test/ExtractAPI/typedef_anonymous_record.c b/clang/test/ExtractAPI/typedef_anonymous_record.c index 1bd93b92ede81..501873ed16de2 100644 --- a/clang/test/ExtractAPI/typedef_anonymous_record.c +++ b/clang/test/ExtractAPI/typedef_anonymous_record.c @@ -75,6 +75,10 @@ typedef MyEnumEnum MyEnumEnumEnum; { "kind": "identifier", "spelling": "MyEnum" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { @@ -170,6 +174,10 @@ typedef MyEnumEnum MyEnumEnumEnum; { "kind": "identifier", "spelling": "MyStruct" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { @@ -223,6 +231,10 @@ typedef MyEnumEnum MyEnumEnumEnum; { "kind": "identifier", "spelling": "MyStructStruct" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { @@ -283,6 +295,10 @@ typedef MyEnumEnum MyEnumEnumEnum; { "kind": "identifier", "spelling": "MyStructStructStruct" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { @@ -343,6 +359,10 @@ typedef MyEnumEnum MyEnumEnumEnum; { "kind": "identifier", "spelling": "MyEnumEnum" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { @@ -403,6 +423,10 @@ typedef MyEnumEnum MyEnumEnumEnum; { "kind": "identifier", "spelling": "MyEnumEnumEnum" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { diff --git a/clang/test/ExtractAPI/typedef_chain.c b/clang/test/ExtractAPI/typedef_chain.c index 3ba47651fda1e..ff838978d492a 100644 --- a/clang/test/ExtractAPI/typedef_chain.c +++ b/clang/test/ExtractAPI/typedef_chain.c @@ -68,6 +68,10 @@ typedef MyIntInt MyIntIntInt; { "kind": "identifier", "spelling": "MyInt" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { @@ -128,6 +132,10 @@ typedef MyIntInt MyIntIntInt; { "kind": "identifier", "spelling": "MyIntInt" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { @@ -188,6 +196,10 @@ typedef MyIntInt MyIntIntInt; { "kind": "identifier", "spelling": "MyIntIntInt" + }, + { + "kind": "text", + "spelling": ";" } ], 
"identifier": { diff --git a/clang/test/ExtractAPI/underscored.c b/clang/test/ExtractAPI/underscored.c index 6eeaf1ce412c3..e619849c8ceca 100644 --- a/clang/test/ExtractAPI/underscored.c +++ b/clang/test/ExtractAPI/underscored.c @@ -135,6 +135,10 @@ typedef _HiddenTypedef ExposedTypedefToHidden; { "kind": "identifier", "spelling": "ExposedRecord" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { @@ -296,6 +300,10 @@ typedef _HiddenTypedef ExposedTypedefToHidden; { "kind": "identifier", "spelling": "ExposedTypedef" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { @@ -356,6 +364,10 @@ typedef _HiddenTypedef ExposedTypedefToHidden; { "kind": "identifier", "spelling": "ExposedTypedefToHidden" + }, + { + "kind": "text", + "spelling": ";" } ], "identifier": { From 54539fa8b3a3b8875b4e3d8b0737c66052a0edcd Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 20 Mar 2023 08:44:15 -0700 Subject: [PATCH 034/691] [LSR/LFTR] Move two utilities to common code for reuse [nfc] We're working on a transform in LSR which is essentiall an inverse of LFTR (in certain sub-cases). Move utilties so that they can be reused. --- llvm/include/llvm/Analysis/ValueTracking.h | 11 ++++ .../include/llvm/Transforms/Utils/LoopUtils.h | 5 ++ llvm/lib/Analysis/ValueTracking.cpp | 44 +++++++++++++ llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 62 +------------------ llvm/lib/Transforms/Utils/LoopUtils.cpp | 13 ++++ 5 files changed, 75 insertions(+), 60 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 51ee3ad27fbf6..23be9d9fc98ad 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -797,6 +797,17 @@ bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC = nullptr, const DominatorTree *DT = nullptr, unsigned Depth = 0); +/// Return true if undefined behavior would provable be executed on the path to +/// OnPathTo if Root produced a posion result. Note that this doesn't say +/// anything about whether OnPathTo is actually executed or whether Root is +/// actually poison. This can be used to assess whether a new use of Root can +/// be added at a location which is control equivalent with OnPathTo (such as +/// immediately before it) without introducing UB which didn't previously +/// exist. Note that a false result conveys no information. +bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, + Instruction *OnPathTo, + DominatorTree *DT); + /// Specific patterns of select instructions we can match. enum SelectPatternFlavor { SPF_UNKNOWN = 0, diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index d63bee6fa3211..2c841353154ab 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -175,6 +175,11 @@ bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *, SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool, bool AllowSpeculation); +/// Return true if the induction variable \p IV in a Loop whose latch is +/// \p LatchBlock would become dead if the exit test \p Cond were removed. +/// Conservatively returns false if analysis is insufficient. +bool isAlmostDeadIV(PHINode *IV, BasicBlock *LatchBlock, Value *Cond); + /// This function deletes dead loops. The caller of this function needs to /// guarantee that the loop is infact dead. 
/// The function requires a bunch or prerequisites to be present: diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 905b4ac6174a7..628a12491458b 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -6091,6 +6091,50 @@ bool llvm::isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC, return ::isGuaranteedNotToBeUndefOrPoison(V, AC, CtxI, DT, Depth, true); } +/// Return true if undefined behavior would provable be executed on the path to +/// OnPathTo if Root produced a posion result. Note that this doesn't say +/// anything about whether OnPathTo is actually executed or whether Root is +/// actually poison. This can be used to assess whether a new use of Root can +/// be added at a location which is control equivalent with OnPathTo (such as +/// immediately before it) without introducing UB which didn't previously +/// exist. Note that a false result conveys no information. +bool llvm::mustExecuteUBIfPoisonOnPathTo(Instruction *Root, + Instruction *OnPathTo, + DominatorTree *DT) { + // Basic approach is to assume Root is poison, propagate poison forward + // through all users we can easily track, and then check whether any of those + // users are provable UB and must execute before out exiting block might + // exit. + + // The set of all recursive users we've visited (which are assumed to all be + // poison because of said visit) + SmallSet KnownPoison; + SmallVector Worklist; + Worklist.push_back(Root); + while (!Worklist.empty()) { + const Instruction *I = Worklist.pop_back_val(); + + // If we know this must trigger UB on a path leading our target. + if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) + return true; + + // If we can't analyze propagation through this instruction, just skip it + // and transitive users. Safe as false is a conservative result. + if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) { + return KnownPoison.contains(U) && propagatesPoison(U); + })) + continue; + + if (KnownPoison.insert(I).second) + for (const User *User : I->users()) + Worklist.push_back(cast(User)); + } + + // Might be non-UB, or might have a path we couldn't prove must execute on + // way to exiting bb. + return false; +} + OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add, const DataLayout &DL, AssumptionCache *AC, diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 1f79db79b0347..0f784a8715edb 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -750,50 +750,6 @@ static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) { return Phi != getLoopPhiForCounter(IncV, L); } -/// Return true if undefined behavior would provable be executed on the path to -/// OnPathTo if Root produced a posion result. Note that this doesn't say -/// anything about whether OnPathTo is actually executed or whether Root is -/// actually poison. This can be used to assess whether a new use of Root can -/// be added at a location which is control equivalent with OnPathTo (such as -/// immediately before it) without introducing UB which didn't previously -/// exist. Note that a false result conveys no information. 
-static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root, - Instruction *OnPathTo, - DominatorTree *DT) { - // Basic approach is to assume Root is poison, propagate poison forward - // through all users we can easily track, and then check whether any of those - // users are provable UB and must execute before out exiting block might - // exit. - - // The set of all recursive users we've visited (which are assumed to all be - // poison because of said visit) - SmallSet KnownPoison; - SmallVector Worklist; - Worklist.push_back(Root); - while (!Worklist.empty()) { - const Instruction *I = Worklist.pop_back_val(); - - // If we know this must trigger UB on a path leading our target. - if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo)) - return true; - - // If we can't analyze propagation through this instruction, just skip it - // and transitive users. Safe as false is a conservative result. - if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) { - return KnownPoison.contains(U) && propagatesPoison(U); - })) - continue; - - if (KnownPoison.insert(I).second) - for (const User *User : I->users()) - Worklist.push_back(cast(User)); - } - - // Might be non-UB, or might have a path we couldn't prove must execute on - // way to exiting bb. - return false; -} - /// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils /// down to checking that all operands are constant and listing instructions /// that may hide undef. @@ -836,20 +792,6 @@ static bool hasConcreteDef(Value *V) { return hasConcreteDefImpl(V, Visited, 0); } -/// Return true if this IV has any uses other than the (soon to be rewritten) -/// loop exit test. -static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { - int LatchIdx = Phi->getBasicBlockIndex(LatchBlock); - Value *IncV = Phi->getIncomingValue(LatchIdx); - - for (User *U : Phi->users()) - if (U != Cond && U != IncV) return false; - - for (User *U : IncV->users()) - if (U != Cond && U != Phi) return false; - return true; -} - /// Return true if the given phi is a "counter" in L. A counter is an /// add recurance (of integer or pointer type) with an arbitrary start, and a /// step of 1. Note that L must have exactly one latch. @@ -940,9 +882,9 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, const SCEV *Init = AR->getStart(); - if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) { + if (BestPhi && !isAlmostDeadIV(BestPhi, LatchBlock, Cond)) { // Don't force a live loop counter if another IV can be used. - if (AlmostDeadIV(Phi, LatchBlock, Cond)) + if (isAlmostDeadIV(Phi, LatchBlock, Cond)) continue; // Prefer to count-from-zero. This is a more "canonical" counter form. 
It diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index 7df8651ede154..1c58370a77d55 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -466,6 +466,19 @@ llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) { return Worklist; } +bool llvm::isAlmostDeadIV(PHINode *PN, BasicBlock *LatchBlock, Value *Cond) { + int LatchIdx = PN->getBasicBlockIndex(LatchBlock); + Value *IncV = PN->getIncomingValue(LatchIdx); + + for (User *U : PN->users()) + if (U != Cond && U != IncV) return false; + + for (User *U : IncV->users()) + if (U != Cond && U != PN) return false; + return true; +} + + void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI, MemorySSA *MSSA) { assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!"); From fc7b860e5e58791f06678dddcd5117ce028bef64 Mon Sep 17 00:00:00 2001 From: Eric Schweitz Date: Mon, 20 Mar 2023 08:10:26 -0700 Subject: [PATCH 035/691] [mlir][nfc] Fix syntax to conform with the C++20 restrictions on error-prone redundancy See: http://eel.is/c++draft/diff.cpp17.class#2 Reviewed By: springerm Differential Revision: https://reviews.llvm.org/D146308 --- .../mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h index 0f630628016ae..b6644963a751d 100644 --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -60,16 +60,16 @@ struct AliasingOpResult { template class AliasList { public: /// Create an empty list of aliases. - AliasList() = default; + AliasList() = default; /// Create a list of aliases. - AliasList(std::initializer_list elems) { + AliasList(std::initializer_list elems) { for (T alias : elems) addAlias(alias); } /// Create a list of aliases. - AliasList(SmallVector &&aliases) : aliases(std::move(aliases)) {} + AliasList(SmallVector &&aliases) : aliases(std::move(aliases)) {} ArrayRef getAliases() const { return aliases; } From c41c336ee065a53899b8fd4fee3b4ab73370aa7e Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 20 Mar 2023 09:24:11 -0700 Subject: [PATCH 036/691] [Internalize] Remove interaction with CallGraph Internalize was trying to update CallGraph if the analysis was available, but the new PM doesn't really use it so there's little reason to update it. --- llvm/include/llvm/Transforms/IPO/Internalize.h | 10 +++------- llvm/lib/Transforms/IPO/Internalize.cpp | 9 ++------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Internalize.h b/llvm/include/llvm/Transforms/IPO/Internalize.h index adcf5a932be01..5beb925bb089d 100644 --- a/llvm/include/llvm/Transforms/IPO/Internalize.h +++ b/llvm/include/llvm/Transforms/IPO/Internalize.h @@ -66,10 +66,7 @@ class InternalizePass : public PassInfoMixin { /// Run the internalizer on \p TheModule, returns true if any changes was /// made. 
- /// - /// If the CallGraph \p CG is supplied, it will be updated when - /// internalizing a function (by removing any edge from the "external node") - bool internalizeModule(Module &TheModule, CallGraph *CG = nullptr); + bool internalizeModule(Module &TheModule); PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; @@ -77,10 +74,9 @@ class InternalizePass : public PassInfoMixin { /// Helper function to internalize functions and variables in a Module. inline bool internalizeModule(Module &TheModule, - std::function MustPreserveGV, - CallGraph *CG = nullptr) { + std::function MustPreserveGV) { return InternalizePass(std::move(MustPreserveGV)) - .internalizeModule(TheModule, CG); + .internalizeModule(TheModule); } } // end namespace llvm diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp index 5982b8eafee81..21d649296cb0e 100644 --- a/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/llvm/lib/Transforms/IPO/Internalize.cpp @@ -183,9 +183,8 @@ void InternalizePass::checkComdat( Info.External = true; } -bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) { +bool InternalizePass::internalizeModule(Module &M) { bool Changed = false; - CallGraphNode *ExternalNode = CG ? CG->getExternalCallingNode() : nullptr; SmallVector Used; collectUsedGlobalVariables(M, Used, false); @@ -242,10 +241,6 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) { continue; Changed = true; - if (ExternalNode) - // Remove a callgraph edge from the external node to this function. - ExternalNode->removeOneAbstractEdgeTo((*CG)[&I]); - ++NumFunctions; LLVM_DEBUG(dbgs() << "Internalizing func " << I.getName() << "\n"); } @@ -277,7 +272,7 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) { InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {} PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) { - if (!internalizeModule(M, AM.getCachedResult(M))) + if (!internalizeModule(M)) return PreservedAnalyses::all(); PreservedAnalyses PA; From 313365add2fb9fbe4d31cbbcd8d12f427b52569c Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 20 Mar 2023 09:25:47 -0700 Subject: [PATCH 037/691] [Internalize] Don't claim to preserve CallGraph Follow up to c41c336ee065a --- llvm/lib/Transforms/IPO/Internalize.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Internalize.cpp b/llvm/lib/Transforms/IPO/Internalize.cpp index 21d649296cb0e..38375d74337c0 100644 --- a/llvm/lib/Transforms/IPO/Internalize.cpp +++ b/llvm/lib/Transforms/IPO/Internalize.cpp @@ -275,7 +275,5 @@ PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) { if (!internalizeModule(M)) return PreservedAnalyses::all(); - PreservedAnalyses PA; - PA.preserve(); - return PA; + return PreservedAnalyses::none(); } From cd22e7c3ad9885d3a9cf990a78a8a4127420da06 Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 20 Mar 2023 16:29:28 +0000 Subject: [PATCH 038/691] [AArch64] Regenerate neon-vcmla.ll tests and add tests for combining fadd with vcmla. NFC See D146407. 
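The new reassoc_* tests below exercise the pattern that the proposed combine
in D146407 would fold: a reassociable fadd of an accumulator into a vcmla
whose own accumulator is zero. A hedged ACLE-level sketch of the same shape
(this assumes <arm_neon.h> with Armv8.3-a complex-number support enabled; the
function name is illustrative, not part of this patch):

    #include <arm_neon.h>

    float32x4_t cmla_then_add(float32x4_t acc, float32x4_t a, float32x4_t b) {
      // fcmla into a zero accumulator, followed by a separate fadd; with
      // reassociation allowed this should become a single fcmla into acc.
      float32x4_t partial = vcmlaq_f32(vdupq_n_f32(0.0f), a, b);
      return vaddq_f32(acc, partial);
    }

A reassoc_nonfast_f32x4 test is also added for the case where the fadd lacks
fast-math reassociation flags, where no fold is expected.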
--- llvm/test/CodeGen/AArch64/neon-vcmla.ll | 293 +++++++++++++++++------- 1 file changed, 210 insertions(+), 83 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/neon-vcmla.ll b/llvm/test/CodeGen/AArch64/neon-vcmla.ll index 700e17e4b647d..76c4743de0c0c 100644 --- a/llvm/test/CodeGen/AArch64/neon-vcmla.ll +++ b/llvm/test/CodeGen/AArch64/neon-vcmla.ll @@ -1,19 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc %s -mtriple=aarch64 -mattr=+v8.3a,+fullfp16 -o - | FileCheck %s define <4 x half> @test_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-LABEL: test_16x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.4h, v1.4h, v2.4h, #0 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_16x4 -; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #0 -; %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) ret <4 x half> %res } define <4 x half> @test_16x4_lane_1(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-LABEL: test_16x4_lane_1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: fcmla v0.4h, v1.4h, v2.h[1], #0 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_16x4_lane_1 -; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[1], #0 -; %c.cast = bitcast <4 x half> %c to <2 x i32> %c.dup = shufflevector <2 x i32> %c.cast , <2 x i32> undef, <2 x i32> %c.res = bitcast <2 x i32> %c.dup to <4 x half> @@ -22,19 +26,22 @@ entry: } define <4 x half> @test_rot90_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-LABEL: test_rot90_16x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.4h, v1.4h, v2.4h, #90 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot90_16x4 -; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #90 -; %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) ret <4 x half> %res } define <4 x half> @test_rot90_16x4_lane_0(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-LABEL: test_rot90_16x4_lane_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: fcmla v0.4h, v1.4h, v2.h[0], #90 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot90_16x4_lane_0 -; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[0], #90 -; %c.cast = bitcast <4 x half> %c to <2 x i32> %c.dup = shufflevector <2 x i32> %c.cast , <2 x i32> undef, <2 x i32> %c.res = bitcast <2 x i32> %c.dup to <4 x half> @@ -43,18 +50,21 @@ entry: } define <4 x half> @test_rot180_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-LABEL: test_rot180_16x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.4h, v1.4h, v2.4h, #180 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot180_16x4 -; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #180 -; %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) ret <4 x half> %res } define <4 x half> @test_rot180_16x4_lane_0(<4 x half> %a, <4 x half> %b, <8 x half> %c) { +; CHECK-LABEL: test_rot180_16x4_lane_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.4h, v1.4h, v2.h[0], #180 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot180_16x4_lane_0 -; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.h[0], #180 %c.cast = bitcast <8 x half> %c to <4 x i32> %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <2 x i32> @@ -64,64 +74,71 @@ entry: } define <4 x half> 
@test_rot270_16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-LABEL: test_rot270_16x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.4h, v1.4h, v2.4h, #270 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot270_16x4 -; CHECK: fcmla v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, v{{[0-9]+}}.4h, #270 -; %res = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) ret <4 x half> %res } define <2 x float> @test_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; CHECK-LABEL: test_32x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.2s, v1.2s, v2.2s, #0 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_32x2 -; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #0 -; %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot0.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) ret <2 x float> %res } define <2 x float> @test_rot90_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; CHECK-LABEL: test_rot90_32x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.2s, v1.2s, v2.2s, #90 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot90_32x2 -; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #90 -; %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot90.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) ret <2 x float> %res } define <2 x float> @test_rot180_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; CHECK-LABEL: test_rot180_32x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.2s, v1.2s, v2.2s, #180 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot180_32x2 -; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #180 -; %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot180.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) ret <2 x float> %res } define <2 x float> @test_rot270_32x2(<2 x float> %a, <2 x float> %b, <2 x float> %c) { +; CHECK-LABEL: test_rot270_32x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.2s, v1.2s, v2.2s, #270 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot270_32x2 -; CHECK: fcmla v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, v{{[0-9]+}}.2s, #270 -; %res = tail call <2 x float> @llvm.aarch64.neon.vcmla.rot270.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) ret <2 x float> %res } define <8 x half> @test_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-LABEL: test_16x8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.8h, v1.8h, v2.8h, #0 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_16x8 -; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #0 -; %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot0.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) ret <8 x half> %res } define <8 x half> @test_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-LABEL: test_16x8_lane_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.8h, v1.8h, v2.h[0], #0 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_16x8_lane_0 -; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[0], #0 -; %c.cast = bitcast <8 x half> %c to <4 x i32> %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> %c.res = bitcast <4 x i32> %c.dup to <8 x half> @@ -130,19 +147,21 @@ entry: } define <8 x half> @test_rot90_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-LABEL: test_rot90_16x8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.8h, v1.8h, v2.8h, #90 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot90_16x8 -; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #90 -; %res = 
tail call <8 x half> @llvm.aarch64.neon.vcmla.rot90.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) ret <8 x half> %res } define <8 x half> @test_rot90_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-LABEL: test_rot90_16x8_lane_1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.8h, v1.8h, v2.h[1], #90 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot90_16x8_lane_1 -; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[1], #90 -; %c.cast = bitcast <8 x half> %c to <4 x i32> %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> %c.res = bitcast <4 x i32> %c.dup to <8 x half> @@ -151,19 +170,21 @@ entry: } define <8 x half> @test_rot180_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-LABEL: test_rot180_16x8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.8h, v1.8h, v2.8h, #180 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot180_16x8 -; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #180 -; %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot180.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) ret <8 x half> %res } define <8 x half> @test_rot180_16x8_lane_1(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-LABEL: test_rot180_16x8_lane_1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.8h, v1.8h, v2.h[1], #180 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot180_16x8_lane_1 -; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[1], #180 -; %c.cast = bitcast <8 x half> %c to <4 x i32> %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> %c.res = bitcast <4 x i32> %c.dup to <8 x half> @@ -172,19 +193,21 @@ entry: } define <8 x half> @test_rot270_16x8(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-LABEL: test_rot270_16x8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.8h, v1.8h, v2.8h, #270 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot270_16x8 -; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, #270 -; %res = tail call <8 x half> @llvm.aarch64.neon.vcmla.rot270.v8f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) ret <8 x half> %res } define <8 x half> @test_rot270_16x8_lane_0(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; CHECK-LABEL: test_rot270_16x8_lane_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.8h, v1.8h, v2.h[0], #270 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot270_16x8_lane_0 -; CHECK: fcmla v{{[0-9]+}}.8h, v{{[0-9]+}}.8h, v{{[0-9]+}}.h[0], #270 -; %c.cast = bitcast <8 x half> %c to <4 x i32> %c.dup = shufflevector <4 x i32> %c.cast , <4 x i32> undef, <4 x i32> %c.res = bitcast <4 x i32> %c.dup to <8 x half> @@ -193,19 +216,21 @@ entry: } define <4 x float> @test_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: test_32x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.4s, v1.4s, v2.4s, #0 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_32x4 -; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #0 -; %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) ret <4 x float> %res } define <4 x float> @test_32x4_lane_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: test_32x4_lane_0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.4s, v1.4s, v2.s[0], #0 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_32x4_lane_0 -; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.s[0], #0 -; %c.cast = bitcast <4 x float> %c to <2 x i64> %c.dup = shufflevector <2 x i64> %c.cast , <2 x i64> undef, <2 x i32> 
%c.res = bitcast <2 x i64> %c.dup to <4 x float> @@ -214,68 +239,170 @@ entry: } define <4 x float> @test_rot90_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: test_rot90_32x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.4s, v1.4s, v2.4s, #90 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot90_32x4 -; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #90 -; %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) ret <4 x float> %res } define <4 x float> @test_rot180_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: test_rot180_32x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.4s, v1.4s, v2.4s, #180 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot180_32x4 -; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #180 -; %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot180.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) ret <4 x float> %res } define <4 x float> @test_rot270_32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: test_rot270_32x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.4s, v1.4s, v2.4s, #270 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot270_32x4 -; CHECK: fcmla v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, v{{[0-9]+}}.4s, #270 -; %res = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot270.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) ret <4 x float> %res } define <2 x double> @test_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: test_64x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.2d, v1.2d, v2.2d, #0 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_64x2 -; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #0 -; %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot0.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) ret <2 x double> %res } define <2 x double> @test_rot90_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: test_rot90_64x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.2d, v1.2d, v2.2d, #90 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot90_64x2 -; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #90 -; %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot90.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) ret <2 x double> %res } define <2 x double> @test_rot180_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: test_rot180_64x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.2d, v1.2d, v2.2d, #180 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot180_64x2 -; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #180 -; %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot180.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) ret <2 x double> %res } define <2 x double> @test_rot270_64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; CHECK-LABEL: test_rot270_64x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmla v0.2d, v1.2d, v2.2d, #270 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: test_rot270_64x2 -; CHECK: fcmla v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, v{{[0-9]+}}.2d, #270 -; %res = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) ret <2 x double> %res } +define <4 x float> @reassoc_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: reassoc_f32x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v3.2d, #0000000000000000 +; CHECK-NEXT: 
fcmla v3.4s, v1.4s, v2.4s, #0 +; CHECK-NEXT: fadd v0.4s, v3.4s, v0.4s +; CHECK-NEXT: ret +entry: + %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c) + %res = fadd fast <4 x float> %d, %a + ret <4 x float> %res +} + +define <4 x float> @reassoc_c_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: reassoc_c_f32x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v3.2d, #0000000000000000 +; CHECK-NEXT: fcmla v3.4s, v1.4s, v2.4s, #90 +; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s +; CHECK-NEXT: ret +entry: + %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot90.v4f32(<4 x float> zeroinitializer, <4 x float> %b, <4 x float> %c) + %res = fadd fast <4 x float> %a, %d + ret <4 x float> %res +} + +define <4 x half> @reassoc_f16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-LABEL: reassoc_f16x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d3, #0000000000000000 +; CHECK-NEXT: fcmla v3.4h, v1.4h, v2.4h, #180 +; CHECK-NEXT: fadd v0.4h, v3.4h, v0.4h +; CHECK-NEXT: ret +entry: + %d = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half> zeroinitializer, <4 x half> %b, <4 x half> %c) + %res = fadd fast <4 x half> %d, %a + ret <4 x half> %res +} + +define <4 x half> @reassoc_c_f16x4(<4 x half> %a, <4 x half> %b, <4 x half> %c) { +; CHECK-LABEL: reassoc_c_f16x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi d3, #0000000000000000 +; CHECK-NEXT: fcmla v3.4h, v1.4h, v2.4h, #270 +; CHECK-NEXT: fadd v0.4h, v0.4h, v3.4h +; CHECK-NEXT: ret +entry: + %d = tail call <4 x half> @llvm.aarch64.neon.vcmla.rot270.v4f16(<4 x half> zeroinitializer, <4 x half> %b, <4 x half> %c) + %res = fadd fast <4 x half> %a, %d + ret <4 x half> %res +} + +define <2 x double> @reassoc_f64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %g) { +; CHECK-LABEL: reassoc_f64x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v4.2d, #0000000000000000 +; CHECK-NEXT: fcmla v0.2d, v1.2d, v2.2d, #270 +; CHECK-NEXT: fcmla v4.2d, v2.2d, v3.2d, #270 +; CHECK-NEXT: fadd v0.2d, v4.2d, v0.2d +; CHECK-NEXT: ret +entry: + %d = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) + %e = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> zeroinitializer, <2 x double> %c, <2 x double> %g) + %res = fadd fast <2 x double> %e, %d + ret <2 x double> %res +} + +define <2 x double> @reassoc_c_f64x2(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %g) { +; CHECK-LABEL: reassoc_c_f64x2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov v4.16b, v0.16b +; CHECK-NEXT: fcmla v0.2d, v2.2d, v3.2d, #270 +; CHECK-NEXT: fcmla v4.2d, v1.2d, v2.2d, #270 +; CHECK-NEXT: fadd v0.2d, v0.2d, v4.2d +; CHECK-NEXT: ret +entry: + %d = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) + %e = tail call <2 x double> @llvm.aarch64.neon.vcmla.rot270.v2f64(<2 x double> %a, <2 x double> %c, <2 x double> %g) + %res = fadd fast <2 x double> %e, %d + ret <2 x double> %res +} + +define <4 x float> @reassoc_nonfast_f32x4(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: reassoc_nonfast_f32x4: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v3.2d, #0000000000000000 +; CHECK-NEXT: fcmla v3.4s, v1.4s, v2.4s, #0 +; CHECK-NEXT: fadd v0.4s, v3.4s, v0.4s +; CHECK-NEXT: ret +entry: + %d = tail call <4 x float> @llvm.aarch64.neon.vcmla.rot0.v4f32(<4 x float> zeroinitializer, <4 x float> 
%b, <4 x float> %c) + %res = fadd <4 x float> %d, %a + ret <4 x float> %res +} + declare <4 x half> @llvm.aarch64.neon.vcmla.rot0.v4f16(<4 x half>, <4 x half>, <4 x half>) declare <4 x half> @llvm.aarch64.neon.vcmla.rot90.v4f16(<4 x half>, <4 x half>, <4 x half>) declare <4 x half> @llvm.aarch64.neon.vcmla.rot180.v4f16(<4 x half>, <4 x half>, <4 x half>) From 67089a39a23b6ff4d1e2c16502cdf627cb56e6fc Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 20 Mar 2023 09:34:57 -0700 Subject: [PATCH 039/691] [LSR] Regen tests to adjust for naming in SCEVExpander [nfc] --- .../lsr-term-fold-negative-testcase.ll | 20 ++++---- .../LoopStrengthReduce/lsr-term-fold.ll | 48 +++++++++---------- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll index cb6c63ad39712..8682351a4e30c 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll @@ -144,7 +144,7 @@ define void @NonSCEVableIV(float %init, float* %A, i32 %N) { ; CHECK-NEXT: [[TMP0:%.*]] = load float, ptr @fp_inc, align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[ENTRY]] ] ; CHECK-NEXT: [[X_05:%.*]] = phi float [ [[INIT]], [[ENTRY]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: store float [[X_05]], ptr [[LSR_IV1]], align 4 @@ -152,7 +152,7 @@ define void @NonSCEVableIV(float %init, float* %A, i32 %N) { ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[LSR_IV]] to i32 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] ; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], 1 -; CHECK-NEXT: [[UGLYGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -217,7 +217,7 @@ define void @TermCondMoreThanOneUse(ptr %a) { ; CHECK-SAME: (ptr [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A]], i64 84 -; CHECK-NEXT: [[UGLYGEP6:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1600 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[FOR_BODY]] ], [ -378, [[ENTRY:%.*]] ] @@ -227,7 +227,7 @@ define void @TermCondMoreThanOneUse(ptr %a) { ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV2]], 0 ; CHECK-NEXT: [[DUMMY:%.*]] = select i1 [[EXITCOND_NOT]], i8 0, i8 1 ; CHECK-NEXT: [[LSR_IV_NEXT3]] = add nsw i64 [[LSR_IV2]], 1 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP6]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -280,16 +280,16 @@ define void @ebur128_calc_gating_block(ptr %st, ptr %optional_output) { ; CHECK-NEXT: br i1 [[CMP525_NOT]], label [[FOR_INC11]], 
label [[FOR_BODY7_LR_PH:%.*]] ; CHECK: for.body7.lr.ph: ; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[AUDIO_DATA]], align 8 -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[LSR_IV1]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[LSR_IV1]] ; CHECK-NEXT: br label [[FOR_BODY7:%.*]] ; CHECK: for.body7: -; CHECK-NEXT: [[LSR_IV3:%.*]] = phi ptr [ [[UGLYGEP4:%.*]], [[FOR_BODY7]] ], [ [[UGLYGEP]], [[FOR_BODY7_LR_PH]] ] +; CHECK-NEXT: [[LSR_IV3:%.*]] = phi ptr [ [[SCEVGEP4:%.*]], [[FOR_BODY7]] ], [ [[SCEVGEP]], [[FOR_BODY7_LR_PH]] ] ; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY7]] ], [ [[UMAX]], [[FOR_BODY7_LR_PH]] ] ; CHECK-NEXT: [[CHANNEL_SUM_127:%.*]] = phi double [ [[CHANNEL_SUM_030]], [[FOR_BODY7_LR_PH]] ], [ [[ADD10:%.*]], [[FOR_BODY7]] ] ; CHECK-NEXT: [[TMP4:%.*]] = load double, ptr [[LSR_IV3]], align 8 ; CHECK-NEXT: [[ADD10]] = fadd double [[CHANNEL_SUM_127]], [[TMP4]] ; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 -; CHECK-NEXT: [[UGLYGEP4]] = getelementptr i8, ptr [[LSR_IV3]], i64 [[TMP2]] +; CHECK-NEXT: [[SCEVGEP4]] = getelementptr i8, ptr [[LSR_IV3]], i64 [[TMP2]] ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_INC11_LOOPEXIT:%.*]], label [[FOR_BODY7]] ; CHECK: for.inc11.loopexit: @@ -369,14 +369,14 @@ define i64 @alac_seek(ptr %0) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DIV:%.*]] = udiv i64 1, 0 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[DIV]], 1 -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 12 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 12 ; CHECK-NEXT: br label [[FOR_BODY_I:%.*]] ; CHECK: for.body.i: -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY_I]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], [[FOR_BODY_I]] ], [ [[SCEVGEP]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY_I]] ], [ [[TMP1]], [[ENTRY]] ] ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 -; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 ; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label [[ALAC_PAKT_BLOCK_OFFSET_EXIT:%.*]], label [[FOR_BODY_I]] ; CHECK: alac_pakt_block_offset.exit: diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll index 762b8b15a671e..7da1a73a21d1e 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll @@ -7,13 +7,13 @@ define void @const_tripcount(ptr %a) { ; CHECK-LABEL: @const_tripcount( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84 -; CHECK-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1600 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP4]] +; 
CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -42,13 +42,13 @@ define void @runtime_tripcount(ptr %a, i32 %N) { ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 84 -; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -79,14 +79,14 @@ define void @ptr_of_ptr_addrec(ptr %ptrptr, i32 %length) { ; CHECK-NEXT: [[START_PTRPTR:%.*]] = getelementptr ptr, ptr [[PTRPTR:%.*]] ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP1]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP1]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IT_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START_PTRPTR]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[IT_04]], align 8 ; CHECK-NEXT: tail call void @foo(ptr [[TMP2]]) ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[IT_04]], i64 1 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[UGLYGEP]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -119,7 +119,7 @@ define void @iv_start_non_preheader(ptr %mark, i32 signext %length) { ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP1]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -130,7 +130,7 @@ define void @iv_start_non_preheader(ptr %mark, i32 signext %length) { ; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DST_04]], align 8 ; CHECK-NEXT: [[TMP3:%.*]] = call ptr @foo(ptr [[TMP2]]) ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[UGLYGEP]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label 
[[FOR_BODY]] ; entry: @@ -162,13 +162,13 @@ for.body: ; preds = %entry, %for.body define void @inbounds_poison_use(ptr %a) { ; CHECK-LABEL: @inbounds_poison_use( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 16 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 16 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i8 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr inbounds i8, ptr [[LSR_IV1]], i64 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -198,13 +198,13 @@ define void @iv_size(ptr %a, i128 %N) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[N:%.*]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2 -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -231,13 +231,13 @@ define void @IcmpSle(ptr %a) { ; CHECK-LABEL: @IcmpSle( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 -; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1600 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -264,13 +264,13 @@ define void @IcmpSlt(ptr %a) { ; CHECK-LABEL: @IcmpSlt( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 -; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 1604 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1604 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 -; CHECK-NEXT: 
[[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -297,13 +297,13 @@ define void @IcmpSgt(ptr %a) { ; CHECK-LABEL: @IcmpSgt( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 -; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 88 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 88 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -330,13 +330,13 @@ define void @IcmpSgt2(ptr %a) { ; CHECK-LABEL: @IcmpSgt2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 -; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1600 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -363,7 +363,7 @@ define void @SeveralLoopLatch(ptr %a) { ; CHECK-LABEL: @SeveralLoopLatch( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 -; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1600 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[ANOTHER_BRANCH:%.*]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] @@ -371,7 +371,7 @@ define void @SeveralLoopLatch(ptr %a) { ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 ; CHECK-NEXT: br label [[ANOTHER_BRANCH]] ; CHECK: another.branch: -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -401,7 +401,7 @@ define void @SeveralLoopLatch2(ptr %a) { ; CHECK-LABEL: @SeveralLoopLatch2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 -; CHECK-NEXT: [[UGLYGEP1:%.*]] = getelementptr i8, ptr [[A]], i64 1600 +; 
CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1600 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[ANOTHER_BRANCH:%.*]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] @@ -409,7 +409,7 @@ define void @SeveralLoopLatch2(ptr %a) { ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 ; CHECK-NEXT: br label [[ANOTHER_BRANCH]] ; CHECK: another.branch: -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[UGLYGEP1]] +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void From edc03550063ce1c39bb47bf94937cf036359b487 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 20 Mar 2023 11:49:10 -0500 Subject: [PATCH 040/691] [Libomptarget] Add missing explicit moves on llvm::Error Summary: Some older compilers, which we still support, have problems handling the copy elision that allows us to directly move an `Error` to an `Expected`. This patch adds explicit moves to remove the error. --- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp | 2 +- .../common/PluginInterface/PluginInterface.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index e03825651286d..5a31c5362265c 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -1863,7 +1863,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { hsa_status_t Status = hsa_amd_memory_lock(HstPtr, Size, nullptr, 0, &PinnedPtr); if (auto Err = Plugin::check(Status, "Error in hsa_amd_memory_lock: %s\n")) - return Err; + return std::move(Err); return PinnedPtr; } diff --git a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp index 65983577f08fd..0b90d250b762b 100644 --- a/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/PluginInterface/PluginInterface.cpp @@ -703,7 +703,7 @@ Expected<void *> PinnedAllocationMapTy::lockHostBuffer(void *HstPtr, if (Entry) { // An already registered intersecting buffer was found. Register a new use. if (auto Err = registerEntryUse(*Entry, HstPtr, Size)) - return Err; + return std::move(Err); // Return the device accessible pointer with the correct offset. return advanceVoidPtr(Entry->DevAccessiblePtr, @@ -718,7 +718,7 @@ Expected<void *> PinnedAllocationMapTy::lockHostBuffer(void *HstPtr, // Now insert the new entry into the map. if (auto Err = insertEntry(HstPtr, *DevAccessiblePtrOrErr, Size)) - return Err; + return std::move(Err); // Return the device accessible pointer. return *DevAccessiblePtrOrErr; @@ -885,7 +885,7 @@ Expected<void *> GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr, // Register allocated buffer as pinned memory if the type is host memory.
if (Kind == TARGET_ALLOC_HOST) if (auto Err = PinnedAllocs.registerHostBuffer(Alloc, Alloc, Size)) - return Err; + return std::move(Err); return Alloc; } From ad9f751a6e9085d45d7c33877f4a5185463c617f Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 20 Mar 2023 12:00:01 -0500 Subject: [PATCH 041/691] [Libomptarget] Add missing explicit moves on llvm::Error Summary: Some older compilers, which we still support, have problems handling the copy elision that allows us to directly move an `Error` to an `Expected`. This patch adds explicit moves to remove the error. Same as last patch but I forgot this one. --- openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp index 5a31c5362265c..f9b0371f903a8 100644 --- a/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp @@ -1886,7 +1886,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { /* Number of accessible agents (out) */ nullptr, /* Accessible agents */ nullptr); if (auto Err = Plugin::check(Status, "Error in hsa_amd_pointer_info: %s")) - return Err; + return std::move(Err); // The buffer may be locked or allocated through HSA allocators. Assume that // the buffer is host pinned if the runtime reports a HSA type. From 34f1da3468ab70b182072ddbe9e2784b3bde9923 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Mon, 20 Mar 2023 17:05:48 +0000 Subject: [PATCH 042/691] [RISCV] Make Zfh PseudoQuietFCMP definitions predicated on HasStdExtZfh These are currently in a `Predicates = [HasStdExtZfhOrZfhmin]` block, but Zfhmin has no fcmp instructions so the definition makes no sense for Zfhmin. Differential Revision: https://reviews.llvm.org/D146435 --- llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td index 43e8fa1dfe0e3..3ff445e1397d8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td @@ -206,15 +206,16 @@ def : InstAlias<"fgt.h $rd, $rs, $rt", (FLT_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; def : InstAlias<"fge.h $rd, $rs, $rt", (FLE_H GPR:$rd, FPR16:$rt, FPR16:$rs), 0>; -} // Predicates = [HasStdExtZfh] -let Predicates = [HasStdExtZfhOrZfhmin] in { -def PseudoFLH : PseudoFloatLoad<"flh", FPR16>; -def PseudoFSH : PseudoStore<"fsh", FPR16>; let usesCustomInserter = 1 in { def PseudoQuietFLE_H : PseudoQuietFCMP<FPR16>; def PseudoQuietFLT_H : PseudoQuietFCMP<FPR16>; } +} // Predicates = [HasStdExtZfh] + +let Predicates = [HasStdExtZfhOrZfhmin] in { +def PseudoFLH : PseudoFloatLoad<"flh", FPR16>; +def PseudoFSH : PseudoStore<"fsh", FPR16>; } // Predicates = [HasStdExtZfhOrZfhmin] let Predicates = [HasStdExtZhinx] in { From dfb40d3fd7a261d7e3f697242df4680f1e6780ff Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Sun, 19 Mar 2023 08:45:28 -0700 Subject: [PATCH 043/691] [SimpleLoopUnswitch] Skip non-trivial unswitching of cold loop nests This fixes a compile time issue due to guarding loop unswitching based on whether the enclosing function is cold.
That approach is very inefficient in the case of large cold functions that contain numerous loops, since the loop pass calls isFunctionColdInCallGraph once per loop, and that function walks all BBs in the function (twice for Sample PGO) looking for any non-cold blocks. Originally, this code only checked if the current Loop's header was cold (D129599). However, that apparently caused a slowdown on a SPEC benchmark, and the example given was that of a cold inner loop nested in a non-cold outer loop (see comments in D129599). The fix was to check if the whole function is cold, done in D133275. This is overkill, and we can simply check if the header of any loop in the current loop's loop nest is non-cold (looking at both outer and inner loops). This patch drops the compile time for a large module by 40% with this approach. I also updated PGO-nontrivial-unswitch2.ll since it only had one cold loop in a non-cold function, so that it instead had IR based off the example given in the comments relating to the SPEC degradation in D129599. I confirmed that the new version of the test fails with the original check done in D129599 of only the current loop's header coldness. Similarly updated test PGO-nontrivial-unswitch.ll to contain a cold loop in a cold loop nest, and created PGO-nontrivial-unswitch3.ll to contain a non-cold loop in a non-cold loop nest. Differential Revision: https://reviews.llvm.org/D146383 --- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 31 ++- .../PGO-nontrivial-unswitch.ll | 161 ++++++++---- .../PGO-nontrivial-unswitch2.ll | 236 ++++++++++++------ .../PGO-nontrivial-unswitch3.ll | 184 ++++++++++++++ 4 files changed, 485 insertions(+), 127 deletions(-) create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch3.ll diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 204aaf743e149..819653cc9c632 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -3493,10 +3493,33 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC, if (L.getHeader()->getParent()->hasOptSize()) return false; - // Skip cold loops, as unswitching them brings little benefit - // but increases the code size - if (PSI && PSI->hasProfileSummary() && BFI && - PSI->isFunctionColdInCallGraph(L.getHeader()->getParent(), *BFI)) { + // Returns true if Loop L's loop nest is cold, i.e. if the headers of L, + // of the loops L is nested in, and of the loops nested in L are all cold. + auto IsLoopNestCold = [&](const Loop *L) { + // Check L and all of its parent loops. + auto *Parent = L; + while (Parent) { + if (!PSI->isColdBlock(Parent->getHeader(), BFI)) + return false; + Parent = Parent->getParentLoop(); + } + // Next check all loops nested within L. 
+ SmallVector<const Loop *, 4> Worklist; + Worklist.insert(Worklist.end(), L->getSubLoops().begin(), + L->getSubLoops().end()); + while (!Worklist.empty()) { + auto *CurLoop = Worklist.pop_back_val(); + if (!PSI->isColdBlock(CurLoop->getHeader(), BFI)) + return false; + Worklist.insert(Worklist.end(), CurLoop->getSubLoops().begin(), + CurLoop->getSubLoops().end()); + } + return true; + }; + + // Skip cold loops in cold loop nests, as unswitching them brings little + // benefit but increases the code size + if (PSI && PSI->hasProfileSummary() && BFI && IsLoopNestCold(&L)) { LLVM_DEBUG(dbgs() << " Skip cold loop: " << L << "\n"); return false; } diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll index daff716edea8d..f1ffcc788a019 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch.ll @@ -1,65 +1,124 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt < %s -passes='require<profile-summary>,function(loop-mssa(simple-loop-unswitch<nontrivial>))' -S | FileCheck %s -; This test checks for a crash. -; RUN: opt < %s -passes=simple-loop-unswitch -aa-pipeline= -disable-output -declare i32 @a() -declare i32 @b() -; Check loops in cold functions will not be applied non-trivial loop unswitch -define void @f1(i32 %i, i1 %cond, i1 %hot_cond, i1 %cold_cond, ptr %ptr) !prof !0 { -; CHECK-LABEL: @f1( +;; Check that non-trivial loop unswitching is not applied to a cold loop in a +;; cold loop nest. + +;; IR was generated from the following loop nest, profiled when called +;; with M=0 and N=0.
+;; void hotFunction(bool cond, int M, int N, int * A, int *B, int *C) { +;; for (unsigned j = 0; j < M; j++) +;; for (unsigned i=0; i < N; i++) { +;; A[i] = B[i] + C[i]; +;; if (cond) do_something(); +;; } +;; } + +define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ptr %C) !prof !36 { +; CHECK-LABEL: define void @_Z11hotFunctionbiiPiS_S_ +; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) !prof [[PROF16:![0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]] -; CHECK: cold_loop_begin: -; CHECK-NEXT: br i1 [[COND:%.*]], label [[COLD_LOOP_A:%.*]], label [[COLD_LOOP_B:%.*]] -; CHECK: cold_loop_a: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a() -; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]] -; CHECK: cold_loop_b: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b() -; CHECK-NEXT: br label [[COLD_LOOP_LATCH]] -; CHECK: cold_loop_latch: -; CHECK-NEXT: [[V2:%.*]] = load i1, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT:%.*]] -; CHECK: cold_loop_exit: +; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], !prof [[PROF17:![0-9]+]] +; CHECK: for.cond1.preheader.lr.ph: +; CHECK-NEXT: [[CMP217_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] +; CHECK: for.cond1.preheader: +; CHECK-NEXT: [[J_020:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH]] ], [ [[INC10:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] +; CHECK-NEXT: br i1 [[CMP217_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]] +; CHECK: for.body4.preheader: +; CHECK-NEXT: br label [[FOR_BODY4:%.*]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void +; CHECK: for.cond.cleanup3.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3]] +; CHECK: for.cond.cleanup3: +; CHECK-NEXT: [[INC10]] = add nuw i32 [[J_020]], 1 +; CHECK-NEXT: [[EXITCOND22_NOT:%.*]] = icmp eq i32 [[INC10]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND22_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_COND1_PREHEADER]], !prof [[PROF17]] +; CHECK: for.body4: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY4_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX8]], align 4 +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; CHECK: if.then: +; CHECK-NEXT: tail call void @_Z12do_somethingv() +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3_LOOPEXIT:%.*]], label [[FOR_BODY4]] ; entry: - br label %cold_loop_begin + %cmp19.not = icmp eq i32 %M, 0 + br i1 
%cmp19.not, label %for.cond.cleanup, label %for.cond1.preheader.lr.ph, !prof !37 -cold_loop_begin: - br i1 %cond, label %cold_loop_a, label %cold_loop_b +for.cond1.preheader.lr.ph: + %cmp217.not = icmp eq i32 %N, 0 + br label %for.cond1.preheader -cold_loop_a: - call i32 @a() - br label %cold_loop_latch +for.cond1.preheader: + %j.020 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc10, %for.cond.cleanup3 ] + br i1 %cmp217.not, label %for.cond.cleanup3, label %for.body4 -cold_loop_b: - call i32 @b() - br label %cold_loop_latch +for.cond.cleanup: + ret void -cold_loop_latch: - %v2 = load i1, ptr %ptr - br i1 %v2, label %cold_loop_begin, label %cold_loop_exit +for.cond.cleanup3: + %inc10 = add nuw i32 %j.020, 1 + %exitcond22.not = icmp eq i32 %inc10, %M + br i1 %exitcond22.not, label %for.cond.cleanup, label %for.cond1.preheader, !prof !37 -cold_loop_exit: - ret void +for.body4: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.cond1.preheader ] + %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx6 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv + %1 = load i32, ptr %arrayidx6, align 4 + %add = add nsw i32 %1, %0 + %arrayidx8 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv + store i32 %add, ptr %arrayidx8, align 4 + br i1 %cond, label %if.then, label %for.inc + +if.then: + tail call void @_Z12do_somethingv() + br label %for.inc + +for.inc: + %wide.trip.count = zext i32 %N to i64 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4 } -!llvm.module.flags = !{!1} -!0 = !{!"function_entry_count", i64 0} -!1 = !{i32 1, !"ProfileSummary", !2} -!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} -!3 = !{!"ProfileFormat", !"InstrProf"} -!4 = !{!"TotalCount", i64 10000} -!5 = !{!"MaxCount", i64 10} -!6 = !{!"MaxInternalCount", i64 1} -!7 = !{!"MaxFunctionCount", i64 1000} -!8 = !{!"NumCounts", i64 3} -!9 = !{!"NumFunctions", i64 3} -!10 = !{!"DetailedSummary", !11} -!11 = !{!12, !13, !14} -!12 = !{i32 10000, i64 100, i32 1} -!13 = !{i32 999000, i64 100, i32 1} -!14 = !{i32 999999, i64 1, i32 2} +declare void @_Z12do_somethingv() + +!llvm.module.flags = !{!6} + +!6 = !{i32 1, !"ProfileSummary", !7} +!7 = !{!8, !9, !10, !11, !12, !13, !14, !15, !16, !17} +!8 = !{!"ProfileFormat", !"InstrProf"} +!9 = !{!"TotalCount", i64 1002} +!10 = !{!"MaxCount", i64 1000} +!11 = !{!"MaxInternalCount", i64 1000} +!12 = !{!"MaxFunctionCount", i64 1} +!13 = !{!"NumCounts", i64 6} +!14 = !{!"NumFunctions", i64 3} +!15 = !{!"IsPartialProfile", i64 0} +!16 = !{!"PartialProfileRatio", double 0.000000e+00} +!17 = !{!"DetailedSummary", !18} +!18 = !{!19, !31, !34} +!19 = !{i32 10000, i64 1000, i32 1} +!31 = !{i32 999000, i64 1000, i32 1} +!34 = !{i32 999999, i64 1, i32 3} +!36 = !{!"function_entry_count", i64 1} +!37 = !{!"branch_weights", i32 1, i32 0} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll index 2f7acac885f45..ad674ed11d3d8 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll @@ -1,90 +1,182 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt < %s 
-passes='require<profile-summary>,function(loop-mssa(simple-loop-unswitch<nontrivial>))' -S | FileCheck %s -; This test checks for a crash. ; RUN: opt < %s -passes=simple-loop-unswitch -aa-pipeline= -disable-output -declare i32 @a() -declare i32 @b() ; Check loops will be applied non-trivial loop unswitch in a non-cold function, ; even loop headers are cold +;; Check that non-trivial loop unswitching is applied to a cold loop in a +;; non-cold loop nest. + +;; IR was generated from the following loop nest, profiled when called +;; with M=1000 and N=0. +;; void hotFunction(bool cond, int M, int N, int * A, int *B, int *C) { +;; for (unsigned j = 0; j < M; j++) +;; for (unsigned i=0; i < N; i++) { +;; A[i] = B[i] + C[i]; +;; if (cond) do_something(); +;; } +;; } + +define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ptr %C) !prof !36 { +; CHECK-LABEL: define void @_Z11hotFunctionbiiPiS_S_ +; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) !prof [[PROF33:![0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[ENTRY_COLD_LOOP:%.*]] -; CHECK: entry_cold_loop: -; CHECK-NEXT: br i1 [[COLD_COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER:%.*]], label [[COLD_LOOP_EXIT:%.*]], !prof [[PROF15:![0-9]+]] -; CHECK: cold_loop_begin.preheader: -; CHECK-NEXT: br i1 [[COND:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT_US:%.*]], label [[COLD_LOOP_BEGIN_PREHEADER_SPLIT:%.*]] -; CHECK: cold_loop_begin.preheader.split.us: -; CHECK-NEXT: br label [[COLD_LOOP_BEGIN_US:%.*]] -; CHECK: cold_loop_begin.us: -; CHECK-NEXT: br label [[COLD_LOOP_A_US:%.*]] -; CHECK: cold_loop_a.us: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a() -; CHECK-NEXT: br label [[COLD_LOOP_LATCH_US:%.*]] -; CHECK: cold_loop_latch.us: -; CHECK-NEXT: [[V2_US:%.*]] = load i1, ptr [[PTR:%.*]], align 1 -; CHECK-NEXT: br i1 [[V2_US]], label [[COLD_LOOP_BEGIN_US]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT_US:%.*]] -; CHECK: cold_loop_exit.loopexit.split.us: -; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT:%.*]] -; CHECK: cold_loop_begin.preheader.split: -; CHECK-NEXT: br label [[COLD_LOOP_BEGIN:%.*]] -; CHECK: cold_loop_begin: -; CHECK-NEXT: br label [[COLD_LOOP_B:%.*]] -; CHECK: cold_loop_b: -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b() -; CHECK-NEXT: br label [[COLD_LOOP_LATCH:%.*]] -; CHECK: cold_loop_latch: -; CHECK-NEXT: [[V2:%.*]] = load i1, ptr [[PTR]], align 1 -; CHECK-NEXT: br i1 [[V2]], label [[COLD_LOOP_BEGIN]], label [[COLD_LOOP_EXIT_LOOPEXIT_SPLIT:%.*]] -; CHECK: cold_loop_exit.loopexit.split: -; CHECK-NEXT: br label [[COLD_LOOP_EXIT_LOOPEXIT]] -; CHECK: cold_loop_exit.loopexit: -; CHECK-NEXT: br label [[COLD_LOOP_EXIT]] -; CHECK: cold_loop_exit: +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], !prof [[PROF34:![0-9]+]] +; CHECK: for.cond1.preheader.lr.ph: +; CHECK-NEXT: [[CMP217_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP217_NOT]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT:%.*]], !prof [[PROF35:![0-9]+]] +; CHECK: for.cond1.preheader.lr.ph.split.us: +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] +; CHECK: for.cond1.preheader.us: +; CHECK-NEXT: [[J_020_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US]] ], [ [[INC10_US:%.*]], [[FOR_COND_CLEANUP3_US:%.*]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_US]] +; CHECK: for.cond.cleanup3.us: +; CHECK-NEXT: [[INC10_US]] = add nuw i32 [[J_020_US]], 1 +;
CHECK-NEXT: [[EXITCOND22_NOT_US:%.*]] = icmp eq i32 [[INC10_US]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND22_NOT_US]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US]], !prof [[PROF34]] +; CHECK: for.cond.cleanup.loopexit.split.us: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] +; CHECK: for.cond1.preheader.lr.ph.split: +; CHECK-NEXT: br i1 [[COND]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT:%.*]] +; CHECK: for.cond1.preheader.lr.ph.split.split.us: +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US1:%.*]] +; CHECK: for.cond1.preheader.us1: +; CHECK-NEXT: [[J_020_US2:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US]] ], [ [[INC10_US4:%.*]], [[FOR_COND_CLEANUP3_US3:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_US:%.*]] +; CHECK: for.cond.cleanup3.us3: +; CHECK-NEXT: [[INC10_US4]] = add nuw i32 [[J_020_US2]], 1 +; CHECK-NEXT: [[EXITCOND22_NOT_US5:%.*]] = icmp eq i32 [[INC10_US4]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND22_NOT_US5]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US1]], !prof [[PROF34]] +; CHECK: for.body4.preheader.us: +; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_SPLIT_US_US:%.*]] +; CHECK: for.cond.cleanup3.loopexit.us: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_US3]] +; CHECK: for.body4.preheader.split.us.us: +; CHECK-NEXT: br label [[FOR_BODY4_US_US:%.*]] +; CHECK: for.body4.us.us: +; CHECK-NEXT: [[INDVARS_IV_US_US:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_US_US:%.*]], [[FOR_INC_US_US:%.*]] ], [ 0, [[FOR_BODY4_PREHEADER_SPLIT_US_US]] ] +; CHECK-NEXT: [[ARRAYIDX_US_US:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_US_US]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_US_US]], align 4 +; CHECK-NEXT: [[ARRAYIDX6_US_US:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV_US_US]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX6_US_US]], align 4 +; CHECK-NEXT: [[ADD_US_US:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX8_US_US:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_US_US]] +; CHECK-NEXT: store i32 [[ADD_US_US]], ptr [[ARRAYIDX8_US_US]], align 4 +; CHECK-NEXT: br label [[IF_THEN_US_US:%.*]] +; CHECK: if.then.us.us: +; CHECK-NEXT: tail call void @_Z12do_somethingv() +; CHECK-NEXT: br label [[FOR_INC_US_US]] +; CHECK: for.inc.us.us: +; CHECK-NEXT: [[WIDE_TRIP_COUNT_US_US:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[INDVARS_IV_NEXT_US_US]] = add nuw nsw i64 [[INDVARS_IV_US_US]], 1 +; CHECK-NEXT: [[EXITCOND_NOT_US_US:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_US_US]], [[WIDE_TRIP_COUNT_US_US]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT_US_US]], label [[FOR_COND_CLEANUP3_LOOPEXIT_SPLIT_US_US:%.*]], label [[FOR_BODY4_US_US]], !prof [[PROF35]] +; CHECK: for.cond.cleanup3.loopexit.split.us.us: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_LOOPEXIT_US:%.*]] +; CHECK: for.cond.cleanup.loopexit.split.split.us: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]] +; CHECK: for.cond1.preheader.lr.ph.split.split: +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] +; CHECK: for.cond1.preheader: +; CHECK-NEXT: [[J_020:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT]] ], [ [[INC10:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER:%.*]] +; CHECK: for.body4.preheader: +; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_SPLIT:%.*]] +; CHECK: for.body4.preheader.split: +; CHECK-NEXT: br label [[FOR_BODY4:%.*]] +; CHECK: 
for.cond.cleanup.loopexit.split.split: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT]] +; CHECK: for.cond.cleanup.loopexit.split: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void +; CHECK: for.cond.cleanup3.loopexit.split: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_LOOPEXIT:%.*]] +; CHECK: for.cond.cleanup3.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3]] +; CHECK: for.cond.cleanup3: +; CHECK-NEXT: [[INC10]] = add nuw i32 [[J_020]], 1 +; CHECK-NEXT: [[EXITCOND22_NOT:%.*]] = icmp eq i32 [[INC10]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND22_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_SPLIT:%.*]], label [[FOR_COND1_PREHEADER]], !prof [[PROF34]] +; CHECK: for.body4: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY4_PREHEADER_SPLIT]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX8]], align 4 +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3_LOOPEXIT_SPLIT:%.*]], label [[FOR_BODY4]], !prof [[PROF35]] ; entry: - br label %entry_cold_loop + %cmp19.not = icmp eq i32 %M, 0 + br i1 %cmp19.not, label %for.cond.cleanup, label %for.cond1.preheader.lr.ph, !prof !37 -entry_cold_loop: - br i1 %cold_cond, label %cold_loop_begin, label %cold_loop_exit, !prof !15 +for.cond1.preheader.lr.ph: + %cmp217.not = icmp eq i32 %N, 0 + br label %for.cond1.preheader -cold_loop_begin: - br i1 %cond, label %cold_loop_a, label %cold_loop_b +for.cond1.preheader: + %j.020 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc10, %for.cond.cleanup3 ] + br i1 %cmp217.not, label %for.cond.cleanup3, label %for.body4, !prof !38 -cold_loop_a: - %0 = call i32 @a() - br label %cold_loop_latch +for.cond.cleanup: + ret void -cold_loop_b: - %1 = call i32 @b() - br label %cold_loop_latch +for.cond.cleanup3: + %inc10 = add nuw i32 %j.020, 1 + %exitcond22.not = icmp eq i32 %inc10, %M + br i1 %exitcond22.not, label %for.cond.cleanup, label %for.cond1.preheader, !prof !37 -cold_loop_latch: - %v2 = load i1, ptr %ptr - br i1 %v2, label %cold_loop_begin, label %cold_loop_exit +for.body4: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.cond1.preheader ] + %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx6 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv + %1 = load i32, ptr %arrayidx6, align 4 + %add = add nsw i32 %1, %0 + %arrayidx8 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv + store i32 %add, ptr %arrayidx8, align 4 + br i1 %cond, label %if.then, label %for.inc -cold_loop_exit: - ret void +if.then: + tail call void @_Z12do_somethingv() + br label %for.inc + +for.inc: + 
%wide.trip.count = zext i32 %N to i64 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4, !prof !38 } -!llvm.module.flags = !{!0} +declare void @_Z12do_somethingv() + +!llvm.module.flags = !{!6} -!0 = !{i32 1, !"ProfileSummary", !1} -!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} -!2 = !{!"ProfileFormat", !"InstrProf"} -!3 = !{!"TotalCount", i64 10000} -!4 = !{!"MaxCount", i64 10} -!5 = !{!"MaxInternalCount", i64 1} -!6 = !{!"MaxFunctionCount", i64 1000} -!7 = !{!"NumCounts", i64 3} -!8 = !{!"NumFunctions", i64 3} -!9 = !{!"DetailedSummary", !10} -!10 = !{!11, !12, !13} -!11 = !{i32 10000, i64 100, i32 1} -!12 = !{i32 999000, i64 100, i32 1} -!13 = !{i32 999999, i64 1, i32 2} -!14 = !{!"function_entry_count", i64 400} -!15 = !{!"branch_weights", i32 0, i32 100} +!6 = !{i32 1, !"ProfileSummary", !7} +!7 = !{!8, !9, !10, !11, !12, !13, !14, !15, !16, !17} +!8 = !{!"ProfileFormat", !"InstrProf"} +!9 = !{!"TotalCount", i64 1002} +!10 = !{!"MaxCount", i64 1000} +!11 = !{!"MaxInternalCount", i64 1000} +!12 = !{!"MaxFunctionCount", i64 1} +!13 = !{!"NumCounts", i64 6} +!14 = !{!"NumFunctions", i64 3} +!15 = !{!"IsPartialProfile", i64 0} +!16 = !{!"PartialProfileRatio", double 0.000000e+00} +!17 = !{!"DetailedSummary", !18} +!18 = !{!19, !31, !34} +!19 = !{i32 10000, i64 1000, i32 1} +!31 = !{i32 999000, i64 1000, i32 1} +!34 = !{i32 999999, i64 1, i32 3} +!36 = !{!"function_entry_count", i64 1} +!37 = !{!"branch_weights", i32 1, i32 1000} +!38 = !{!"branch_weights", i32 1000, i32 0} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch3.ll b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch3.ll new file mode 100644 index 0000000000000..59b8404b3e9ef --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch3.ll @@ -0,0 +1,184 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 + +; RUN: opt < %s -passes='require,function(loop-mssa(simple-loop-unswitch))' -S | FileCheck %s + +;; Check that non-trivial loop unswitching is applied to a non-cold loop in a +;; non-cold loop nest. + +;; IR was generated from the following loop nest, profiled when called +;; with M=1000 and N=10. 
+;; void hotFunction(bool cond, int M, int N, int * A, int *B, int *C) { +;; for (unsigned j = 0; j < M; j++) +;; for (unsigned i=0; i < N; i++) { +;; A[i] = B[i] + C[i]; +;; if (cond) do_something(); +;; } +;; } + +define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ptr %C) !prof !36 { +; CHECK-LABEL: define void @_Z11hotFunctionbiiPiS_S_ +; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) !prof [[PROF18:![0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i32 [[M]], 0 +; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], !prof [[PROF19:![0-9]+]] +; CHECK: for.cond1.preheader.lr.ph: +; CHECK-NEXT: [[CMP217_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP217_NOT]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT:%.*]], !prof [[PROF20:![0-9]+]] +; CHECK: for.cond1.preheader.lr.ph.split.us: +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] +; CHECK: for.cond1.preheader.us: +; CHECK-NEXT: [[J_020_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US]] ], [ [[INC10_US:%.*]], [[FOR_COND_CLEANUP3_US:%.*]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_US]] +; CHECK: for.cond.cleanup3.us: +; CHECK-NEXT: [[INC10_US]] = add nuw i32 [[J_020_US]], 1 +; CHECK-NEXT: [[EXITCOND22_NOT_US:%.*]] = icmp eq i32 [[INC10_US]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND22_NOT_US]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US]], !prof [[PROF19]] +; CHECK: for.cond.cleanup.loopexit.split.us: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] +; CHECK: for.cond1.preheader.lr.ph.split: +; CHECK-NEXT: br i1 [[COND]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT:%.*]] +; CHECK: for.cond1.preheader.lr.ph.split.split.us: +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US1:%.*]] +; CHECK: for.cond1.preheader.us1: +; CHECK-NEXT: [[J_020_US2:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US]] ], [ [[INC10_US4:%.*]], [[FOR_COND_CLEANUP3_US3:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_US:%.*]] +; CHECK: for.cond.cleanup3.us3: +; CHECK-NEXT: [[INC10_US4]] = add nuw i32 [[J_020_US2]], 1 +; CHECK-NEXT: [[EXITCOND22_NOT_US5:%.*]] = icmp eq i32 [[INC10_US4]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND22_NOT_US5]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US1]], !prof [[PROF19]] +; CHECK: for.body4.preheader.us: +; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_SPLIT_US_US:%.*]] +; CHECK: for.cond.cleanup3.loopexit.us: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_US3]] +; CHECK: for.body4.preheader.split.us.us: +; CHECK-NEXT: br label [[FOR_BODY4_US_US:%.*]] +; CHECK: for.body4.us.us: +; CHECK-NEXT: [[INDVARS_IV_US_US:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_US_US:%.*]], [[FOR_INC_US_US:%.*]] ], [ 0, [[FOR_BODY4_PREHEADER_SPLIT_US_US]] ] +; CHECK-NEXT: [[ARRAYIDX_US_US:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV_US_US]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_US_US]], align 4 +; CHECK-NEXT: [[ARRAYIDX6_US_US:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV_US_US]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX6_US_US]], align 4 +; CHECK-NEXT: [[ADD_US_US:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX8_US_US:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_US_US]] +; 
CHECK-NEXT: store i32 [[ADD_US_US]], ptr [[ARRAYIDX8_US_US]], align 4 +; CHECK-NEXT: br label [[IF_THEN_US_US:%.*]] +; CHECK: if.then.us.us: +; CHECK-NEXT: tail call void @_Z12do_somethingv() +; CHECK-NEXT: br label [[FOR_INC_US_US]] +; CHECK: for.inc.us.us: +; CHECK-NEXT: [[WIDE_TRIP_COUNT_US_US:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[INDVARS_IV_NEXT_US_US]] = add nuw nsw i64 [[INDVARS_IV_US_US]], 1 +; CHECK-NEXT: [[EXITCOND_NOT_US_US:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_US_US]], [[WIDE_TRIP_COUNT_US_US]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT_US_US]], label [[FOR_COND_CLEANUP3_LOOPEXIT_SPLIT_US_US:%.*]], label [[FOR_BODY4_US_US]], !prof [[PROF20]] +; CHECK: for.cond.cleanup3.loopexit.split.us.us: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_LOOPEXIT_US:%.*]] +; CHECK: for.cond.cleanup.loopexit.split.split.us: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]] +; CHECK: for.cond1.preheader.lr.ph.split.split: +; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] +; CHECK: for.cond1.preheader: +; CHECK-NEXT: [[J_020:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT]] ], [ [[INC10:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER:%.*]] +; CHECK: for.body4.preheader: +; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_SPLIT:%.*]] +; CHECK: for.body4.preheader.split: +; CHECK-NEXT: br label [[FOR_BODY4:%.*]] +; CHECK: for.cond.cleanup.loopexit.split.split: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT]] +; CHECK: for.cond.cleanup.loopexit.split: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; CHECK: for.cond.cleanup3.loopexit.split: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_LOOPEXIT:%.*]] +; CHECK: for.cond.cleanup3.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3]] +; CHECK: for.cond.cleanup3: +; CHECK-NEXT: [[INC10]] = add nuw i32 [[J_020]], 1 +; CHECK-NEXT: [[EXITCOND22_NOT:%.*]] = icmp eq i32 [[INC10]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND22_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_SPLIT:%.*]], label [[FOR_COND1_PREHEADER]], !prof [[PROF19]] +; CHECK: for.body4: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY4_PREHEADER_SPLIT]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX8]], align 4 +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3_LOOPEXIT_SPLIT:%.*]], label [[FOR_BODY4]], !prof [[PROF20]] +; +entry: + %cmp19.not = icmp eq i32 %M, 0 + br i1 %cmp19.not, label %for.cond.cleanup, label %for.cond1.preheader.lr.ph, !prof !37 + +for.cond1.preheader.lr.ph: + %cmp217.not = icmp eq i32 %N, 0 + br label %for.cond1.preheader + +for.cond1.preheader: + %j.020 = phi i32 [ 
0, %for.cond1.preheader.lr.ph ], [ %inc10, %for.cond.cleanup3 ] + br i1 %cmp217.not, label %for.cond.cleanup3, label %for.body4, !prof !38 + +for.cond.cleanup: + ret void + +for.cond.cleanup3: + %inc10 = add nuw i32 %j.020, 1 + %exitcond22.not = icmp eq i32 %inc10, %M + br i1 %exitcond22.not, label %for.cond.cleanup, label %for.cond1.preheader, !prof !37 + +for.body4: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.cond1.preheader ] + %arrayidx = getelementptr inbounds i32, ptr %B, i64 %indvars.iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx6 = getelementptr inbounds i32, ptr %C, i64 %indvars.iv + %1 = load i32, ptr %arrayidx6, align 4 + %add = add nsw i32 %1, %0 + %arrayidx8 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv + store i32 %add, ptr %arrayidx8, align 4 + br i1 %cond, label %if.then, label %for.inc + +if.then: + tail call void @_Z12do_somethingv() + br label %for.inc + +for.inc: + %wide.trip.count = zext i32 %N to i64 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4, !prof !38 +} + +declare void @_Z12do_somethingv() + +!llvm.module.flags = !{!6} + +!6 = !{i32 1, !"ProfileSummary", !7} +!7 = !{!8, !9, !10, !11, !12, !13, !14, !15, !16, !17} +!8 = !{!"ProfileFormat", !"InstrProf"} +!9 = !{!"TotalCount", i64 1002} +!10 = !{!"MaxCount", i64 1000} +!11 = !{!"MaxInternalCount", i64 1000} +!12 = !{!"MaxFunctionCount", i64 1} +!13 = !{!"NumCounts", i64 6} +!14 = !{!"NumFunctions", i64 3} +!15 = !{!"IsPartialProfile", i64 0} +!16 = !{!"PartialProfileRatio", double 0.000000e+00} +!17 = !{!"DetailedSummary", !18} +!18 = !{!19, !29, !30, !32, !34} +!19 = !{i32 10000, i64 10000, i32 3} +!29 = !{i32 950000, i64 10000, i32 3} +!30 = !{i32 990000, i64 1000, i32 4} +!32 = !{i32 999900, i64 1000, i32 4} +!34 = !{i32 999999, i64 1, i32 6} +!36 = !{!"function_entry_count", i64 1} +!37 = !{!"branch_weights", i32 1, i32 1000} +!38 = !{!"branch_weights", i32 1000, i32 10000} From 4e298c32d558cbe7059bb90e4306c22c6702016e Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 17 Mar 2023 17:57:15 -0700 Subject: [PATCH 044/691] [libc] Make string tests compatible with the Fuchsia build Some test code was doing loose conversions caught by compiler warnings in the Fuchsia build. This included duplicated code in a few tests that was reconsolidated with the existing header file copy of the same functions. The MemoryMatcher abstraction presumes gtest-style matcher support, which is not available in Fuchsia's zxtest library. It's avoided in favor of simpler memory-comparing assertions. 
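The loose-conversion pattern is easy to reproduce in isolation. Below is a
reduced, self-contained C++ sketch of the idea (illustrative only, mirroring
the helper consolidated into memory_check_utils.h in the diff that follows):

#include <cstdint>

// A tiny LCG whose 64-bit state is narrowed to produce one random byte.
static char get_random_char() {
  static constexpr uint64_t a = 1103515245;
  static constexpr uint64_t c = 12345;
  static constexpr uint64_t m = 1ULL << 31;
  static uint64_t seed = 123456789;
  seed = (a * seed + c) % m;
  // 'return seed;' would implicitly convert uint64_t to char and trip the
  // stricter warnings used by the Fuchsia build; the cast makes it explicit.
  return static_cast<char>(seed);
}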
Reviewed By: abrachet

Differential Revision: https://reviews.llvm.org/D146343
---
 libc/test/UnitTest/MemoryMatcher.h            | 32 +++++++++++++++++--
 libc/test/src/string/bcopy_test.cpp           | 20 +++---------
 libc/test/src/string/memmove_test.cpp         | 20 +++---------
 .../string/memory_utils/memory_check_utils.h  |  2 +-
 libc/test/src/string/strsignal_test.cpp       |  5 +--
 5 files changed, 41 insertions(+), 38 deletions(-)

diff --git a/libc/test/UnitTest/MemoryMatcher.h b/libc/test/UnitTest/MemoryMatcher.h
index e08b452cf9a84..48d630a39e887 100644
--- a/libc/test/UnitTest/MemoryMatcher.h
+++ b/libc/test/UnitTest/MemoryMatcher.h
@@ -19,6 +19,32 @@ namespace testing {

 using MemoryView = __llvm_libc::cpp::span<const char>;

+} // namespace testing
+} // namespace memory
+} // namespace __llvm_libc
+
+#ifdef LIBC_COPT_TEST_USE_FUCHSIA
+
+#define EXPECT_MEM_EQ(expected, actual)                                        \
+  do {                                                                         \
+    __llvm_libc::memory::testing::MemoryView e = (expected);                   \
+    __llvm_libc::memory::testing::MemoryView a = (actual);                     \
+    ASSERT_EQ(e.size(), a.size());                                             \
+    EXPECT_BYTES_EQ(e.data(), a.data(), e.size());                             \
+  } while (0)
+
+#define ASSERT_MEM_EQ(expected, actual)                                        \
+  do {                                                                         \
+    __llvm_libc::memory::testing::MemoryView e = (expected);                   \
+    __llvm_libc::memory::testing::MemoryView a = (actual);                     \
+    ASSERT_EQ(e.size(), a.size());                                             \
+    ASSERT_BYTES_EQ(e.data(), a.data(), e.size());                             \
+  } while (0)
+
+#else
+
+namespace __llvm_libc::memory::testing {
+
 class MemoryMatcher : public __llvm_libc::testing::Matcher<MemoryView> {
   MemoryView expected;
   MemoryView actual;
@@ -33,13 +59,13 @@ class MemoryMatcher : public __llvm_libc::testing::Matcher<MemoryView> {
   void explainError(testutils::StreamWrapper &stream) override;
 };

-} // namespace testing
-} // namespace memory
-} // namespace __llvm_libc
+} // namespace __llvm_libc::memory::testing

 #define EXPECT_MEM_EQ(expected, actual)                                        \
   EXPECT_THAT(actual, __llvm_libc::memory::testing::MemoryMatcher(expected))
 #define ASSERT_MEM_EQ(expected, actual)                                        \
   ASSERT_THAT(actual, __llvm_libc::memory::testing::MemoryMatcher(expected))

+#endif
+
 #endif // LLVM_LIBC_UTILS_UNITTEST_MEMORY_MATCHER_H
diff --git a/libc/test/src/string/bcopy_test.cpp b/libc/test/src/string/bcopy_test.cpp
index c1c0dae4fcd6e..affd23b1bd8b1 100644
--- a/libc/test/src/string/bcopy_test.cpp
+++ b/libc/test/src/string/bcopy_test.cpp
@@ -6,8 +6,10 @@
 //
 //===----------------------------------------------------------------------===//

-#include "src/__support/CPP/span.h"
 #include "src/string/bcopy.h"
+
+#include "memory_utils/memory_check_utils.h"
+#include "src/__support/CPP/span.h"
 #include "test/UnitTest/MemoryMatcher.h"
 #include "test/UnitTest/Test.h"

@@ -70,24 +72,10 @@ TEST(LlvmLibcBcopyTest, DstFollowSrc) {

 static constexpr int kMaxSize = 512;

-char GetRandomChar() {
-  static constexpr const uint64_t A = 1103515245;
-  static constexpr const uint64_t C = 12345;
-  static constexpr const uint64_t M = 1ULL << 31;
-  static uint64_t Seed = 123456789;
-  Seed = (A * Seed + C) % M;
-  return Seed;
-}
-
-void Randomize(span<char> Buffer) {
-  for (auto &current : Buffer)
-    current = GetRandomChar();
-}
-
 TEST(LlvmLibcBcopyTest, SizeSweep) {
   using LargeBuffer = array<char, 3 * kMaxSize>;
   LargeBuffer GroundTruth;
-  Randomize(GroundTruth);
+  __llvm_libc::Randomize(GroundTruth);
   for (int Size = 0; Size < kMaxSize; ++Size) {
     for (int Offset = -Size; Offset < Size; ++Offset) {
       LargeBuffer Buffer = GroundTruth;
diff --git a/libc/test/src/string/memmove_test.cpp b/libc/test/src/string/memmove_test.cpp
index ab5d8ad210727..dad834c091fcc 100644
--- a/libc/test/src/string/memmove_test.cpp
+++ b/libc/test/src/string/memmove_test.cpp
@@ -6,8 +6,10 @@
 //
 //===----------------------------------------------------------------------===//

-#include "src/__support/CPP/span.h"
 #include "src/string/memmove.h"
+
+#include "memory_utils/memory_check_utils.h"
+#include "src/__support/CPP/span.h"
 #include "test/UnitTest/MemoryMatcher.h"
 #include "test/UnitTest/Test.h"

@@ -76,24 +78,10 @@ TEST(LlvmLibcMemmoveTest, DstFollowSrc) {

 static constexpr int kMaxSize = 512;

-char GetRandomChar() {
-  static constexpr const uint64_t A = 1103515245;
-  static constexpr const uint64_t C = 12345;
-  static constexpr const uint64_t M = 1ULL << 31;
-  static uint64_t Seed = 123456789;
-  Seed = (A * Seed + C) % M;
-  return Seed;
-}
-
-void Randomize(span<char> Buffer) {
-  for (auto &current : Buffer)
-    current = GetRandomChar();
-}
-
 TEST(LlvmLibcMemmoveTest, SizeSweep) {
   using LargeBuffer = array<char, 3 * kMaxSize>;
   LargeBuffer GroundTruth;
-  Randomize(GroundTruth);
+  __llvm_libc::Randomize(GroundTruth);
   for (int Size = 0; Size < kMaxSize; ++Size) {
     for (int Offset = -Size; Offset < Size; ++Offset) {
       LargeBuffer Buffer = GroundTruth;
diff --git a/libc/test/src/string/memory_utils/memory_check_utils.h b/libc/test/src/string/memory_utils/memory_check_utils.h
index 325cba30c61bb..930161a95d929 100644
--- a/libc/test/src/string/memory_utils/memory_check_utils.h
+++ b/libc/test/src/string/memory_utils/memory_check_utils.h
@@ -65,7 +65,7 @@ static inline char GetRandomChar() {
   static constexpr const uint64_t m = 1ULL << 31;
   static uint64_t seed = 123456789;
   seed = (a * seed + c) % m;
-  return seed;
+  return static_cast<char>(seed);
 }

 // Randomize the content of the buffer.
diff --git a/libc/test/src/string/strsignal_test.cpp b/libc/test/src/string/strsignal_test.cpp
index f939b2376603f..aa55230d5286b 100644
--- a/libc/test/src/string/strsignal_test.cpp
+++ b/libc/test/src/string/strsignal_test.cpp
@@ -66,10 +66,11 @@ TEST(LlvmLibcStrSignalTest, KnownSignals) {
   };

   for (size_t i = 0; i < (sizeof(message_array) / sizeof(char *)); ++i) {
-    EXPECT_STREQ(__llvm_libc::strsignal(i), message_array[i]);
+    ASSERT_EQ(static_cast<size_t>(static_cast<int>(i)), i);
+    EXPECT_STREQ(__llvm_libc::strsignal(static_cast<int>(i)), message_array[i]);
   }

-  for (size_t i = 0; i < SIGRTMAX - SIGRTMIN; ++i) {
+  for (int i = 0; i < SIGRTMAX - SIGRTMIN; ++i) {
     EXPECT_STREQ(__llvm_libc::strsignal(i + SIGRTMIN), rt_message_array[i]);
   }
 }
From e4f62da8129d9632d77fd4db5cf8caede0a1e5a6 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere
Date: Mon, 20 Mar 2023 10:30:42 -0700
Subject: [PATCH 045/691] [lldb] Sidestep -Wformat warning by using LLDB_LOG
 instead of LLDB_LOGF

Fixes warning: format specifies type 'unsigned long' but the argument
has type 'DataType' (aka 'unsigned long long') [-Wformat]
---
 lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
index c52caab3e735e..aa5d7f61dbfbb 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugAranges.cpp
@@ -76,8 +76,8 @@ void DWARFDebugAranges::Dump(Log *log) const {
   for (size_t i = 0; i < num_entries; ++i) {
     const RangeToDIE::Entry *entry = m_aranges.GetEntryAtIndex(i);
     if (entry)
-      LLDB_LOGF(log, "0x%8.8lx: [0x%" PRIx64 " - 0x%" PRIx64 ")", entry->data,
-                entry->GetRangeBase(), entry->GetRangeEnd());
+      LLDB_LOG(log, "{0:x8}: [{1:x16} - {2:x16})", entry->data,
+               entry->GetRangeBase(), entry->GetRangeEnd());
   }
 }
From cd11f55a0c6d8524f9ac494d26514d4d72d83435 Mon Sep 17 00:00:00 2001
From: Dmitri Gribenko
Date: Mon, 20 Mar 2023 18:33:40 +0100
Subject: [PATCH 046/691] [clang][dataflow] Fix indentation in a test

---
 .../Analysis/FlowSensitive/DataflowEnvironmentTest.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
index fa7322bc586be..770c7a50195e9 100644
--- a/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
@@ -134,9 +134,9 @@ TEST_F(EnvironmentTest, IncludeFieldsFromDefaultInitializers) {

   std::string Code = R"cc(
     struct S {
-    S() {}
-    int X = 3;
-    int Y = X;
+      S() {}
+      int X = 3;
+      int Y = X;
     };
     S foo();
   )cc";
From 72073fc95cd4793a853925ddc8cc3fb2118808a5 Mon Sep 17 00:00:00 2001
From: John Brawn
Date: Tue, 21 Feb 2023 11:09:26 +0000
Subject: [PATCH 047/691] [Serialization] Place command line defines in the
 correct file

Fix several problems related to serialization causing command line
defines to be reported as being built-in defines:

* When serializing the <built-in> and <command line> files don't convert
  them into absolute paths.
* When deserializing SM_SLOC_BUFFER_ENTRY we need to call
  setHasLineDirectives in the same way as we do for SM_SLOC_FILE_ENTRY.
* When creating suggested predefines based on the current command line
  options we need to add line markers in the same way that
  InitializePreprocessor does.
* Adjust a place in clangd where it was implicitly relying on command line
  defines being treated as builtin.

Differential Revision: https://reviews.llvm.org/D144651
---
 .../clangd/index/SymbolCollector.cpp   |  4 +++-
 clang/docs/ReleaseNotes.rst            |  4 ++--
 clang/lib/Serialization/ASTReader.cpp  | 17 +++++++++++++++--
 clang/lib/Serialization/ASTWriter.cpp  |  5 +++++
 clang/test/PCH/macro-cmdline.c         | 13 +++++++++++++
 clang/test/PCH/ms-pch-macro.c          |  2 +-
 6 files changed, 39 insertions(+), 6 deletions(-)
 create mode 100644 clang/test/PCH/macro-cmdline.c

diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index 3179810b1b185..519aceec15a18 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -687,8 +687,10 @@ bool SymbolCollector::handleMacroOccurrence(const IdentifierInfo *Name,

   const auto &SM = PP->getSourceManager();
   auto DefLoc = MI->getDefinitionLoc();
-  // Also avoid storing predefined macros like __DBL_MIN__.
+  // Also avoid storing macros that aren't defined in any file, i.e. predefined
+  // macros like __DBL_MIN__ and those defined on the command line.
   if (SM.isWrittenInBuiltinFile(DefLoc) ||
+      SM.isWrittenInCommandLineFile(DefLoc) ||
       Name->getName() == "__GCC_HAVE_DWARF2_CFI_ASM")
     return true;

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 53001f651ea4b..78c57500568f9 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -172,8 +172,8 @@ Improvements to Clang's diagnostics
 - Diagnostic notes and fix-its are now generated for ``ifunc``/``alias`` attributes
   which point to functions whose names are mangled.
 - Diagnostics relating to macros on the command line of a preprocessed assembly
-  file are now reported as coming from the file ``<command line>`` instead of
-  ``<built-in>``.
+  file or precompiled header are now reported as coming from the file
+  ``<command line>`` instead of ``<built-in>``.
 - Clang constexpr evaluator now provides a more concise diagnostic when calling
   function pointer that is known to be null.
 - Clang now avoids duplicate warnings on unreachable ``[[fallthrough]];`` statements
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 0273fa1b839a5..6654df40010cb 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -654,6 +654,10 @@ static bool checkPreprocessorOptions(
   SmallVector<StringRef, 4> ExistingMacroNames;
   collectMacroDefinitions(ExistingPPOpts, ExistingMacros, &ExistingMacroNames);

+  // Use a line marker to enter the <command line> file, as the defines and
+  // undefines here will have come from the command line.
+  SuggestedPredefines += "# 1 \"<command line>\" 1\n";
+
   for (unsigned I = 0, N = ExistingMacroNames.size(); I != N; ++I) {
     // Dig out the macro definition in the existing preprocessor options.
     StringRef MacroName = ExistingMacroNames[I];
@@ -713,6 +717,10 @@ static bool checkPreprocessorOptions(
     }
     return true;
   }
+
+  // Leave the <command line> file and return to <built-in>.
+  SuggestedPredefines += "# 1 \"<built-in>\" 2\n";
+
   if (Validation == OptionValidateStrictMatches) {
     // If strict matches are requested, don't tolerate any extra defines in
     // the AST file that are missing on the command line.
@@ -1579,8 +1587,13 @@ bool ASTReader::ReadSLocEntry(int ID) {
     auto Buffer = ReadBuffer(SLocEntryCursor, Name);
     if (!Buffer)
       return true;
-    SourceMgr.createFileID(std::move(Buffer), FileCharacter, ID,
-                           BaseOffset + Offset, IncludeLoc);
+    FileID FID = SourceMgr.createFileID(std::move(Buffer), FileCharacter, ID,
+                                        BaseOffset + Offset, IncludeLoc);
+    if (Record[3]) {
+      auto &FileInfo =
+          const_cast<SrcMgr::FileInfo &>(SourceMgr.getSLocEntry(FID).getFile());
+      FileInfo.setHasLineDirectives();
+    }
     break;
   }

diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index f5691e99241c1..e8f390bc5b1dd 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -4444,6 +4444,11 @@ void ASTWriter::AddString(StringRef Str, RecordDataImpl &Record) {
 bool ASTWriter::PreparePathForOutput(SmallVectorImpl<char> &Path) {
   assert(Context && "should have context when outputting path");

+  // Leave special file names as they are.
+  StringRef PathStr(Path.data(), Path.size());
+  if (PathStr == "<built-in>" || PathStr == "<command line>")
+    return false;
+
   bool Changed =
       cleanPathForOutput(Context->getSourceManager().getFileManager(), Path);

diff --git a/clang/test/PCH/macro-cmdline.c b/clang/test/PCH/macro-cmdline.c
new file mode 100644
index 0000000000000..c4647f5d4c92e
--- /dev/null
+++ b/clang/test/PCH/macro-cmdline.c
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 %s -emit-pch -o %t1.pch -DMACRO1=1
+// RUN: %clang_cc1 -fsyntax-only %s -include-pch %t1.pch -DMACRO2=1 2>&1 | FileCheck %s
+
+#ifndef HEADER
+#define HEADER
+#else
+#define MACRO1 2
+// CHECK: macro-cmdline.c{{.*}}'MACRO1' macro redefined
+// CHECK: {{.*}}previous definition is here
+#define MACRO2 2
+// CHECK: macro-cmdline.c{{.*}}'MACRO2' macro redefined
+// CHECK: {{.*}}previous definition is here
+#endif
diff --git a/clang/test/PCH/ms-pch-macro.c b/clang/test/PCH/ms-pch-macro.c
index 3a8052eb322e9..a512e66e24866 100644
--- a/clang/test/PCH/ms-pch-macro.c
+++ b/clang/test/PCH/ms-pch-macro.c
@@ -36,4 +36,4 @@ BAR bar = 17;
 // CHECK-FOO: definition of macro 'FOO' differs between the precompiled header ('1') and the command line ('blah')
 // CHECK-NOFOO: macro 'FOO' was defined in the precompiled header but undef'd on the command line

-// expected-warning@1 {{definition of macro 'BAR' does not match definition in precompiled header}}
+// expected-warning@2 {{definition of macro 'BAR' does not match definition in precompiled header}}
From b9521484ec72142bce8124a171206fc577abfd3f Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Mon, 20 Mar 2023 09:54:56 -0700
Subject: [PATCH 048/691] [LSR] Rewrite IV match for term-fold using existing
 utilities

Main benefit here is making the logic easier to follow, slightly more
efficient, and more in line with LFTR. This is not NFC. There are three
semantic changes here.

First, we drop handling for constants on the LHS of the comparison. These
are non-canonical, and we're very late in the optimization pipeline here,
so there's no point in supporting this. I removed a test which covered
this case.

Second, we don't need the almost dead IV to be an addrec. We just need
SCEV to be able to compute a trip count for it.

Third, we require a simple IV for the almost dead IV. In theory, this
removes cases we could have previously handled, but given a) zero testing
and b) multiple known correctness issues, I'm adopting an attitude of
narrowing this down to something which works correctly, and *then*
expanding.
---
 .../Transforms/Scalar/LoopStrengthReduce.cpp  | 66 ++++++-------------
 .../lsr-term-fold-negative-testcase.ll        |  7 +-
 .../LoopStrengthReduce/lsr-term-fold.ll       | 33 ----------
 3 files changed, 23 insertions(+), 83 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 5ff7c1027108b..5d8e822eaddff 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6719,49 +6719,23 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
     return std::nullopt;
   }

-  // For `IsToFold`, a primary IV can be replaced by other affine AddRec when it
-  // is only used by the terminating condition. To check for this, we may need
-  // to traverse through a chain of use-def until we can examine the final
-  // usage.
-  //         *----------------------*
-  //   *---->| LoopHeader:          |
-  //   |     | PrimaryIV = phi ...  |
-  //   |     *----------------------*
-  //   |        |
-  //   |        |
-  //   |     chain of
-  //   |     single use
-  // used by  |
-  //  phi     |
-  //   |    Value
-  //   |    /   \
-  //   | chain of  chain of
-  //   | single use  single use
-  //   |   /         \
-  //   |  /           \
-  //   *- Value        Value --> used by terminating condition
-  auto IsToFold = [&](PHINode &PN) -> bool {
-    Value *V = &PN;
-
-    while (V->getNumUses() == 1)
-      V = *V->user_begin();
-
-    if (V->getNumUses() != 2)
-      return false;
+  BinaryOperator *LHS = dyn_cast<BinaryOperator>(TermCond->getOperand(0));
+  Value *RHS = TermCond->getOperand(1);
+  if (!LHS || !L->isLoopInvariant(RHS))
+    // We could pattern match the inverse form of the icmp, but that is
+    // non-canonical, and this pass is running *very* late in the pipeline.
+    return std::nullopt;

-    Value *VToPN = nullptr;
-    Value *VToTermCond = nullptr;
-    for (User *U : V->users()) {
-      while (U->getNumUses() == 1) {
-        if (isa<PHINode>(U))
-          VToPN = U;
-        if (U == TermCond)
-          VToTermCond = U;
-        U = *U->user_begin();
-      }
-    }
-    return VToPN && VToTermCond;
-  };
+  // Find the IV used by the current exit condition.
+  PHINode *ToFold;
+  Value *ToFoldStart, *ToFoldStep;
+  if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
+    return std::nullopt;
+
+  // If that IV isn't dead after we rewrite the exit condition in terms of
+  // another IV, there's no point in doing the transform.
+  if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
+    return std::nullopt;

   // If this is an IV which we could replace the terminating condition, return
   // the final value of the alternative IV on the last iteration.
@@ -6789,11 +6763,13 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
     return TermValueS;
   };

-  PHINode *ToFold = nullptr;
   PHINode *ToHelpFold = nullptr;
   const SCEV *TermValueS = nullptr;

   for (PHINode &PN : L->getHeader()->phis()) {
+    if (ToFold == &PN)
+      continue;
+
     if (!SE.isSCEVable(PN.getType())) {
       LLVM_DEBUG(dbgs() << "IV of phi '" << PN
                         << "' is not SCEV-able, not qualified for the "
@@ -6809,9 +6785,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
       continue;
     }

-    if (IsToFold(PN))
-      ToFold = &PN;
-    else if (auto P = getAlternateIVEnd(PN)) {
+    if (auto P = getAlternateIVEnd(PN)) {
       ToHelpFold = &PN;
       TermValueS = P;
     }
diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
index 8682351a4e30c..1b9b58f79b480 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
@@ -106,15 +106,14 @@ define void @NonAddRecIV(ptr %a) {
 ; CHECK-SAME: (ptr [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A]], i32 84
+; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 148
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ 1, [[ENTRY]] ]
 ; CHECK-NEXT:    store i32 1, ptr [[LSR_IV1]], align 4
-; CHECK-NEXT:    [[LSR_IV_NEXT]] = mul nsw i32 [[LSR_IV]], 2
 ; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 65536
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT:    [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]]
+; CHECK-NEXT:    br
i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll index 7da1a73a21d1e..a72e859791574 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll @@ -297,39 +297,6 @@ define void @IcmpSgt(ptr %a) { ; CHECK-LABEL: @IcmpSgt( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 88 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] -; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 -; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] -; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: for.end: -; CHECK-NEXT: ret void -; -entry: - %uglygep = getelementptr i8, ptr %a, i32 84 - br label %for.body - -for.body: ; preds = %for.body, %entry - %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ] - %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ 379, %entry ] - store i32 1, ptr %lsr.iv1, align 4 - %lsr.iv.next = add nsw i32 %lsr.iv, -1 - %uglygep2 = getelementptr i8, ptr %lsr.iv1, i32 4 - %exitcond.not = icmp sgt i32 0, %lsr.iv.next - br i1 %exitcond.not, label %for.body, label %for.end - -for.end: ; preds = %for.body - ret void -} - -; Invert predicate and branches -define void @IcmpSgt2(ptr %a) { -; CHECK-LABEL: @IcmpSgt2( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 1600 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: From c47da7f109468efbe77c27d436fd69fa2b3ad991 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Fri, 17 Mar 2023 15:14:21 -0700 Subject: [PATCH 049/691] [lldb] Introduce CMake variable LLDB_ENFORCE_STRICT_TEST_REQUIREMENTS The goal of this patch is to add the ability for the CMake configure to fail when some optional test dependencies are not met. LLDB tries to be flexible when test dependencies are not present but there are cases where it would be useful to know that these dependencies are missing before we run the test suite. The intent here is to apply this setting on CI machines and make sure that they have useful optional dependencies installed. We recently hit a case where some CI machines were timing out while running the test suite because a few tests were hanging. With this option, we'll be able to know if the machine does not have psutil installed so we can install it and avoid the timeout scenario altogether. 
rdar://103194447 Differential Revision: https://reviews.llvm.org/D146335 --- lldb/cmake/modules/AddLLDB.cmake | 19 +++++++++++++++++++ lldb/cmake/modules/LLDBConfig.cmake | 2 ++ lldb/test/CMakeLists.txt | 25 +++++++++++++++++++++++++ 3 files changed, 46 insertions(+) diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake index 251c2957e2d19..374946fe49083 100644 --- a/lldb/cmake/modules/AddLLDB.cmake +++ b/lldb/cmake/modules/AddLLDB.cmake @@ -349,6 +349,25 @@ function(lldb_find_system_debugserver path) endif() endfunction() +function(lldb_find_python_module module) + set(MODULE_FOUND PY_${module}_FOUND) + if (DEFINED ${MODULE_FOUND}) + return() + endif() + + execute_process(COMMAND "${Python3_EXECUTABLE}" "-c" "import ${module}" + RESULT_VARIABLE status + ERROR_QUIET) + + if (status) + set(${MODULE_FOUND} OFF CACHE BOOL "Failed to find python module '${module}'") + message(STATUS "Could NOT find Python module '${module}'") + else() + set(${MODULE_FOUND} ON CACHE BOOL "Found python module '${module}'") + message(STATUS "Found Python module '${module}'") + endif() +endfunction() + # Removes all module flags from the current CMAKE_CXX_FLAGS. Used for # the Objective-C++ code in lldb which we don't want to build with modules. # Reasons for this are that modules with Objective-C++ would require that diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index ec06ba285f270..1393342dd5cb6 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -71,6 +71,8 @@ option(LLDB_NO_INSTALL_DEFAULT_RPATH "Disable default RPATH settings in binaries option(LLDB_USE_SYSTEM_DEBUGSERVER "Use the system's debugserver for testing (Darwin only)." OFF) option(LLDB_SKIP_STRIP "Whether to skip stripping of binaries when installing lldb." OFF) option(LLDB_SKIP_DSYM "Whether to skip generating a dSYM when installing lldb." OFF) +option(LLDB_ENFORCE_STRICT_TEST_REQUIREMENTS + "Fail to configure if certain requirements are not met for testing." OFF) set(LLDB_GLOBAL_INIT_DIRECTORY "" CACHE STRING "Path to the global lldbinit directory. Relative paths are resolved relative to the diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index 0b0a9e9065257..ca15b96586cad 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -1,5 +1,30 @@ # Test runner infrastructure for LLDB. This configures the LLDB test trees # for use by Lit, and delegates to LLVM's lit test handlers. +# Lit requires a Python3 interpreter, let's be careful and fail early if it's +# not present. +if (NOT DEFINED Python3_EXECUTABLE) + message(FATAL_ERROR + "LLDB test suite requires a Python3 interpreter but none " + "was found. Please install Python3 or disable tests with " + "`LLDB_INCLUDE_TESTS=OFF`.") +endif() + +if(LLDB_ENFORCE_STRICT_TEST_REQUIREMENTS) + message(STATUS "Enforcing strict test requirements for LLDB") + set(useful_python_modules + psutil # Lit uses psutil to do per-test timeouts. + ) + foreach(module ${useful_python_modules}) + lldb_find_python_module(${module}) + if (NOT PY_${module}_FOUND) + message(FATAL_ERROR + "Python module '${module}' not found. Please install it via pip or via " + "your operating system's package manager. 
Alternatively, disable " + "strict testing requirements with " + "`LLDB_ENFORCE_STRICT_TEST_REQUIREMENTS=OFF`") + endif() + endforeach() +endif() if(LLDB_BUILT_STANDALONE) # In order to run check-lldb-* we need the correct map_config directives in From 60b117aa81493e4f91df11ec2322957ec7800f55 Mon Sep 17 00:00:00 2001 From: Maryam Moghadas Date: Wed, 22 Feb 2023 19:33:04 -0600 Subject: [PATCH 050/691] [PowerPC] Adding test coverage for vector compatibility warning This is to test D143210 patch to have the same vector compatibility logic for error and warning diagnostics. Reviewed By: lei Differential Revision: https://reviews.llvm.org/D144611 --- clang/test/Parser/lax-conv.cpp | 45 ++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/clang/test/Parser/lax-conv.cpp b/clang/test/Parser/lax-conv.cpp index b3b6112649288..f784e3fa74e7e 100644 --- a/clang/test/Parser/lax-conv.cpp +++ b/clang/test/Parser/lax-conv.cpp @@ -2,6 +2,20 @@ // RUN: %clang_cc1 -triple=powerpc64le-unknown-linux-gnu -target-feature +altivec -target-feature +vsx -target-cpu pwr8 -fsyntax-only -verify=expected,novsx %s // RUN: %clang_cc1 -triple=powerpc64-ibm-aix -target-feature +altivec -target-feature +vsx -target-cpu pwr8 -fsyntax-only -verify=expected,aix %s +vector bool short vbs; +vector signed short vss; +vector unsigned short vus; +vector bool int vbi; +vector signed int vsi; +vector unsigned int vui; +vector bool long long vbl; +vector signed long long vsl; +vector unsigned long long vul; +vector bool char vbc; +vector signed char vsc; +vector unsigned char vuc; +vector pixel vp; + void dummy(vector unsigned int a); template VEC __attribute__((noinline)) test(vector unsigned char a, vector unsigned char b) { return (VEC)(a * b); @@ -65,3 +79,34 @@ void test7a(vector unsigned char ArgExplicitConvAddSame1Full, return dummy((vector unsigned int)(ArgExplicitConvAddSame1Full + ArgExplicitConvAddSame2Full)); } +void test_bool_compat(void) { + vbs = vss; // expected-warning {{Implicit conversion between vector types (''__vector short' (vector of 8 'short' values)' and ''__vector __bool unsigned short' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vbs = vus; // expected-warning {{Implicit conversion between vector types (''__vector unsigned short' (vector of 8 'unsigned short' values)' and ''__vector __bool unsigned short' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + + vbi = vsi; // expected-warning {{Implicit conversion between vector types (''__vector int' (vector of 4 'int' values)' and ''__vector __bool unsigned int' (vector of 4 'unsigned int' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vbi = vui; // expected-warning {{Implicit conversion between vector types (''__vector unsigned int' (vector of 4 'unsigned int' values)' and ''__vector __bool unsigned int' (vector of 4 'unsigned int' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + + vbl = vsl; // expected-warning {{Implicit conversion between vector types (''__vector long long' (vector of 2 'long long' values)' and ''__vector __bool unsigned long long' (vector of 2 'unsigned long long' values)') is deprecated. 
In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vbl = vul; // expected-warning {{Implicit conversion between vector types (''__vector unsigned long long' (vector of 2 'unsigned long long' values)' and ''__vector __bool unsigned long long' (vector of 2 'unsigned long long' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + + vbc = vsc; // expected-warning {{Implicit conversion between vector types (''__vector signed char' (vector of 16 'signed char' values)' and ''__vector __bool unsigned char' (vector of 16 'unsigned char' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vbc = vuc; // expected-warning {{Implicit conversion between vector types (''__vector unsigned char' (vector of 16 'unsigned char' values)' and ''__vector __bool unsigned char' (vector of 16 'unsigned char' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} +} + +void test_pixel_compat(void) { + vp = vbs; // expected-warning {{Implicit conversion between vector types (''__vector __bool unsigned short' (vector of 8 'unsigned short' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vp = vss; // expected-warning {{Implicit conversion between vector types (''__vector short' (vector of 8 'short' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vp = vus; // expected-warning {{Implicit conversion between vector types (''__vector unsigned short' (vector of 8 'unsigned short' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + + vp = vbi; // expected-warning {{Implicit conversion between vector types (''__vector __bool unsigned int' (vector of 4 'unsigned int' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vp = vsi; // expected-warning {{Implicit conversion between vector types (''__vector int' (vector of 4 'int' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vp = vui; // expected-warning {{Implicit conversion between vector types (''__vector unsigned int' (vector of 4 'unsigned int' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + + vp = vbl; // expected-warning {{Implicit conversion between vector types (''__vector __bool unsigned long long' (vector of 2 'unsigned long long' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vp = vsl; // expected-warning {{Implicit conversion between vector types (''__vector long long' (vector of 2 'long long' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. 
In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vp = vul; // expected-warning {{Implicit conversion between vector types (''__vector unsigned long long' (vector of 2 'unsigned long long' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + + vp = vbc; // expected-warning {{Implicit conversion between vector types (''__vector __bool unsigned char' (vector of 16 'unsigned char' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vp = vsc; // expected-warning {{Implicit conversion between vector types (''__vector signed char' (vector of 16 'signed char' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} + vp = vuc; // expected-warning {{Implicit conversion between vector types (''__vector unsigned char' (vector of 16 'unsigned char' values)' and ''__vector __pixel ' (vector of 8 'unsigned short' values)') is deprecated. In the future, the behavior implied by '-fno-lax-vector-conversions' will be the default.}} +} From 6ac632ad83fb0f1f91cfca2d05e074712aebce79 Mon Sep 17 00:00:00 2001 From: Amy Wang Date: Mon, 20 Mar 2023 13:19:13 -0400 Subject: [PATCH 051/691] [MLIR][Linalg] Generate unique LibraryCallName for LinalgOps. When lowering LinalgToStandard for named UnaryFn/BinaryFn ops, ensure the fun name appears in the generated library name. Further, for linalg.copy to/from different address spaces, ensure the to/from address spaces are appended onto the library name for uniqueness. This fixes the lowering error with the linalg.copy testcase shown in this patch. 
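The naming scheme is easy to see in a small standalone C++ sketch
(illustrative only; the real logic is in appendMangledType and
generateLibraryCallName below, and MemRefDesc/mangle are made-up names):
each operand type is mangled into the library name, and an integer
memory-space attribute contributes an "asN" suffix, so copies between
address spaces no longer collide on a single name.

#include <iostream>
#include <optional>
#include <sstream>
#include <string>

struct MemRefDesc {
  std::string shape;              // e.g. "32xf16"
  std::optional<int> memorySpace; // e.g. 1 or 6; empty for the default space
};

static std::string mangle(const MemRefDesc &m) {
  std::ostringstream ss;
  ss << "view" << m.shape;
  if (m.memorySpace) // the address-space suffix keeps the name unique
    ss << "as" << *m.memorySpace;
  return ss.str();
}

int main() {
  // Prints "linalg_copy_view32xf16as1_view32xf16as6", the same shape as the
  // CHECK lines in the test below.
  std::cout << "linalg_copy_" << mangle({"32xf16", 1}) << "_"
            << mangle({"32xf16", 6}) << "\n";
}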
Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D145467
---
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp    | 16 ++++-
 mlir/test/Dialect/Linalg/library-calls.mlir | 79 ++++++++++++++++++++-
 2 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index f766ab6a27fe3..ee4d064d55d36 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1802,6 +1802,12 @@ static LogicalResult appendMangledType(llvm::raw_string_ostream &ss, Type t) {
         ss << size << "x";
     if (failed(appendMangledType(ss, memref.getElementType())))
       return failure();
+    if (auto as = memref.getMemorySpace()) {
+      if (auto attr = as.dyn_cast<IntegerAttr>())
+        ss << "as" << attr.getInt();
+      else
+        return failure();
+    }
     return success();
   }
   if (auto vec = t.dyn_cast<VectorType>()) {
@@ -1821,10 +1827,18 @@ static LogicalResult appendMangledType(llvm::raw_string_ostream &ss, Type t) {
 std::string mlir::linalg::generateLibraryCallName(Operation *op) {
   assert(isa<LinalgOp>(op));
   std::string name(op->getName().getStringRef().str());
+  std::string fun = "";
+  for (NamedAttribute kv : op->getAttrs()) {
+    if (UnaryFnAttr ufa = kv.getValue().dyn_cast<UnaryFnAttr>()) {
+      fun = stringifyEnum(ufa.getValue()).str() + "_";
+    } else if (BinaryFnAttr bfa = kv.getValue().dyn_cast<BinaryFnAttr>()) {
+      fun = stringifyEnum(bfa.getValue()).str() + "_";
+    }
+  }
   name.reserve(128);
   std::replace(name.begin(), name.end(), '.', '_');
   llvm::raw_string_ostream ss(name);
-  ss << "_";
+  ss << "_" << fun;
   for (Type t : op->getOperandTypes()) {
     if (failed(appendMangledType(ss, t)))
       return std::string();
diff --git a/mlir/test/Dialect/Linalg/library-calls.mlir b/mlir/test/Dialect/Linalg/library-calls.mlir
index cbf1c5ed8b878..1fa675d8b4b68 100644
--- a/mlir/test/Dialect/Linalg/library-calls.mlir
+++ b/mlir/test/Dialect/Linalg/library-calls.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-linalg-to-std | FileCheck %s
+// RUN: mlir-opt %s -convert-linalg-to-std -split-input-file | FileCheck %s

 func.func private @printMemrefF32(memref<*xf32>)

@@ -22,3 +22,80 @@ func.func @matmul(%A: memref<?x?xf32>, %B: memref<?x?xf32>) -> (memref<?x?xf32>)
   return %C : memref<?x?xf32>
 }

+// -----
+
+#accesses = [
+  affine_map<(d0) -> (d0)>,
+  affine_map<(d0) -> (d0)>,
+  affine_map<(d0) -> (d0)>
+ ]
+#trait = {
+  doc = "...",
+  indexing_maps = #accesses,
+  library_call = "test",
+  iterator_types = ["parallel"]
+}
+
+// CHECK: func.func private @linalg_copy_view32xf16as1_view32xf16as6(memref<32xf16, strided<[?], offset: ?>, 1>, memref<32xf16, strided<[?], offset: ?>, 6>) attributes {llvm.emit_c_interface}
+// CHECK: func.func private @linalg_copy_view32xf16as6_view32xf16as1(memref<32xf16, strided<[?], offset: ?>, 6>, memref<32xf16, strided<[?], offset: ?>, 1>) attributes {llvm.emit_c_interface}
+
+module {
+  func.func @helper(%arg7: memref<32xf16, 1>, %arg8: memref<32xf16, 1>, %arg9: memref<32xf16, 1>) {
+    %localA = memref.alloca() : memref<32xf16, 6>
+    %localB = memref.alloca() : memref<32xf16, 6>
+    %localOut = memref.alloca() : memref<32xf16, 6>
+    linalg.copy ins(%arg8 : memref<32xf16, 1>) outs(%localA : memref<32xf16, 6>)
+    linalg.copy ins(%arg9 : memref<32xf16, 1>) outs(%localB : memref<32xf16, 6>)
+
+    linalg.generic #trait
+      ins(%localA, %localB : memref<32xf16, 6>, memref<32xf16, 6>)
+      outs(%localOut : memref<32xf16, 6>) {
+      ^bb0(%0: f16, %1: f16, %2: f16) :
+        %e = arith.addf %1, %0: f16
+        linalg.yield %e : f16
+    }
+
+    linalg.copy ins(%localOut : memref<32xf16, 6>) outs(%arg7 : memref<32xf16, 1>)
+    return
+  }
+}


+// -----
+
+// CHECK: func.func private @linalg_elemwise_unary_negf_view16x8xf32_view16x8xf32(memref<16x8xf32, strided<[?, ?], offset: ?>>, memref<16x8xf32, strided<[?, ?], offset: ?>>) attributes {llvm.emit_c_interface}
+// CHECK: func.func private @linalg_elemwise_unary_negf_view16xf32_view16xf32(memref<16xf32, strided<[?], offset: ?>>, memref<16xf32, strided<[?], offset: ?>>) attributes {llvm.emit_c_interface}
+
+func.func @test_neg(%A : memref<16x8xf32>, %B: memref<16x8xf32>, %C: memref<16xf32>, %D: memref<16xf32>) {
+  linalg.elemwise_unary {fun = #linalg.unary_fn<negf>}
+    ins(%A: memref<16x8xf32>) outs(%B: memref<16x8xf32>)
+  linalg.elemwise_unary {fun = #linalg.unary_fn<negf>}
+    ins(%C: memref<16xf32>) outs(%D: memref<16xf32>)
+  return
+}
+
+// -----
+
+// CHECK: func.func private @linalg_elemwise_unary_exp_view16x8xf32_view16x8xf32(memref<16x8xf32, strided<[?, ?], offset: ?>>, memref<16x8xf32, strided<[?, ?], offset: ?>>) attributes {llvm.emit_c_interface}
+// CHECK: func.func private @linalg_elemwise_unary_exp_view16xf32_view16xf32(memref<16xf32, strided<[?], offset: ?>>, memref<16xf32, strided<[?], offset: ?>>) attributes {llvm.emit_c_interface}
+
+func.func @test_exp(%A : memref<16x8xf32>, %B: memref<16x8xf32>, %C: memref<16xf32>, %D: memref<16xf32>) {
+  linalg.elemwise_unary {fun = #linalg.unary_fn<exp>}
+    ins(%A: memref<16x8xf32>) outs(%B: memref<16x8xf32>)
+  linalg.elemwise_unary {fun = #linalg.unary_fn<exp>}
+    ins(%C: memref<16xf32>) outs(%D: memref<16xf32>)
+  return
+}
+
+// -----
+
+// CHECK: func.func private @linalg_elemwise_binary_add_view16x8xf32_view16x8xf32_view16x8xf32(memref<16x8xf32, strided<[?, ?], offset: ?>>, memref<16x8xf32, strided<[?, ?], offset: ?>>, memref<16x8xf32, strided<[?, ?], offset: ?>>) attributes {llvm.emit_c_interface}
+// CHECK: func.func private @linalg_elemwise_binary_add_view16xf32_view16xf32_view16xf32(memref<16xf32, strided<[?], offset: ?>>, memref<16xf32, strided<[?], offset: ?>>, memref<16xf32, strided<[?], offset: ?>>) attributes {llvm.emit_c_interface}
+
+func.func @test_add(%A : memref<16x8xf32>, %B: memref<16x8xf32>, %C: memref<16x8xf32>, %D: memref<16xf32>, %E: memref<16xf32>, %F: memref<16xf32>) {
+  linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
+    ins(%A, %B: memref<16x8xf32>, memref<16x8xf32>) outs(%C: memref<16x8xf32>)
+  linalg.elemwise_binary {fun = #linalg.binary_fn<add>}
+    ins(%D, %E: memref<16xf32>, memref<16xf32>) outs(%F: memref<16xf32>)
+  return
+}
From 5259da75b9352592cd12fc2c6b6b875567989867 Mon Sep 17 00:00:00 2001
From: Michael Francis
Date: Mon, 13 Mar 2023 06:53:54 +0000
Subject: [PATCH 052/691] [AIX][Clang] Respect -r when invoking the linker

On AIX, libraries are still being linked when `-r` is passed to the
driver. This patch corrects this error.
Differential Revision: https://reviews.llvm.org/D145899 --- clang/lib/Driver/ToolChains/AIX.cpp | 76 +++++++++++++++-------------- clang/test/Driver/aix-ld.c | 19 ++++++++ 2 files changed, 58 insertions(+), 37 deletions(-) diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index f41fb768fdc06..d4d13ce36e76f 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -175,7 +175,7 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, }; if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, - options::OPT_shared)) { + options::OPT_shared, options::OPT_r)) { CmdArgs.push_back( Args.MakeArgString(ToolChain.GetFilePath(getCrt0Basename()))); @@ -235,47 +235,49 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, // Add directory to library search path. Args.AddAllArgs(CmdArgs, options::OPT_L); ToolChain.AddFilePathLibArgs(Args, CmdArgs); - ToolChain.addProfileRTLibs(Args, CmdArgs); - - if (getToolChain().ShouldLinkCXXStdlib(Args)) - getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); - - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { - AddRunTimeLibs(ToolChain, D, CmdArgs, Args); - - // Add OpenMP runtime if -fopenmp is specified. - if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, - options::OPT_fno_openmp, false)) { - switch (ToolChain.getDriver().getOpenMPRuntime(Args)) { - case Driver::OMPRT_OMP: - CmdArgs.push_back("-lomp"); - break; - case Driver::OMPRT_IOMP5: - CmdArgs.push_back("-liomp5"); - break; - case Driver::OMPRT_GOMP: - CmdArgs.push_back("-lgomp"); - break; - case Driver::OMPRT_Unknown: - // Already diagnosed. - break; + if (!Args.hasArg(options::OPT_r)) { + ToolChain.addProfileRTLibs(Args, CmdArgs); + + if (getToolChain().ShouldLinkCXXStdlib(Args)) + getToolChain().AddCXXStdlibLibArgs(Args, CmdArgs); + + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { + AddRunTimeLibs(ToolChain, D, CmdArgs, Args); + + // Add OpenMP runtime if -fopenmp is specified. + if (Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, + options::OPT_fno_openmp, false)) { + switch (ToolChain.getDriver().getOpenMPRuntime(Args)) { + case Driver::OMPRT_OMP: + CmdArgs.push_back("-lomp"); + break; + case Driver::OMPRT_IOMP5: + CmdArgs.push_back("-liomp5"); + break; + case Driver::OMPRT_GOMP: + CmdArgs.push_back("-lgomp"); + break; + case Driver::OMPRT_Unknown: + // Already diagnosed. + break; + } } - } - // Support POSIX threads if "-pthreads" or "-pthread" is present. - if (Args.hasArg(options::OPT_pthreads, options::OPT_pthread)) - CmdArgs.push_back("-lpthreads"); + // Support POSIX threads if "-pthreads" or "-pthread" is present. 
+      if (Args.hasArg(options::OPT_pthreads, options::OPT_pthread))
+        CmdArgs.push_back("-lpthreads");

-    if (D.CCCIsCXX())
-      CmdArgs.push_back("-lm");
+      if (D.CCCIsCXX())
+        CmdArgs.push_back("-lm");

-    CmdArgs.push_back("-lc");
+      CmdArgs.push_back("-lc");

-    if (Args.hasArgNoClaim(options::OPT_p, options::OPT_pg)) {
-      CmdArgs.push_back(Args.MakeArgString((llvm::Twine("-L") + D.SysRoot) +
-                                           "/lib/profiled"));
-      CmdArgs.push_back(Args.MakeArgString((llvm::Twine("-L") + D.SysRoot) +
-                                           "/usr/lib/profiled"));
+      if (Args.hasArgNoClaim(options::OPT_p, options::OPT_pg)) {
+        CmdArgs.push_back(Args.MakeArgString((llvm::Twine("-L") + D.SysRoot) +
+                                             "/lib/profiled"));
+        CmdArgs.push_back(Args.MakeArgString((llvm::Twine("-L") + D.SysRoot) +
+                                             "/usr/lib/profiled"));
+      }
     }
   }
diff --git a/clang/test/Driver/aix-ld.c b/clang/test/Driver/aix-ld.c
index 38ac440aabdc6..eb2910db239ff 100644
--- a/clang/test/Driver/aix-ld.c
+++ b/clang/test/Driver/aix-ld.c
@@ -1077,3 +1077,22 @@
 // RUN:        -fopenmp=libfoo \
 // RUN:   | FileCheck --check-prefixes=CHECK-FOPENMP-FOO %s
 // CHECK-FOPENMP-FOO: error: unsupported argument 'libfoo' to option '-fopenmp='
+
+// Check powerpc-ibm-aix7.1.0.0. -r does not link object files or libraries
+// RUN: %clang %s 2>&1 -### \
+// RUN:        --target=powerpc-ibm-aix7.1.0.0 \
+// RUN:        --sysroot %S/Inputs/aix_ppc_tree \
+// RUN:        --unwindlib=libunwind \
+// RUN:        -L/foo/bar \
+// RUN:        -r \
+// RUN:   | FileCheck --check-prefixes=CHECK-RELOCATABLE %s

+// CHECK-RELOCATABLE: "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0"
+// CHECK-RELOCATABLE: "-isysroot" "[[SYSROOT:[^"]+]]"
+// CHECK-RELOCATABLE: "{{.*}}ld{{(.exe)?}}"
+// CHECK-RELOCATABLE: "-r"
+// CHECK-RELOCATABLE: "-L/foo/bar"
+// CHECK-RELOCATABLE-NOT: "[[SYSROOT]]/usr/lib{{/|\\\\}}crt0.o"
+// CHECK-RELOCATABLE-NOT: "[[SYSROOT]]/usr/lib{{/|\\\\}}crti.o"
+// CHECK-RELOCATABLE-NOT: "-l{{.*}}"
+// CHECK-RELOCATABLE-NOT: "-L{{.*}}"
From fb3f6a95393f33bc8d8550a5ac62c18e488a9b6f Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya
Date: Mon, 20 Mar 2023 18:57:40 +0100
Subject: [PATCH 053/691] Revert "[clangd] Fix AddUsing in the face of
 typo-correction"

This reverts commit 6f23fee4ef98a695062aa128a177478ba7d742d4.

Breaks windows buildbots
---
 .../clangd/refactor/tweaks/AddUsing.cpp       | 87 ++++++++-----------
 .../clangd/unittests/tweaks/AddUsingTests.cpp | 40 ++-------
 2 files changed, 42 insertions(+), 85 deletions(-)

diff --git a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp
index 1e51d8fb9a518..103e13f44d060 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp
@@ -8,25 +8,10 @@

 #include "AST.h"
 #include "Config.h"
-#include "SourceCode.h"
 #include "refactor/Tweak.h"
 #include "support/Logger.h"
 #include "clang/AST/Decl.h"
-#include "clang/AST/Expr.h"
-#include "clang/AST/NestedNameSpecifier.h"
 #include "clang/AST/RecursiveASTVisitor.h"
-#include "clang/AST/Type.h"
-#include "clang/AST/TypeLoc.h"
-#include "clang/Basic/LLVM.h"
-#include "clang/Basic/SourceLocation.h"
-#include "clang/Tooling/Core/Replacement.h"
-#include "clang/Tooling/Syntax/Tokens.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/FormatVariadic.h"
-#include "llvm/Support/raw_ostream.h"
-#include <string>
-#include <tuple>
-#include <utility>

 namespace clang {
 namespace clangd {
@@ -60,12 +45,8 @@ class AddUsing : public Tweak {
   // All of the following are set by prepare().
   // The qualifier to remove.
   NestedNameSpecifierLoc QualifierToRemove;
-  // Qualified name to use when spelling the using declaration. This might be
-  // different than SpelledQualifier in presence of error correction.
-  std::string QualifierToSpell;
-  // The name and qualifier as spelled in the code.
-  llvm::StringRef SpelledQualifier;
-  llvm::StringRef SpelledName;
+  // The name following QualifierToRemove.
+  llvm::StringRef Name;
   // If valid, the insertion point for "using" statement must come after this.
   // This is relevant when the type is defined in the main file, to make sure
   // the type/function is already defined at the point where "using" is added.
@@ -75,7 +56,7 @@ REGISTER_TWEAK(AddUsing)

 std::string AddUsing::title() const {
   return std::string(llvm::formatv(
-      "Add using-declaration for {0} and remove qualifier", SpelledName));
+      "Add using-declaration for {0} and remove qualifier", Name));
 }

 // Locates all "using" statements relevant to SelectionDeclContext.
@@ -288,23 +269,36 @@ bool AddUsing::prepare(const Selection &Inputs) {
   if (Node == nullptr)
     return false;

-  SourceRange SpelledNameRange;
   if (auto *D = Node->ASTNode.get<DeclRefExpr>()) {
     if (auto *II = D->getDecl()->getIdentifier()) {
       QualifierToRemove = D->getQualifierLoc();
-      SpelledNameRange = D->getSourceRange();
+      Name = II->getName();
       MustInsertAfterLoc = D->getDecl()->getBeginLoc();
     }
   } else if (auto *T = Node->ASTNode.get<TypeLoc>()) {
     if (auto E = T->getAs<ElaboratedTypeLoc>()) {
       QualifierToRemove = E.getQualifierLoc();
+      if (!QualifierToRemove)
+        return false;

-      SpelledNameRange = E.getSourceRange();
+      auto NameRange = E.getSourceRange();
       if (auto T = E.getNamedTypeLoc().getAs<TemplateSpecializationTypeLoc>()) {
         // Remove the template arguments from the name.
-        SpelledNameRange.setEnd(T.getLAngleLoc().getLocWithOffset(-1));
+        NameRange.setEnd(T.getLAngleLoc().getLocWithOffset(-1));
       }

+      auto SpelledTokens = TB.spelledForExpanded(TB.expandedTokens(NameRange));
+      if (!SpelledTokens)
+        return false;
+      auto SpelledRange = syntax::Token::range(SM, SpelledTokens->front(),
+                                               SpelledTokens->back());
+      Name = SpelledRange.text(SM);
+
+      std::string QualifierToRemoveStr = getNNSLAsString(
+          QualifierToRemove, Inputs.AST->getASTContext().getPrintingPolicy());
+      if (!Name.consume_front(QualifierToRemoveStr))
+        return false; // What's spelled doesn't match the qualifier.
+
       if (const auto *ET = E.getTypePtr()) {
         if (const auto *TDT =
                 dyn_cast<TypedefType>(ET->getNamedType().getTypePtr())) {
@@ -315,14 +309,19 @@ bool AddUsing::prepare(const Selection &Inputs) {
       }
     }
   }
-  if (!QualifierToRemove ||
-      // FIXME: This only supports removing qualifiers that are made up of just
-      // namespace names. If qualifier contains a type, we could take the
-      // longest namespace prefix and remove that.
+
+  // FIXME: This only supports removing qualifiers that are made up of just
+  // namespace names. If qualifier contains a type, we could take the longest
+  // namespace prefix and remove that.
+  if (!QualifierToRemove.hasQualifier() ||
       !QualifierToRemove.getNestedNameSpecifier()->getAsNamespace() ||
-      // Respect user config.
-      isNamespaceForbidden(Inputs, *QualifierToRemove.getNestedNameSpecifier()))
+      Name.empty()) {
+    return false;
+  }
+
+  if (isNamespaceForbidden(Inputs, *QualifierToRemove.getNestedNameSpecifier()))
     return false;
+
   // Macros are difficult. We only want to offer code action when what's spelled
   // under the cursor is a namespace qualifier. If it's a macro that expands to
   // a qualifier, user would not know what code action will actually change.
@@ -334,35 +333,23 @@ bool AddUsing::prepare(const Selection &Inputs) {
     return false;
   }

-  auto SpelledTokens =
-      TB.spelledForExpanded(TB.expandedTokens(SpelledNameRange));
-  if (!SpelledTokens)
-    return false;
-  auto SpelledRange =
-      syntax::Token::range(SM, SpelledTokens->front(), SpelledTokens->back());
-  // We only drop qualifiers that're namespaces, so this is safe.
-  std::tie(SpelledQualifier, SpelledName) =
-      splitQualifiedName(SpelledRange.text(SM));
-  QualifierToSpell = getNNSLAsString(
-      QualifierToRemove, Inputs.AST->getASTContext().getPrintingPolicy());
-  if (!llvm::StringRef(QualifierToSpell).endswith(SpelledQualifier) ||
-      SpelledName.empty())
-    return false; // What's spelled doesn't match the qualifier.
   return true;
 }

 Expected<Tweak::Effect> AddUsing::apply(const Selection &Inputs) {
   auto &SM = Inputs.AST->getSourceManager();
+  std::string QualifierToRemoveStr = getNNSLAsString(
+      QualifierToRemove, Inputs.AST->getASTContext().getPrintingPolicy());
   tooling::Replacements R;
   if (auto Err = R.add(tooling::Replacement(
           SM, SM.getSpellingLoc(QualifierToRemove.getBeginLoc()),
-          SpelledQualifier.size(), ""))) {
+          QualifierToRemoveStr.length(), ""))) {
     return std::move(Err);
   }

-  auto InsertionPoint = findInsertionPoint(Inputs, QualifierToRemove,
-                                           SpelledName, MustInsertAfterLoc);
+  auto InsertionPoint =
+      findInsertionPoint(Inputs, QualifierToRemove, Name, MustInsertAfterLoc);
   if (!InsertionPoint) {
     return InsertionPoint.takeError();
   }
@@ -375,7 +362,7 @@ Expected<Tweak::Effect> AddUsing::apply(const Selection &Inputs) {
     if (InsertionPoint->AlwaysFullyQualify &&
         !isFullyQualified(QualifierToRemove.getNestedNameSpecifier()))
       UsingTextStream << "::";
-    UsingTextStream << QualifierToSpell << SpelledName << ";"
+    UsingTextStream << QualifierToRemoveStr << Name << ";"
                     << InsertionPoint->Suffix;

     assert(SM.getFileID(InsertionPoint->Loc) == SM.getMainFileID());
diff --git a/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp
index 86077c17f7555..adfd018f56d27 100644
--- a/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp
+++ b/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp
@@ -8,11 +8,8 @@

 #include "Config.h"
 #include "TweakTesting.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-#include <string>

 namespace clang {
 namespace clangd {
@@ -33,7 +30,7 @@ namespace one {
 void oo() {}
 template <typename TT> class tt {};
 namespace two {
-enum ee { ee_enum_value };
+enum ee {};
 void ff() {}
 class cc {
 public:
@@ -67,6 +64,9 @@ class cc {
   EXPECT_UNAVAILABLE(Header + "void fun() { ::ban::fo^o(); }");
   EXPECT_AVAILABLE(Header + "void fun() { banana::fo^o(); }");

+  // Do not offer code action on typo-corrections.
+  EXPECT_UNAVAILABLE(Header + "/*error-ok*/c^c C;");
+
   // NestedNameSpecifier, but no namespace.
   EXPECT_UNAVAILABLE(Header + "class Foo {}; class F^oo foo;");
@@ -466,37 +466,7 @@
 one::v^ec foo;
 using one::vec;

 vec foo;
-)cpp"},
-      // Typo correction.
-      {R"cpp(
-// error-ok
-#include "test.hpp"
-c^c C;
-)cpp",
-       R"cpp(
-// error-ok
-#include "test.hpp"
-using one::two::cc;
-
-cc C;
-)cpp"},
-      {R"cpp(
-// error-ok
-#include "test.hpp"
-void foo() {
-  switch(one::two::ee{}) { case two::ee_^one:break; }
-}
-)cpp",
-       R"cpp(
-// error-ok
-#include "test.hpp"
-using one::two::ee_one;
-
-void foo() {
-  switch(one::two::ee{}) { case ee_one:break; }
-}
-)cpp"},
-  };
+)cpp"}};

 llvm::StringMap<std::string> EditedFiles;
 for (const auto &Case : Cases) {
   ExtraFiles["test.hpp"] = R"cpp(
From 427136dc355a2c70e7aae96460944816b4326111 Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Mon, 20 Mar 2023 10:55:44 -0700
Subject: [PATCH 054/691] [SLP][NFC]Add a test with missed buildvector node,
 matching the vectorized node.

---
 ...ed-buildvector-matching-vectorized-node.ll | 68 +++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll
new file mode 100644
index 0000000000000..d250fcfe5bf80
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define void @blam(ptr %arg, double %load2, i1 %fcmp3) {
+; CHECK-LABEL: define void @blam
+; CHECK-SAME: (ptr [[ARG:%.*]], double [[LOAD2:%.*]], i1 [[FCMP3:%.*]]) {
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[GETELEMENTPTR13:%.*]] = getelementptr double, ptr [[ARG]], i64 3
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x double>, ptr [[ARG]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i1> poison, i1 [[FCMP3]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP0]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x double>, ptr poison, align 16
+; CHECK-NEXT:    [[TMP5:%.*]] = fcmp olt <2 x double> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x double> zeroinitializer, <2 x double> [[TMP0]]
+; CHECK-NEXT:    [[TMP7:%.*]] = fcmp olt <2 x double> [[TMP3]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP7]], <2 x double> , <2 x double>
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x double> [[TMP8]], <2 x double> poison, <2 x i32>
+; CHECK-NEXT:    [[TMP10:%.*]] = fcmp olt <2 x double> [[TMP9]], [[TMP6]]
+; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> , <2 x i32>
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> , <2 x i32>
+; CHECK-NEXT:    [[TMP13:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[TMP11]], <2 x double> [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = fcmp olt <2 x double> [[TMP13]], zeroinitializer
+; CHECK-NEXT:    [[TMP15:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double>
+; CHECK-NEXT:    [[TMP16:%.*]] = fcmp ogt <2 x double> [[TMP15]], zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP18:%.*]] = select <2 x i1> [[TMP16]], <2 x double> zeroinitializer, <2 x double> [[TMP17]]
+; CHECK-NEXT:    [[TMP19:%.*]] =
fcmp olt <2 x double> [[TMP18]], zeroinitializer +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP19]], <2 x double> , <2 x double> zeroinitializer +; CHECK-NEXT: store <2 x double> [[TMP20]], ptr [[GETELEMENTPTR13]], align 8 +; CHECK-NEXT: ret void +; +bb: + %getelementptr = getelementptr double, ptr %arg, i64 1 + %load = load double, ptr %getelementptr, align 8 + %fcmp = fcmp olt double %load, 0.000000e+00 + %select3 = select i1 %fcmp, double 0.000000e+00, double %load + %select4 = select i1 %fcmp3, double 0.000000e+00, double %load + %load7 = load double, ptr %arg, align 8 + %select10 = select i1 %fcmp3, double 0.000000e+00, double %load7 + %fcmp11 = fcmp olt double %load2, 0.000000e+00 + %select128 = select i1 %fcmp11, double 0.000000e+00, double %load7 + %getelementptr13 = getelementptr double, ptr %arg, i64 3 + %getelementptr21 = getelementptr double, ptr %arg, i64 4 + %fcmp23 = fcmp olt double %select10, 0.000000e+00 + %select24 = select i1 %fcmp23, double 0.000000e+00, double 1.000000e+00 + %fcmp29 = fcmp olt double %select4, 0.000000e+00 + %select30 = select i1 %fcmp29, double 1.000000e+00, double 0.000000e+00 + %fcmp33 = fcmp olt double %select24, %select3 + %select34 = select i1 %fcmp33, double 0.000000e+00, double %load2 + %fcmp39 = fcmp olt double %select30, %select128 + %select40 = select i1 %fcmp39, double %load2, double 0.000000e+00 + %fcmp62 = fcmp olt double %select34, 0.000000e+00 + %select639 = select i1 %fcmp62, double 0.000000e+00, double 1.000000e+00 + %fcmp76 = fcmp olt double %select40, 0.000000e+00 + %select77 = select i1 %fcmp76, double 0.000000e+00, double 1.000000e+00 + %fcmp90 = fcmp ogt double %select639, 0.000000e+00 + %select91 = select i1 %fcmp90, double 0.000000e+00, double %load2 + %fcmp92 = fcmp ogt double %select77, 0.000000e+00 + %select93 = select i1 %fcmp92, double 0.000000e+00, double %load2 + %fcmp108 = fcmp olt double %select93, 0.000000e+00 + %select109 = select i1 %fcmp108, double 1.000000e+00, double 0.000000e+00 + %fcmp110 = fcmp olt double %select91, 0.000000e+00 + %select111 = select i1 %fcmp110, double 1.000000e+00, double 0.000000e+00 + store double %select111, ptr %getelementptr21, align 8 + store double %select109, ptr %getelementptr13, align 8 + ret void +} From 0c0468e6df2bcabd207858891c2387357857b0bc Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 20 Mar 2023 17:59:36 +0000 Subject: [PATCH 055/691] [InstCombine] Add test for issue fixed by 50fe87a5c8597e. Extra test that was fixed by 50fe87a5c8597e to make sure it doesn't regress again. 
---
 llvm/test/Transforms/InstCombine/sincospi.ll | 43 ++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/sincospi.ll b/llvm/test/Transforms/InstCombine/sincospi.ll
index 5386669bba511..ef65e0b68c769 100644
--- a/llvm/test/Transforms/InstCombine/sincospi.ll
+++ b/llvm/test/Transforms/InstCombine/sincospi.ll
@@ -209,3 +209,46 @@ define double @test_fptr(ptr %fptr, double %p1) {
   %res = fadd double %sin, %cos
   ret double %res
 }
+
+define i1 @test_cospif_used_in_branch_cond() {
+; CHECK-FLOAT-IN-VEC-LABEL: @test_cospif_used_in_branch_cond(
+; CHECK-FLOAT-IN-VEC-NEXT:  entry:
+; CHECK-FLOAT-IN-VEC-NEXT:    [[RES:%.*]] = call float @__cospif(float noundef 0.000000e+00)
+; CHECK-FLOAT-IN-VEC-NEXT:    [[CMP:%.*]] = fcmp uno float [[RES]], 0.000000e+00
+; CHECK-FLOAT-IN-VEC-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK-FLOAT-IN-VEC:       then:
+; CHECK-FLOAT-IN-VEC-NEXT:    ret i1 false
+; CHECK-FLOAT-IN-VEC:       else:
+; CHECK-FLOAT-IN-VEC-NEXT:    ret i1 true
+;
+; CHECK-LABEL: @test_cospif_used_in_branch_cond(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RES:%.*]] = call float @__cospif(float noundef 0.000000e+00)
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp uno float [[RES]], 0.000000e+00
+; CHECK-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       else:
+; CHECK-NEXT:    ret i1 true
+;
+; CHECK-NO-SINCOS-LABEL: @test_cospif_used_in_branch_cond(
+; CHECK-NO-SINCOS-NEXT:  entry:
+; CHECK-NO-SINCOS-NEXT:    [[RES:%.*]] = call float @__cospif(float noundef 0.000000e+00)
+; CHECK-NO-SINCOS-NEXT:    [[CMP:%.*]] = fcmp uno float [[RES]], 0.000000e+00
+; CHECK-NO-SINCOS-NEXT:    br i1 [[CMP]], label [[THEN:%.*]], label [[ELSE:%.*]]
+; CHECK-NO-SINCOS:       then:
+; CHECK-NO-SINCOS-NEXT:    ret i1 false
+; CHECK-NO-SINCOS:       else:
+; CHECK-NO-SINCOS-NEXT:    ret i1 true
+;
+entry:
+  %res = call float @__cospif(float noundef 0.000000e+00) #3
+  %cmp = fcmp uno float %res, 0.000000e+00
+  br i1 %cmp, label %then, label %else
+
+then:
+  ret i1 false
+
+else:
+  ret i1 true
+}

From d1e00b6f136ec71a4c95a7eb4fd81ec0ab547962 Mon Sep 17 00:00:00 2001
From: Jan Svoboda
Date: Fri, 17 Mar 2023 13:19:12 -0700
Subject: [PATCH 056/691] [clang][deps] Only cache files with specific extensions

In the scanner's VFS, we cache all files by default and only avoid caching
stat failures for certain files. This tanks the performance of scanning with
a pre-populated module cache. When there is a stale PCM file, it gets cached
by the scanner at the start and the rebuilt version never makes it through
the VFS again. The TU invocation that rebuilds the PCM only sees the copy in
its InMemoryModuleCache, which is invisible to other invocations. This means
the PCM gets rebuilt for every TU given to the scanner.

This patch fixes the situation by flipping the default, only caching files
that are known to be important, and letting everything else fall through to
the underlying VFS.
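As a rough illustration only (not part of the patch itself), the new
extension-driven lookup behaves like the sketch below; `PathPolicy`,
`ScanFile`, and `CacheStatFailure` mirror the types added in the diff, while
the set of cases shown here is an abbreviated subset:

  // Sketch only: map a filename's extension to a caching policy.
  static PathPolicy getPolicySketch(llvm::StringRef Filename) {
    llvm::StringRef Ext = llvm::sys::path::extension(Filename);
    if (Ext.empty()) // Extension-less files, e.g. C++ standard library headers.
      return PathPolicy::cache(ScanFile::Yes, CacheStatFailure::No);
    return llvm::StringSwitch<PathPolicy>(Ext)
        .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx",
                    PathPolicy::cache(ScanFile::Yes))
        .CasesLower(".modulemap", ".map", PathPolicy::cache(ScanFile::No))
        .Default(PathPolicy::fallThrough()); // e.g. a rebuilt .pcm file
  }

Under such a scheme a stale module.pcm is never pinned by the scanner's
cache: its extension falls through to the underlying VFS, so a rebuild
becomes visible to subsequent invocations.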
rdar://106376153 Reviewed By: Bigcheese Differential Revision: https://reviews.llvm.org/D146328 --- .../DependencyScanningFilesystem.h | 41 +++++- .../DependencyScanningFilesystem.cpp | 76 +++++------ .../Tooling/DependencyScannerTest.cpp | 127 ++++++++++++++++++ 3 files changed, 192 insertions(+), 52 deletions(-) diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h index 4b4e3c7eb2ecd..357a5b9423005 100644 --- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h +++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h @@ -269,6 +269,32 @@ class EntryRef { } }; +enum class ScanFile { Yes, No }; +enum class CacheStatFailure { Yes, No }; + +struct PathPolicy { + /// Implies caching of all open and stat results. + unsigned Enable : 1; + /// Controls whether a file will be scanned for dependency directives. + unsigned ScanFile : 1; + /// Explicitly disables stat failure caching when false. + unsigned CacheStatFailure : 1; + + static PathPolicy fallThrough() { return {false, false, false}; } + + static PathPolicy cache(enum ScanFile SF, + enum CacheStatFailure CSF = CacheStatFailure::Yes) { + return {true, SF == ScanFile::Yes, CSF == CacheStatFailure::Yes}; + } + +private: + PathPolicy(bool E, bool SF, bool CSF) + : Enable(E), ScanFile(SF), CacheStatFailure(CSF) {} +}; + +/// Determine caching and scanning behavior based on file extension. +PathPolicy getPolicy(StringRef Filename); + /// A virtual file system optimized for the dependency discovery. /// /// It is primarily designed to work with source files whose contents was @@ -293,24 +319,25 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem { /// /// Attempts to use the local and shared caches first, then falls back to /// using the underlying filesystem. - llvm::ErrorOr - getOrCreateFileSystemEntry(StringRef Filename, - bool DisableDirectivesScanning = false); + llvm::ErrorOr getOrCreateFileSystemEntry(StringRef Filename) { + return getOrCreateFileSystemEntry(Filename, getPolicy(Filename)); + } private: - /// Check whether the file should be scanned for preprocessor directives. - bool shouldScanForDirectives(StringRef Filename); + /// Same as the public version, but with explicit PathPolicy parameter. + llvm::ErrorOr getOrCreateFileSystemEntry(StringRef Filename, + PathPolicy Policy); /// For a filename that's not yet associated with any entry in the caches, /// uses the underlying filesystem to either look up the entry based in the /// shared cache indexed by unique ID, or creates new entry from scratch. llvm::ErrorOr - computeAndStoreResult(StringRef Filename); + computeAndStoreResult(StringRef Filename, PathPolicy Policy); /// Scan for preprocessor directives for the given entry if necessary and /// returns a wrapper object with reference semantics. EntryRef scanForDirectivesIfNecessary(const CachedFileSystemEntry &Entry, - StringRef Filename, bool Disable); + StringRef Filename, PathPolicy Policy); /// Represents a filesystem entry that has been stat-ed (and potentially read) /// and that's about to be inserted into the cache as `CachedFileSystemEntry`. 
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp index 0ddb5c24c5e6c..eb15fc532995c 100644 --- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp +++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp @@ -42,9 +42,8 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) { } EntryRef DependencyScanningWorkerFilesystem::scanForDirectivesIfNecessary( - const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) { - if (Entry.isError() || Entry.isDirectory() || Disable || - !shouldScanForDirectives(Filename)) + const CachedFileSystemEntry &Entry, StringRef Filename, PathPolicy Policy) { + if (Entry.isError() || Entry.isDirectory() || !Policy.ScanFile) return EntryRef(Filename, Entry); CachedFileContents *Contents = Entry.getCachedContents(); @@ -159,39 +158,22 @@ DependencyScanningFilesystemSharedCache::CacheShard:: return *EntriesByFilename.insert({Filename, &Entry}).first->getValue(); } -/// Whitelist file extensions that should be minimized, treating no extension as -/// a source file that should be minimized. -/// -/// This is kinda hacky, it would be better if we knew what kind of file Clang -/// was expecting instead. -static bool shouldScanForDirectivesBasedOnExtension(StringRef Filename) { +PathPolicy clang::tooling::dependencies::getPolicy(StringRef Filename) { StringRef Ext = llvm::sys::path::extension(Filename); if (Ext.empty()) - return true; // C++ standard library - return llvm::StringSwitch(Ext) - .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true) - .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true) - .CasesLower(".m", ".mm", true) - .CasesLower(".i", ".ii", ".mi", ".mmi", true) - .CasesLower(".def", ".inc", true) - .Default(false); -} - -static bool shouldCacheStatFailures(StringRef Filename) { - StringRef Ext = llvm::sys::path::extension(Filename); - if (Ext.empty()) - return false; // This may be the module cache directory. - // Only cache stat failures on files that are not expected to change during - // the build. 
- StringRef FName = llvm::sys::path::filename(Filename); - if (FName == "module.modulemap" || FName == "module.map") - return true; - return shouldScanForDirectivesBasedOnExtension(Filename); -} - -bool DependencyScanningWorkerFilesystem::shouldScanForDirectives( - StringRef Filename) { - return shouldScanForDirectivesBasedOnExtension(Filename); + return PathPolicy::cache(ScanFile::Yes, CacheStatFailure::No); + // clang-format off + return llvm::StringSwitch(Ext) + .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", PathPolicy::cache(ScanFile::Yes)) + .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", PathPolicy::cache(ScanFile::Yes)) + .CasesLower(".m", ".mm", PathPolicy::cache(ScanFile::Yes)) + .CasesLower(".i", ".ii", ".mi", ".mmi", PathPolicy::cache(ScanFile::Yes)) + .CasesLower(".def", ".inc", PathPolicy::cache(ScanFile::Yes)) + .CasesLower(".modulemap", ".map", PathPolicy::cache(ScanFile::No)) + .CasesLower(".framework", ".apinotes", PathPolicy::cache(ScanFile::No)) + .CasesLower(".yaml", ".json", ".hmap", PathPolicy::cache(ScanFile::No)) + .Default(PathPolicy::fallThrough()); + // clang-format on } const CachedFileSystemEntry & @@ -215,10 +197,11 @@ DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough( } llvm::ErrorOr -DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) { +DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename, + PathPolicy Policy) { llvm::ErrorOr Stat = getUnderlyingFS().status(Filename); if (!Stat) { - if (!shouldCacheStatFailures(Filename)) + if (!Policy.CacheStatFailure) return Stat.getError(); const auto &Entry = getOrEmplaceSharedEntryForFilename(Filename, Stat.getError()); @@ -244,16 +227,13 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) { llvm::ErrorOr DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry( - StringRef Filename, bool DisableDirectivesScanning) { + StringRef Filename, PathPolicy Policy) { if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename)) - return scanForDirectivesIfNecessary(*Entry, Filename, - DisableDirectivesScanning) - .unwrapError(); - auto MaybeEntry = computeAndStoreResult(Filename); + return scanForDirectivesIfNecessary(*Entry, Filename, Policy).unwrapError(); + auto MaybeEntry = computeAndStoreResult(Filename, Policy); if (!MaybeEntry) return MaybeEntry.getError(); - return scanForDirectivesIfNecessary(*MaybeEntry, Filename, - DisableDirectivesScanning) + return scanForDirectivesIfNecessary(*MaybeEntry, Filename, Policy) .unwrapError(); } @@ -261,8 +241,11 @@ llvm::ErrorOr DependencyScanningWorkerFilesystem::status(const Twine &Path) { SmallString<256> OwnedFilename; StringRef Filename = Path.toStringRef(OwnedFilename); + PathPolicy Policy = getPolicy(Filename); + if (!Policy.Enable) + return getUnderlyingFS().status(Path); - llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename); + llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename, Policy); if (!Result) return Result.getError(); return Result->getStatus(); @@ -318,8 +301,11 @@ llvm::ErrorOr> DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) { SmallString<256> OwnedFilename; StringRef Filename = Path.toStringRef(OwnedFilename); + PathPolicy Policy = getPolicy(Filename); + if (!Policy.Enable) + return getUnderlyingFS().openFileForRead(Path); - llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename); + llvm::ErrorOr Result = getOrCreateFileSystemEntry(Filename, Policy); if (!Result) return Result.getError(); return 
DepScanFile::create(Result.get()); diff --git a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp index abcc2c787b0d0..a7bd1ddbbdb5c 100644 --- a/clang/unittests/Tooling/DependencyScannerTest.cpp +++ b/clang/unittests/Tooling/DependencyScannerTest.cpp @@ -239,3 +239,130 @@ TEST(DependencyScanner, ScanDepsWithFS) { EXPECT_EQ(convert_to_slash(DepFile), "test.cpp.o: /root/test.cpp /root/header.h\n"); } + +// Note: We want to test caching in DependencyScanningWorkerFilesystem. To do +// that, we need to be able to mutate the underlying file system. However, +// InMemoryFileSystem does not allow changing the contents of a file after it's +// been created. +// To simulate the behavior, we create two separate in-memory file systems, each +// containing different version of the same file. We pass those to two scanning +// file systems that share the same cache. + +TEST(DependencyScanningFileSystemTest, CacheFileContentsEnabled) { + DependencyScanningFilesystemSharedCache SharedCache; + + StringRef Path = "/root/source.c"; + auto Contents1 = llvm::MemoryBuffer::getMemBuffer("contents1"); + auto Contents2 = llvm::MemoryBuffer::getMemBuffer("contents2"); + + { + auto InMemoryFS = + llvm::makeIntrusiveRefCnt(); + ASSERT_TRUE(InMemoryFS->addFile(Path, 0, std::move(Contents1))); + DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS); + auto File = ScanningFS.openFileForRead(Path); + ASSERT_TRUE(File); + auto Buffer = (*File)->getBuffer("Buffer for /root/source.c."); + ASSERT_TRUE(Buffer); + auto Contents = (*Buffer)->getBuffer(); + EXPECT_EQ(Contents, "contents1"); + } + + { + auto InMemoryFS = + llvm::makeIntrusiveRefCnt(); + ASSERT_TRUE(InMemoryFS->addFile(Path, 0, std::move(Contents2))); + DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS); + auto File = ScanningFS.openFileForRead(Path); + ASSERT_TRUE(File); + auto Buffer = (*File)->getBuffer("Buffer for /root/source.c."); + ASSERT_TRUE(Buffer); + auto Contents = (*Buffer)->getBuffer(); + EXPECT_EQ(Contents, "contents1"); + } +} + +TEST(DependencyScanningFileSystemTest, CacheFileContentsDisabled) { + DependencyScanningFilesystemSharedCache SharedCache; + + StringRef Path = "/root/module.pcm"; + auto Contents1 = llvm::MemoryBuffer::getMemBuffer("contents1"); + auto Contents2 = llvm::MemoryBuffer::getMemBuffer("contents2"); + + { + auto InMemoryFS = + llvm::makeIntrusiveRefCnt(); + ASSERT_TRUE(InMemoryFS->addFile(Path, 0, std::move(Contents1))); + DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS); + auto File = ScanningFS.openFileForRead(Path); + ASSERT_TRUE(File); + auto Buffer = (*File)->getBuffer("Buffer for /root/module.pcm."); + ASSERT_TRUE(Buffer); + auto Contents = (*Buffer)->getBuffer(); + EXPECT_EQ(Contents, "contents1"); + } + + { + auto InMemoryFS = + llvm::makeIntrusiveRefCnt(); + ASSERT_TRUE(InMemoryFS->addFile(Path, 0, std::move(Contents2))); + DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS); + auto File = ScanningFS.openFileForRead(Path); + ASSERT_TRUE(File); + auto Buffer = (*File)->getBuffer("Buffer for /root/module.pcm."); + ASSERT_TRUE(Buffer); + auto Contents = (*Buffer)->getBuffer(); + EXPECT_EQ(Contents, "contents2"); + } +} + +TEST(DependencyScanningFileSystemTest, CacheStatFailureEnabled) { + DependencyScanningFilesystemSharedCache SharedCache; + auto InMemoryFS = llvm::makeIntrusiveRefCnt(); + DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS); + + StringRef Path = 
"/root/source.c"; + + auto Stat1 = ScanningFS.status(Path); + EXPECT_FALSE(Stat1); + + auto Contents = llvm::MemoryBuffer::getMemBuffer("contents"); + InMemoryFS->addFile(Path, 0, std::move(Contents)); + + auto Stat2 = ScanningFS.status(Path); + EXPECT_FALSE(Stat2); +} + +TEST(DependencyScanningFileSystemTest, CacheStatFailureDisabledFile) { + DependencyScanningFilesystemSharedCache SharedCache; + auto InMemoryFS = llvm::makeIntrusiveRefCnt(); + DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS); + + StringRef Path = "/root/vector"; + + auto Stat1 = ScanningFS.status(Path); + EXPECT_FALSE(Stat1); + + auto Contents = llvm::MemoryBuffer::getMemBuffer("contents"); + InMemoryFS->addFile(Path, 0, std::move(Contents)); + + auto Stat2 = ScanningFS.status(Path); + EXPECT_TRUE(Stat2); +} + +TEST(DependencyScanningFileSystemTest, CacheStatFailureDisabledDirectory) { + DependencyScanningFilesystemSharedCache SharedCache; + auto InMemoryFS = llvm::makeIntrusiveRefCnt(); + DependencyScanningWorkerFilesystem ScanningFS(SharedCache, InMemoryFS); + + StringRef Path = "/root/dir"; + + auto Stat1 = ScanningFS.status(Path); + EXPECT_FALSE(Stat1); + + auto Contents = llvm::MemoryBuffer::getMemBuffer("contents"); + InMemoryFS->addFile("/root/dir/file", 0, std::move(Contents)); + + auto Stat2 = ScanningFS.status(Path); + EXPECT_TRUE(Stat2); +} From 6bd4d717d577b88e83a92ea865bb96dd5df45936 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 17 Mar 2023 14:06:53 -0500 Subject: [PATCH 057/691] [libc] Add environment variables to GPU libc test for AMDGPU This patch performs the same operation to copy over the `argv` array to the `envp` array. This allows the GPU tests to use environment variables. Reviewed By: sivachandra Differential Revision: https://reviews.llvm.org/D146322 --- libc/startup/gpu/amdgpu/start.cpp | 7 +++-- .../integration/startup/gpu/CMakeLists.txt | 3 ++ .../integration/startup/gpu/args_test.cpp | 13 ++++++++- libc/utils/gpu/loader/Loader.h | 2 +- libc/utils/gpu/loader/Main.cpp | 4 +-- libc/utils/gpu/loader/amdgpu/Loader.cpp | 28 ++++++++++++++++++- 6 files changed, 49 insertions(+), 8 deletions(-) diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp index cc30982e148ff..9915dff94f6fa 100644 --- a/libc/startup/gpu/amdgpu/start.cpp +++ b/libc/startup/gpu/amdgpu/start.cpp @@ -8,11 +8,12 @@ #include "src/__support/RPC/rpc_client.h" -extern "C" int main(int argc, char **argv); +extern "C" int main(int argc, char **argv, char **envp); extern "C" [[gnu::visibility("protected"), clang::amdgpu_kernel]] void -_start(int argc, char **argv, int *ret, void *in, void *out, void *buffer) { +_start(int argc, char **argv, char **envp, int *ret, void *in, void *out, + void *buffer) { __llvm_libc::rpc::client.reset(in, out, buffer); - __atomic_fetch_or(ret, main(argc, argv), __ATOMIC_RELAXED); + __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED); } diff --git a/libc/test/integration/startup/gpu/CMakeLists.txt b/libc/test/integration/startup/gpu/CMakeLists.txt index 5451a27c28874..9bd7f675eeaad 100644 --- a/libc/test/integration/startup/gpu/CMakeLists.txt +++ b/libc/test/integration/startup/gpu/CMakeLists.txt @@ -8,4 +8,7 @@ add_integration_test( args_test.cpp ARGS 1 2 3 + ENV + FRANCE=Paris + GERMANY=Berlin ) diff --git a/libc/test/integration/startup/gpu/args_test.cpp b/libc/test/integration/startup/gpu/args_test.cpp index f3a5410691c22..1cc5a0e769279 100644 --- a/libc/test/integration/startup/gpu/args_test.cpp +++ 
b/libc/test/integration/startup/gpu/args_test.cpp
@@ -17,11 +17,22 @@ static bool my_streq(const char *lhs, const char *rhs) {
   return *l == '\0' && *r == '\0';
 }
 
-TEST_MAIN(int argc, char **argv) {
+TEST_MAIN(int argc, char **argv, char **envp) {
   ASSERT_TRUE(argc == 4);
   ASSERT_TRUE(my_streq(argv[1], "1"));
   ASSERT_TRUE(my_streq(argv[2], "2"));
   ASSERT_TRUE(my_streq(argv[3], "3"));
+  bool found_france = false;
+  bool found_germany = false;
+  for (; *envp != nullptr; ++envp) {
+    if (my_streq(*envp, "FRANCE=Paris"))
+      found_france = true;
+    if (my_streq(*envp, "GERMANY=Berlin"))
+      found_germany = true;
+  }
+
+  ASSERT_TRUE(found_france && found_germany);
+
   return 0;
 }
diff --git a/libc/utils/gpu/loader/Loader.h b/libc/utils/gpu/loader/Loader.h
index a24b8b1e982ea..aecd6db25b54a 100644
--- a/libc/utils/gpu/loader/Loader.h
+++ b/libc/utils/gpu/loader/Loader.h
@@ -11,4 +11,4 @@
 /// Generic interface to load the \p image and launch execution of the _start
 /// kernel on the target device. Copies \p argc, \p argv, and \p envp to the
 /// device. Returns the final value of the `main` function on the device.
-int load(int argc, char **argv, void *image, size_t size);
+int load(int argc, char **argv, char **envp, void *image, size_t size);
diff --git a/libc/utils/gpu/loader/Main.cpp b/libc/utils/gpu/loader/Main.cpp
index 435bda6cc7e49..00354720dda93 100644
--- a/libc/utils/gpu/loader/Main.cpp
+++ b/libc/utils/gpu/loader/Main.cpp
@@ -16,7 +16,7 @@
 #include
 #include
 
-int main(int argc, char **argv) {
+int main(int argc, char **argv, char **envp) {
   if (argc < 2) {
     printf("USAGE: ./loader , ...\n");
     return EXIT_SUCCESS;
   }
@@ -39,7 +39,7 @@ int main(int argc, char **argv) {
   fclose(file);
 
   // Drop the loader from the program arguments.
-  int ret = load(argc - 1, &argv[1], image, size);
+  int ret = load(argc - 1, &argv[1], envp, image, size);
 
   free(image);
   return ret;
diff --git a/libc/utils/gpu/loader/amdgpu/Loader.cpp b/libc/utils/gpu/loader/amdgpu/Loader.cpp
index 0d631e98aae43..fcb5119a0f87e 100644
--- a/libc/utils/gpu/loader/amdgpu/Loader.cpp
+++ b/libc/utils/gpu/loader/amdgpu/Loader.cpp
@@ -32,6 +32,7 @@ constexpr const char *KERNEL_START = "_start.kd";
 struct kernel_args_t {
   int argc;
   void *argv;
+  void *envp;
   void *ret;
   void *inbox;
   void *outbox;
@@ -164,7 +165,7 @@ hsa_status_t get_agent_memory_pool(hsa_agent_t agent,
   return iterate_agent_memory_pools(agent, cb);
 }
 
-int load(int argc, char **argv, void *image, size_t size) {
+int load(int argc, char **argv, char **envp, void *image, size_t size) {
   // Initialize the HSA runtime used to communicate with the device.
   if (hsa_status_t err = hsa_init())
     handle_error(err);
@@ -299,6 +300,30 @@ int load(int argc, char **argv, void *image, size_t size) {
     static_cast(dev_argv)[i] = dev_str;
   }
 
+  // Allocate fine-grained memory on the host to hold the pointer array for the
+  // copied environment array and allow the GPU agent to access it.
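+  // To size that array we first count the entries; each environment string
+  // is then copied into device-accessible memory below, mirroring the argv
+  // handling above.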
+ int envc = 0; + for (char **env = envp; *env != 0; ++env) + ++envc; + void *dev_envp; + if (hsa_status_t err = + hsa_amd_memory_pool_allocate(finegrained_pool, envc * sizeof(char *), + /*flags=*/0, &dev_envp)) + handle_error(err); + hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_envp); + for (int i = 0; i < envc; ++i) { + size_t size = strlen(envp[i]) + 1; + void *dev_str; + if (hsa_status_t err = hsa_amd_memory_pool_allocate(finegrained_pool, size, + /*flags=*/0, &dev_str)) + handle_error(err); + hsa_amd_agents_allow_access(1, &dev_agent, nullptr, dev_str); + // Load the host memory buffer with the pointer values of the newly + // allocated strings. + std::memcpy(dev_str, envp[i], size); + static_cast(dev_envp)[i] = dev_str; + } + // Allocate space for the return pointer and initialize it to zero. void *dev_ret; if (hsa_status_t err = @@ -333,6 +358,7 @@ int load(int argc, char **argv, void *image, size_t size) { kernel_args_t *kernel_args = reinterpret_cast(args); kernel_args->argc = argc; kernel_args->argv = dev_argv; + kernel_args->envp = dev_envp; kernel_args->ret = dev_ret; kernel_args->inbox = server_outbox; kernel_args->outbox = server_inbox; From 41e9c4b88c28b0a6f3820b45000cedeced89206c Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Sat, 18 Mar 2023 23:17:28 -0700 Subject: [PATCH 058/691] [NFC][Outliner] Delete default ctors for Candidate & OutlinedFunction. I think it's good practice to avoid having default ctors unless they're really valid/useful. For OutlinedFunction the default ctor was used to represent a bail-out value for getOutliningCandidateInfo(), so I changed the API to return an optional instead which seems a tad cleaner. Differential Revision: https://reviews.llvm.org/D146375 --- llvm/include/llvm/CodeGen/MachineOutliner.h | 4 ++-- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 5 +++-- llvm/lib/CodeGen/MachineOutliner.cpp | 10 +++++----- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 15 ++++++++------- llvm/lib/Target/AArch64/AArch64InstrInfo.h | 3 ++- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 9 +++++---- llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 5 +++-- llvm/lib/Target/RISCV/RISCVInstrInfo.h | 2 +- llvm/lib/Target/X86/X86InstrInfo.cpp | 8 +++++--- llvm/lib/Target/X86/X86InstrInfo.h | 2 +- 11 files changed, 36 insertions(+), 29 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineOutliner.h b/llvm/include/llvm/CodeGen/MachineOutliner.h index f968089e0de0d..d0ff02fea4ff9 100644 --- a/llvm/include/llvm/CodeGen/MachineOutliner.h +++ b/llvm/include/llvm/CodeGen/MachineOutliner.h @@ -199,7 +199,7 @@ struct Candidate { unsigned FunctionIdx, unsigned Flags) : StartIdx(StartIdx), Len(Len), FirstInst(FirstInst), LastInst(LastInst), MBB(MBB), FunctionIdx(FunctionIdx), Flags(Flags) {} - Candidate() = default; + Candidate() = delete; /// Used to ensure that \p Candidates are outlined in an order that /// preserves the start and end indices of other \p Candidates. 
@@ -268,7 +268,7 @@ struct OutlinedFunction { C.Benefit = B; } - OutlinedFunction() = default; + OutlinedFunction() = delete; }; } // namespace outliner } // namespace llvm diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 92c283a29c36b..b4a7bbb4c99ce 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1966,8 +1966,9 @@ class TargetInstrInfo : public MCInstrInfo { } /// Returns a \p outliner::OutlinedFunction struct containing target-specific - /// information for a set of outlining candidates. - virtual outliner::OutlinedFunction getOutliningCandidateInfo( + /// information for a set of outlining candidates. Returns None if the + /// candidates are not suitable for outlining. + virtual std::optional getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::getOutliningCandidateInfo!"); diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp index 856b9bfb40bc0..8d72208e8c581 100644 --- a/llvm/lib/CodeGen/MachineOutliner.cpp +++ b/llvm/lib/CodeGen/MachineOutliner.cpp @@ -655,21 +655,21 @@ void MachineOutliner::findCandidates( const TargetInstrInfo *TII = CandidatesForRepeatedSeq[0].getMF()->getSubtarget().getInstrInfo(); - OutlinedFunction OF = + std::optional OF = TII->getOutliningCandidateInfo(CandidatesForRepeatedSeq); // If we deleted too many candidates, then there's nothing worth outlining. // FIXME: This should take target-specified instruction sizes into account. - if (OF.Candidates.size() < 2) + if (!OF || OF->Candidates.size() < 2) continue; // Is it better to outline this candidate than not? - if (OF.getBenefit() < 1) { - emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, OF); + if (OF->getBenefit() < 1) { + emitNotOutliningCheaperRemark(StringLen, CandidatesForRepeatedSeq, *OF); continue; } - FunctionList.push_back(OF); + FunctionList.push_back(*OF); } } diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 54ad05bc4698b..b1cfd684e18b8 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -7151,7 +7151,8 @@ static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps(); } -outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( +std::optional +AArch64InstrInfo::getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const { outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; unsigned SequenceSize = @@ -7181,7 +7182,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( } return true; }) != RepeatedSequenceLocs.end()) { - return outliner::OutlinedFunction(); + return std::nullopt; } // Since at this point all candidates agree on their return address signing @@ -7259,7 +7260,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // If the sequence doesn't have enough candidates left, then we're done. if (RepeatedSequenceLocs.size() < 2) - return outliner::OutlinedFunction(); + return std::nullopt; } // Properties about candidate MBBs that hold for all of them. 
@@ -7304,7 +7305,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( C.getMF()->getFrameInstructions(); if (CFICount > 0 && CFICount != CFIInstructions.size()) - return outliner::OutlinedFunction(); + return std::nullopt; } // Returns true if an instructions is safe to fix up, false otherwise. @@ -7506,7 +7507,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // If we dropped all of the candidates, bail out here. if (RepeatedSequenceLocs.size() < 2) { RepeatedSequenceLocs.clear(); - return outliner::OutlinedFunction(); + return std::nullopt; } } @@ -7533,7 +7534,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // We can't fix up the stack. Bail out. if (!AllStackInstrsSafe) { RepeatedSequenceLocs.clear(); - return outliner::OutlinedFunction(); + return std::nullopt; } // Save + restore LR. @@ -7544,7 +7545,7 @@ outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo( // If we have CFI instructions, we can only outline if the outlined section // can be a tail call if (FrameID != MachineOutlinerTailCall && CFICount > 0) - return outliner::OutlinedFunction(); + return std::nullopt; return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 96ec0552ac795..59fd05a0a3051 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -17,6 +17,7 @@ #include "AArch64RegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/Support/TypeSize.h" +#include #define GET_INSTRINFO_HEADER #include "AArch64GenInstrInfo.inc" @@ -289,7 +290,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override; - outliner::OutlinedFunction getOutliningCandidateInfo( + std::optional getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const override; outliner::InstrType getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const override; diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 77557c3f9b5fc..4a158b31aae23 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5869,7 +5869,8 @@ static bool isLRAvailable(const TargetRegisterInfo &TRI, return !Live; } -outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( +std::optional +ARMBaseInstrInfo::getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const { outliner::Candidate &FirstCand = RepeatedSequenceLocs[0]; unsigned SequenceSize = @@ -5915,7 +5916,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( // If the sequence doesn't have enough candidates left, then we're done. if (RepeatedSequenceLocs.size() < 2) - return outliner::OutlinedFunction(); + return std::nullopt; } // We expect the majority of the outlining candidates to be in consensus with @@ -5941,7 +5942,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI); if (RepeatedSequenceLocs.size() < 2) - return outliner::OutlinedFunction(); + return std::nullopt; // Likewise, partition the candidates according to PAC-RET enablement. 
auto NoPAC = @@ -5958,7 +5959,7 @@ outliner::OutlinedFunction ARMBaseInstrInfo::getOutliningCandidateInfo( RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC); if (RepeatedSequenceLocs.size() < 2) - return outliner::OutlinedFunction(); + return std::nullopt; // At this point, we have only "safe" candidates to outline. Figure out // frame + call instruction information. diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 70c3be2b54f16..5efcc1a0d9fc0 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -348,7 +348,7 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { /// ARM supports the MachineOutliner. bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override; - outliner::OutlinedFunction getOutliningCandidateInfo( + std::optional getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const override; void mergeOutliningCandidateAttributes( Function &F, std::vector &Candidates) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index d525365762293..d1a8a412e26c3 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -1925,7 +1925,8 @@ bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault( return MF.getFunction().hasMinSize(); } -outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo( +std::optional +RISCVInstrInfo::getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const { // First we need to filter out candidates where the X5 register (IE t0) can't @@ -1939,7 +1940,7 @@ outliner::OutlinedFunction RISCVInstrInfo::getOutliningCandidateInfo( // If the sequence doesn't have enough candidates left, then we're done. if (RepeatedSequenceLocs.size() < 2) - return outliner::OutlinedFunction(); + return std::nullopt; unsigned SequenceSize = 0; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index cc84e9c07d0d6..e3b394ed1fe14 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -159,7 +159,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override; // Calculate target-specific information for a set of outlining candidates. - outliner::OutlinedFunction getOutliningCandidateInfo( + std::optional getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const override; // Return if/how a given MachineInstr should be outlined. 
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 557e18cf596e0..dbf5e8d535031 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -43,6 +43,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" +#include using namespace llvm; @@ -9598,7 +9599,8 @@ enum MachineOutlinerClass { MachineOutlinerTailCall }; -outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo( +std::optional +X86InstrInfo::getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const { unsigned SequenceSize = std::accumulate(RepeatedSequenceLocs[0].front(), @@ -9631,7 +9633,7 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo( C.getMF()->getFrameInstructions(); if (CFICount > 0 && CFICount != CFIInstructions.size()) - return outliner::OutlinedFunction(); + return std::nullopt; } // FIXME: Use real size in bytes for call and ret instructions. @@ -9646,7 +9648,7 @@ outliner::OutlinedFunction X86InstrInfo::getOutliningCandidateInfo( } if (CFICount > 0) - return outliner::OutlinedFunction(); + return std::nullopt; for (outliner::Candidate &C : RepeatedSequenceLocs) C.setCallInfo(MachineOutlinerDefault, 1); diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index ff588b1d78f38..f8016b3a98ef4 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -551,7 +551,7 @@ class X86InstrInfo final : public X86GenInstrInfo { ArrayRef> getSerializableDirectMachineOperandTargetFlags() const override; - outliner::OutlinedFunction getOutliningCandidateInfo( + std::optional getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs) const override; bool isFunctionSafeToOutlineFrom(MachineFunction &MF, From 272ebd6957ef7bd39a6c6d2aaf7249d86e09791b Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 20 Mar 2023 11:15:54 -0700 Subject: [PATCH 059/691] [LSR] Inline getAlternateIVEnd and simplify [nfc] Also, add a comment to highlight that the "good" result on this test is accidental, and not based on a principled decision. I matched the original behavior to make this nfc, but selecting the last legal IV is not well motivated here. --- .../Transforms/Scalar/LoopStrengthReduce.cpp | 51 ++++++++----------- .../lsr-term-fold-negative-testcase.ll | 3 ++ 2 files changed, 25 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 5d8e822eaddff..e76ba2da22127 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6737,35 +6737,12 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond)) return std::nullopt; - // If this is an IV which we could replace the terminating condition, return - // the final value of the alternative IV on the last iteration. - auto getAlternateIVEnd = [&](PHINode &PN) -> const SCEV * { - // FIXME: This does not properly account for overflow. 
-    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
-    const SCEV *BECount = SE.getBackedgeTakenCount(L);
-    const SCEV *TermValueS = SE.getAddExpr(
-        AddRec->getOperand(0),
-        SE.getTruncateOrZeroExtend(
-            SE.getMulExpr(
-                AddRec->getOperand(1),
-                SE.getTruncateOrZeroExtend(
-                    SE.getAddExpr(BECount, SE.getOne(BECount->getType())),
-                    AddRec->getOperand(1)->getType())),
-            AddRec->getOperand(0)->getType()));
-    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
-    SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
-    if (!Expander.isSafeToExpand(TermValueS)) {
-      LLVM_DEBUG(
-          dbgs() << "Is not safe to expand terminating value for phi node" << PN
-                 << "\n");
-      return nullptr;
-    }
-    return TermValueS;
-  };
+  const SCEV *BECount = SE.getBackedgeTakenCount(L);
+  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+  SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
 
   PHINode *ToHelpFold = nullptr;
   const SCEV *TermValueS = nullptr;
-
   for (PHINode &PN : L->getHeader()->phis()) {
     if (ToFold == &PN)
       continue;
@@ -6785,10 +6762,26 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
       continue;
     }
 
-    if (auto P = getAlternateIVEnd(PN)) {
-      ToHelpFold = &PN;
-      TermValueS = P;
+    // FIXME: This does not properly account for overflow.
+    const SCEV *TermValueSLocal = SE.getAddExpr(
+        AddRec->getOperand(0),
+        SE.getTruncateOrZeroExtend(
+            SE.getMulExpr(
+                AddRec->getOperand(1),
+                SE.getTruncateOrZeroExtend(
+                    SE.getAddExpr(BECount, SE.getOne(BECount->getType())),
+                    AddRec->getOperand(1)->getType())),
+            AddRec->getOperand(0)->getType()));
+    if (!Expander.isSafeToExpand(TermValueSLocal)) {
+      LLVM_DEBUG(
+          dbgs() << "Is not safe to expand terminating value for phi node" << PN
+                 << "\n");
       continue;
     }
+    // We pick the last legal alternate IV. We could explore choosing an optimal
+    // alternate IV if we had a decent heuristic to do so.
+    ToHelpFold = &PN;
+    TermValueS = TermValueSLocal;
   }
 
   LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
index 1b9b58f79b480..e7a8acb82c20f 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
@@ -211,6 +211,9 @@ for.end: ; preds = %for.body
   ret void
 }
 
+; After LSR, there are three IVs in this loop. As a result, we have two
+; alternate IVs to choose from. At the moment, we choose the last, but this
+; is somewhat arbitrary.
 define void @TermCondMoreThanOneUse(ptr %a) {
 ; CHECK-LABEL: define void @TermCondMoreThanOneUse
 ; CHECK-SAME: (ptr [[A:%.*]]) {

From 92416b63a57b74689abc175bcafd97b674ff9728 Mon Sep 17 00:00:00 2001
From: Jakub Kuderski
Date: Mon, 20 Mar 2023 14:27:50 -0400
Subject: [PATCH 060/691] [ADT] Work around `enumerate` compilation error with modules enabled

This manifests on Apple clang 14 with `-DLLVM_ENABLE_MODULES=1` and
`-DLLVM_ENABLE_ASSERTIONS=1` and seems like a host compiler bug. Sample
compilation failure:
https://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/52513/consoleFull#-458239162a1ca8a51-895e-46c6-af87-ce24fa4cd561.
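For reference, a reduced stand-alone sketch of the workaround's shape (the
names below are invented for illustration and are not the real `enumerate`
code):

  #include <cassert>
  #include <cstddef>
  #include <iterator>

  // Stand-in for llvm::all_equal.
  template <typename R> bool allEqualSketch(const R &Range) {
    auto It = std::begin(Range);
    auto End = std::end(Range);
    if (It == End)
      return true;
    auto First = *It;
    for (; It != End; ++It)
      if (*It != First)
        return false;
    return true;
  }

  void checkLengths(std::size_t A, std::size_t B, std::size_t C) {
    // Before: a braced initializer list passed straight into the assert,
    // which the affected modules-enabled host compiler rejects:
    //   assert(allEqualSketch(std::initializer_list<std::size_t>{A, B, C}));
    // After: a named C array carries the same values.
    std::size_t Sizes[] = {A, B, C};
    assert(allEqualSketch(Sizes));
    (void)Sizes; // Keep release builds (-DNDEBUG) warning-free.
  }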
Reviewed By: aprantl

Differential Revision: https://reviews.llvm.org/D146340
---
 llvm/include/llvm/ADT/STLExtras.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index bf33d79801065..8d739106bccbb 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -2385,10 +2385,16 @@ struct index_stream {
 ///
 template <typename FirstRange, typename... RestRanges>
 auto enumerate(FirstRange &&First, RestRanges &&...Rest) {
-  assert((sizeof...(Rest) == 0 ||
-          all_equal({std::distance(adl_begin(First), adl_end(First)),
-                     std::distance(adl_begin(Rest), adl_end(Rest))...})) &&
-         "Ranges have different length");
+  if constexpr (sizeof...(Rest) != 0) {
+#ifndef NDEBUG
+    // Note: Create an array instead of an initializer list to work around an
+    // Apple clang 14 compiler bug.
+    size_t sizes[] = {
+        static_cast<size_t>(std::distance(adl_begin(First), adl_end(First))),
+        static_cast<size_t>(std::distance(adl_begin(Rest), adl_end(Rest)))...};
+    assert(all_equal(sizes) && "Ranges have different length");
+#endif
+  }
 
   using enumerator = detail::zippy;
   return enumerator(detail::index_stream{}, std::forward<FirstRange>(First),

From 2d373e4dc7e97b0b2d7022d8d04e64834204187c Mon Sep 17 00:00:00 2001
From: Akash Banerjee
Date: Fri, 17 Mar 2023 12:40:29 +0000
Subject: [PATCH 061/691] [MLIR][OpenMP] Added OMPIRBuilder support for Target Data directives

This patch adds OpenMP IRBuilder support for the Target Data directives to
allow lowering to LLVM IR. The mlir::Translation is responsible for
generating supporting code for processing the map_operands through the
processMapOperand function, and also for generating code for the region.
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index afdbc4d9788d6..948cc128c65ca 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -241,6 +241,12 @@ enum class OpenMPOffloadMappingFlags : uint64_t { LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF) }; +enum OpenMPOffloadingReservedDeviceIDs { + /// Device ID if the device was not defined, runtime should get it + /// from environment variables in the spec. + OMP_DEVICEID_UNDEF = -1 +}; + enum class AddressSpace : unsigned { Generic = 0, Global = 1, diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index b3c4f21274cb2..a13f8528fa6dd 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -1097,6 +1097,9 @@ class OpenMPIRBuilder { /// variables. StringMap InternalVars; + /// Computes the size of type in bytes. + Value *getSizeInBytes(Value *BasePtr); + /// Create the global variable holding the offload mappings information. GlobalVariable *createOffloadMaptypes(SmallVectorImpl &Mappings, std::string VarName); @@ -1552,6 +1555,29 @@ class OpenMPIRBuilder { StringRef EntryFnIDName, int32_t NumTeams, int32_t NumThreads); + /// Generator for '#omp target data' + /// + /// \param Loc The location where the target data construct was encountered. + /// \param CodeGenIP The insertion point at which the target directive code + /// should be placed. + /// \param MapTypeFlags BitVector storing the mapType flags for the + /// mapOperands. + /// \param MapNames Names for the mapOperands. + /// \param MapperAllocas Pointers to the AllocInsts for the map clause. + /// \param IsBegin If true then emits begin mapper call otherwise emits + /// end mapper call. + /// \param DeviceID Stores the DeviceID from the device clause. + /// \param IfCond Value which corresponds to the if clause condition. + /// \param ProcessMapOpCB Callback that generates code for the map clause. + /// \param BodyGenCB Callback that will generate the region code. + OpenMPIRBuilder::InsertPointTy createTargetData( + const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy CodeGenIP, + SmallVectorImpl &MapTypeFlags, + SmallVectorImpl &MapNames, + struct MapperAllocas &MapperAllocas, bool IsBegin, int64_t DeviceID, + Value *IfCond, BodyGenCallbackTy ProcessMapOpCB, + BodyGenCallbackTy BodyGenCB = {}); + /// Declarations for LLVM-IR types (simple, array, function and structure) are /// generated below. Their names are defined and used in OpenMPKinds.def. 
Here /// we provide the declarations, the initializeTypes function will provide the diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index fd4d2b5d51c86..81e2904bbd019 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -4041,6 +4041,77 @@ Constant *OpenMPIRBuilder::registerTargetRegionFunction( return OutlinedFnID; } +OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetData( + const LocationDescription &Loc, OpenMPIRBuilder::InsertPointTy CodeGenIP, + SmallVectorImpl &MapTypeFlags, + SmallVectorImpl &MapNames, struct MapperAllocas &MapperAllocas, + bool IsBegin, int64_t DeviceID, Value *IfCond, + BodyGenCallbackTy ProcessMapOpCB, BodyGenCallbackTy BodyGenCB) { + if (!updateToLocation(Loc)) + return InsertPointTy(); + + Builder.restoreIP(CodeGenIP); + + // LLVM utilities like blocks with terminators. + // The UI acts as a resume point for code insertion after the BodyGen + auto *UI = Builder.CreateUnreachable(); + if (IfCond) { + auto *ThenTI = + SplitBlockAndInsertIfThen(IfCond, UI, /* Unreachable */ false); + ThenTI->getParent()->setName("omp_if.then"); + Builder.SetInsertPoint(ThenTI); + } else { + Builder.SetInsertPoint(UI); + } + + ProcessMapOpCB(Builder.saveIP(), Builder.saveIP()); + + uint32_t SrcLocStrSize; + Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize); + Value *srcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize); + + GlobalVariable *MapTypesGV = + createOffloadMaptypes(MapTypeFlags, ".offload_maptypes"); + Value *MapTypesArg = Builder.CreateConstInBoundsGEP2_32( + ArrayType::get(Builder.getInt64Ty(), MapTypeFlags.size()), MapTypesGV, + /*Idx0=*/0, /*Idx1=*/0); + + GlobalVariable *MapNamesGV = + createOffloadMapnames(MapNames, ".offload_mapnames"); + Value *MapNamesArg = Builder.CreateConstInBoundsGEP2_32( + ArrayType::get(Builder.getInt8PtrTy(), MapNames.size()), MapNamesGV, + /*Idx0=*/0, /*Idx1=*/0); + + Function *beginMapperFunc = + getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_begin_mapper); + Function *endMapperFunc = + getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper); + + if (BodyGenCB) { + // Create call to start the data region. + emitMapperCall(Builder.saveIP(), beginMapperFunc, srcLocInfo, MapTypesArg, + MapNamesArg, MapperAllocas, DeviceID, MapTypeFlags.size()); + + BodyGenCB(Builder.saveIP(), Builder.saveIP()); + + Builder.SetInsertPoint(UI->getParent()); + // Create call to end the data region. + emitMapperCall(Builder.saveIP(), endMapperFunc, srcLocInfo, MapTypesArg, + MapNamesArg, MapperAllocas, DeviceID, MapTypeFlags.size()); + } else { + emitMapperCall(Builder.saveIP(), IsBegin ? beginMapperFunc : endMapperFunc, + srcLocInfo, MapTypesArg, MapNamesArg, MapperAllocas, + DeviceID, MapTypeFlags.size()); + } + + // Update the insertion point and remove the terminator we introduced. 
+ Builder.SetInsertPoint(UI->getParent()); + if (IfCond) + UI->getParent()->setName("omp_if.end"); + UI->eraseFromParent(); + return Builder.saveIP(); +} + std::string OpenMPIRBuilder::getNameWithSeparators(ArrayRef Parts, StringRef FirstSeparator, StringRef Separator) { @@ -4088,6 +4159,15 @@ Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) { return getOrCreateInternalVariable(KmpCriticalNameTy, Name); } +Value *OpenMPIRBuilder::getSizeInBytes(Value *BasePtr) { + LLVMContext &Ctx = Builder.getContext(); + Value *Null = Constant::getNullValue(BasePtr->getType()->getPointerTo()); + Value *SizeGep = + Builder.CreateGEP(BasePtr->getType(), Null, Builder.getInt32(1)); + Value *SizePtrToInt = Builder.CreatePtrToInt(SizeGep, Type::getInt64Ty(Ctx)); + return SizePtrToInt; +} + GlobalVariable * OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl &Mappings, std::string VarName) { @@ -4111,9 +4191,12 @@ void OpenMPIRBuilder::createMapperAllocas(const LocationDescription &Loc, auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands); auto *ArrI64Ty = ArrayType::get(Int64, NumOperands); Builder.restoreIP(AllocaIP); - AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI8PtrTy); - AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy); - AllocaInst *ArgSizes = Builder.CreateAlloca(ArrI64Ty); + AllocaInst *ArgsBase = Builder.CreateAlloca( + ArrI8PtrTy, /* ArraySize = */ nullptr, ".offload_baseptrs"); + AllocaInst *Args = Builder.CreateAlloca(ArrI8PtrTy, /* ArraySize = */ nullptr, + ".offload_ptrs"); + AllocaInst *ArgSizes = Builder.CreateAlloca( + ArrI64Ty, /* ArraySize = */ nullptr, ".offload_sizes"); Builder.restoreIP(Loc.IP); MapperAllocas.ArgsBase = ArgsBase; MapperAllocas.Args = Args; diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp index d1fa73503a440..05a1d7a58b84d 100644 --- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp +++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp @@ -4912,6 +4912,245 @@ TEST_F(OpenMPIRBuilderTest, EmitMapperCall) { EXPECT_TRUE(MapperCall->getOperand(8)->getType()->isPointerTy()); } +TEST_F(OpenMPIRBuilderTest, TargetEnterData) { + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumDataOperands = 1; + int64_t DeviceID = 2; + struct OpenMPIRBuilder::MapperAllocas MapperAllocas; + SmallVector MapTypeFlagsTo = {1}; + SmallVector MapNames; + auto *I8PtrTy = Builder.getInt8PtrTy(); + auto *ArrI8PtrTy = ArrayType::get(I8PtrTy, NumDataOperands); + auto *I64Ty = Builder.getInt64Ty(); + auto *ArrI64Ty = ArrayType::get(I64Ty, NumDataOperands); + + AllocaInst *Val1 = + Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); + ASSERT_NE(Val1, nullptr); + + IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); + OMPBuilder.createMapperAllocas(Builder.saveIP(), AllocaIP, NumDataOperands, + MapperAllocas); + + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + auto ProcessMapOpCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + Value *DataValue = Val1; + Value *DataPtrBase; + Value *DataPtr; + DataPtrBase = DataValue; + DataPtr = DataValue; + Builder.restoreIP(CodeGenIP); + + Value *Null = Constant::getNullValue(DataValue->getType()->getPointerTo()); + Value *SizeGep = + Builder.CreateGEP(DataValue->getType(), Null, Builder.getInt32(1)); + Value *SizePtrToInt = Builder.CreatePtrToInt(SizeGep, 
I64Ty); + + Value *PtrBaseGEP = + Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase, + {Builder.getInt32(0), Builder.getInt32(0)}); + Value *PtrBaseCast = Builder.CreateBitCast( + PtrBaseGEP, DataPtrBase->getType()->getPointerTo()); + Builder.CreateStore(DataPtrBase, PtrBaseCast); + Value *PtrGEP = + Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args, + {Builder.getInt32(0), Builder.getInt32(0)}); + Value *PtrCast = + Builder.CreateBitCast(PtrGEP, DataPtr->getType()->getPointerTo()); + Builder.CreateStore(DataPtr, PtrCast); + Value *SizeGEP = + Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes, + {Builder.getInt32(0), Builder.getInt32(0)}); + Builder.CreateStore(SizePtrToInt, SizeGEP); + }; + + Builder.restoreIP(OMPBuilder.createTargetData( + Loc, Builder.saveIP(), MapTypeFlagsTo, MapNames, MapperAllocas, + /* IsBegin= */ true, DeviceID, /* IfCond= */ nullptr, ProcessMapOpCB)); + + CallInst *TargetDataCall = dyn_cast(&BB->back()); + EXPECT_NE(TargetDataCall, nullptr); + EXPECT_EQ(TargetDataCall->arg_size(), 9U); + EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), + "__tgt_target_data_begin_mapper"); + EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); + EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); + EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); + + Builder.CreateRetVoid(); + EXPECT_FALSE(verifyModule(*M, &errs())); +} + +TEST_F(OpenMPIRBuilderTest, TargetExitData) { + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumDataOperands = 1; + int64_t DeviceID = 2; + struct OpenMPIRBuilder::MapperAllocas MapperAllocas; + SmallVector MapTypeFlagsFrom = {2}; + SmallVector MapNames; + auto *I8PtrTy = Builder.getInt8PtrTy(); + auto *ArrI8PtrTy = ArrayType::get(I8PtrTy, NumDataOperands); + auto *I64Ty = Builder.getInt64Ty(); + auto *ArrI64Ty = ArrayType::get(I64Ty, NumDataOperands); + + AllocaInst *Val1 = + Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); + ASSERT_NE(Val1, nullptr); + + IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); + OMPBuilder.createMapperAllocas(Builder.saveIP(), AllocaIP, NumDataOperands, + MapperAllocas); + + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + auto ProcessMapOpCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + Value *DataValue = Val1; + Value *DataPtrBase; + Value *DataPtr; + DataPtrBase = DataValue; + DataPtr = DataValue; + Builder.restoreIP(CodeGenIP); + + Value *Null = Constant::getNullValue(DataValue->getType()->getPointerTo()); + Value *SizeGep = + Builder.CreateGEP(DataValue->getType(), Null, Builder.getInt32(1)); + Value *SizePtrToInt = Builder.CreatePtrToInt(SizeGep, I64Ty); + + Value *PtrBaseGEP = + Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase, + {Builder.getInt32(0), Builder.getInt32(0)}); + Value *PtrBaseCast = Builder.CreateBitCast( + PtrBaseGEP, DataPtrBase->getType()->getPointerTo()); + Builder.CreateStore(DataPtrBase, PtrBaseCast); + Value *PtrGEP = + Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args, + {Builder.getInt32(0), Builder.getInt32(0)}); + Value *PtrCast = + Builder.CreateBitCast(PtrGEP, DataPtr->getType()->getPointerTo()); + Builder.CreateStore(DataPtr, PtrCast); + Value *SizeGEP = + Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes, + {Builder.getInt32(0), 
Builder.getInt32(0)}); + Builder.CreateStore(SizePtrToInt, SizeGEP); + }; + + Builder.restoreIP(OMPBuilder.createTargetData( + Loc, Builder.saveIP(), MapTypeFlagsFrom, MapNames, MapperAllocas, + /* IsBegin= */ false, DeviceID, /* IfCond= */ nullptr, ProcessMapOpCB)); + + CallInst *TargetDataCall = dyn_cast(&BB->back()); + EXPECT_NE(TargetDataCall, nullptr); + EXPECT_EQ(TargetDataCall->arg_size(), 9U); + EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), + "__tgt_target_data_end_mapper"); + EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); + EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); + EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); + + Builder.CreateRetVoid(); + EXPECT_FALSE(verifyModule(*M, &errs())); +} + +TEST_F(OpenMPIRBuilderTest, TargetDataRegion) { + OpenMPIRBuilder OMPBuilder(*M); + OMPBuilder.initialize(); + F->setName("func"); + IRBuilder<> Builder(BB); + + OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL}); + + unsigned NumDataOperands = 1; + int64_t DeviceID = 2; + struct OpenMPIRBuilder::MapperAllocas MapperAllocas; + SmallVector MapTypeFlagsToFrom = {3}; + SmallVector MapNames; + auto *I8PtrTy = Builder.getInt8PtrTy(); + auto *ArrI8PtrTy = ArrayType::get(I8PtrTy, NumDataOperands); + auto *I64Ty = Builder.getInt64Ty(); + auto *ArrI64Ty = ArrayType::get(I64Ty, NumDataOperands); + + AllocaInst *Val1 = + Builder.CreateAlloca(Builder.getInt32Ty(), Builder.getInt64(1)); + ASSERT_NE(Val1, nullptr); + + IRBuilder<>::InsertPoint AllocaIP(&F->getEntryBlock(), + F->getEntryBlock().getFirstInsertionPt()); + OMPBuilder.createMapperAllocas(Builder.saveIP(), AllocaIP, NumDataOperands, + MapperAllocas); + + using InsertPointTy = OpenMPIRBuilder::InsertPointTy; + auto ProcessMapOpCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) { + Value *DataValue = Val1; + Value *DataPtrBase; + Value *DataPtr; + DataPtrBase = DataValue; + DataPtr = DataValue; + Builder.restoreIP(CodeGenIP); + + Value *Null = Constant::getNullValue(DataValue->getType()->getPointerTo()); + Value *SizeGep = + Builder.CreateGEP(DataValue->getType(), Null, Builder.getInt32(1)); + Value *SizePtrToInt = Builder.CreatePtrToInt(SizeGep, I64Ty); + + Value *PtrBaseGEP = + Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase, + {Builder.getInt32(0), Builder.getInt32(0)}); + Value *PtrBaseCast = Builder.CreateBitCast( + PtrBaseGEP, DataPtrBase->getType()->getPointerTo()); + Builder.CreateStore(DataPtrBase, PtrBaseCast); + Value *PtrGEP = + Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args, + {Builder.getInt32(0), Builder.getInt32(0)}); + Value *PtrCast = + Builder.CreateBitCast(PtrGEP, DataPtr->getType()->getPointerTo()); + Builder.CreateStore(DataPtr, PtrCast); + Value *SizeGEP = + Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes, + {Builder.getInt32(0), Builder.getInt32(0)}); + Builder.CreateStore(SizePtrToInt, SizeGEP); + }; + + auto BodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + Builder.restoreIP(codeGenIP); + auto *SI = Builder.CreateStore(Builder.getInt32(99), Val1); + auto *newBB = SplitBlock(Builder.GetInsertBlock(), SI); + Builder.SetInsertPoint(newBB); + auto *UI = &Builder.GetInsertBlock()->back(); + SplitBlock(Builder.GetInsertBlock(), UI); + }; + + Builder.restoreIP(OMPBuilder.createTargetData( + Loc, Builder.saveIP(), MapTypeFlagsToFrom, MapNames, MapperAllocas, + /* IsBegin= */ false, DeviceID, /* IfCond= */ nullptr, ProcessMapOpCB, + BodyCB)); + + CallInst *TargetDataCall = + 
dyn_cast(&Builder.GetInsertBlock()->back()); + EXPECT_NE(TargetDataCall, nullptr); + EXPECT_EQ(TargetDataCall->arg_size(), 9U); + EXPECT_EQ(TargetDataCall->getCalledFunction()->getName(), + "__tgt_target_data_end_mapper"); + EXPECT_TRUE(TargetDataCall->getOperand(1)->getType()->isIntegerTy(64)); + EXPECT_TRUE(TargetDataCall->getOperand(2)->getType()->isIntegerTy(32)); + EXPECT_TRUE(TargetDataCall->getOperand(8)->getType()->isPointerTy()); + + Builder.CreateRetVoid(); + EXPECT_FALSE(verifyModule(*M, &errs())); +} + TEST_F(OpenMPIRBuilderTest, CreateTask) { using InsertPointTy = OpenMPIRBuilder::InsertPointTy; OpenMPIRBuilder OMPBuilder(*M); diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/OpenMPCommon.h b/mlir/include/mlir/Target/LLVMIR/Dialect/OpenMPCommon.h new file mode 100644 index 0000000000000..6b23b2f7b5448 --- /dev/null +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/OpenMPCommon.h @@ -0,0 +1,40 @@ +//===- OpenMPCommon.h - Utils for translating MLIR dialect to LLVM IR------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines general utilities for MLIR Dialect translations to LLVM IR. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVMIR_DIALECT_OPENMPCOMMON_H +#define MLIR_TARGET_LLVMIR_DIALECT_OPENMPCOMMON_H + +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/Location.h" +#include "mlir/Support/LLVM.h" + +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" +#include "llvm/IR/IRBuilder.h" + +namespace mlir { +namespace LLVM { + +/// Create a constant string location from the MLIR Location information. +llvm::Constant *createSourceLocStrFromLocation(Location loc, + llvm::OpenMPIRBuilder &builder, + StringRef name, + uint32_t &strLen); + +/// Create a constant string representing the mapping information extracted from +/// the MLIR location information. 
+llvm::Constant *createMappingInformation(Location loc, + llvm::OpenMPIRBuilder &builder); + +} // namespace LLVM +} // namespace mlir + +#endif // MLIR_TARGET_LLVMIR_DIALECT_OPENMPCOMMON_H diff --git a/mlir/lib/Target/LLVMIR/CMakeLists.txt b/mlir/lib/Target/LLVMIR/CMakeLists.txt index c8e7797c76129..7411573791b76 100644 --- a/mlir/lib/Target/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Target/LLVMIR/CMakeLists.txt @@ -19,6 +19,7 @@ add_mlir_translation_library(MLIRTargetLLVMIRExport LoopAnnotationTranslation.cpp ModuleTranslation.cpp TypeToLLVM.cpp + Dialect/OpenMPCommon.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Target/LLVMIR diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp index 0a66ce956c950..4390839e14af0 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenACC/OpenACCToLLVMIRTranslation.cpp @@ -18,6 +18,7 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Operation.h" #include "mlir/Support/LLVM.h" +#include "mlir/Target/LLVMIR/Dialect/OpenMPCommon.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "llvm/ADT/TypeSwitch.h" @@ -46,23 +47,6 @@ static constexpr uint64_t kHoldFlag = 0x2000; /// Default value for the device id static constexpr int64_t kDefaultDevice = -1; -/// Create a constant string location from the MLIR Location information. -static llvm::Constant *createSourceLocStrFromLocation(Location loc, - OpenACCIRBuilder &builder, - StringRef name, - uint32_t &strLen) { - if (auto fileLoc = loc.dyn_cast()) { - StringRef fileName = fileLoc.getFilename(); - unsigned lineNo = fileLoc.getLine(); - unsigned colNo = fileLoc.getColumn(); - return builder.getOrCreateSrcLocStr(name, fileName, lineNo, colNo, strLen); - } - std::string locStr; - llvm::raw_string_ostream locOS(locStr); - locOS << loc; - return builder.getOrCreateSrcLocStr(locOS.str(), strLen); -} - /// Create the location struct from the operation location information. static llvm::Value *createSourceLocationInfo(OpenACCIRBuilder &builder, Operation *op) { @@ -70,24 +54,11 @@ static llvm::Value *createSourceLocationInfo(OpenACCIRBuilder &builder, auto funcOp = op->getParentOfType(); StringRef funcName = funcOp ? funcOp.getName() : "unknown"; uint32_t strLen; - llvm::Constant *locStr = - createSourceLocStrFromLocation(loc, builder, funcName, strLen); + llvm::Constant *locStr = mlir::LLVM::createSourceLocStrFromLocation( + loc, builder, funcName, strLen); return builder.getOrCreateIdent(locStr, strLen); } -/// Create a constant string representing the mapping information extracted from -/// the MLIR location information. -static llvm::Constant *createMappingInformation(Location loc, - OpenACCIRBuilder &builder) { - uint32_t strLen; - if (auto nameLoc = loc.dyn_cast()) { - StringRef name = nameLoc.getName(); - return createSourceLocStrFromLocation(nameLoc.getChildLoc(), builder, name, - strLen); - } - return createSourceLocStrFromLocation(loc, builder, "unknown", strLen); -} - /// Return the runtime function used to lower the given operation. static llvm::Function *getAssociatedFunction(OpenACCIRBuilder &builder, Operation *op) { @@ -107,19 +78,6 @@ static llvm::Function *getAssociatedFunction(OpenACCIRBuilder &builder, llvm_unreachable("Unknown OpenACC operation"); } -/// Computes the size of type in bytes. 
-static llvm::Value *getSizeInBytes(llvm::IRBuilderBase &builder, - llvm::Value *basePtr) { - llvm::LLVMContext &ctx = builder.getContext(); - llvm::Value *null = - llvm::Constant::getNullValue(basePtr->getType()->getPointerTo()); - llvm::Value *sizeGep = - builder.CreateGEP(basePtr->getType(), null, builder.getInt32(1)); - llvm::Value *sizePtrToInt = - builder.CreatePtrToInt(sizeGep, llvm::Type::getInt64Ty(ctx)); - return sizePtrToInt; -} - /// Extract pointer, size and mapping information from operands /// to populate the future functions arguments. static LogicalResult @@ -153,7 +111,7 @@ processOperands(llvm::IRBuilderBase &builder, } else if (data.getType().isa()) { dataPtrBase = dataValue; dataPtr = dataValue; - dataSize = getSizeInBytes(builder, dataValue); + dataSize = accBuilder->getSizeInBytes(dataValue); } else { return op->emitOpError() << "Data operand must be legalized before translation." @@ -185,7 +143,7 @@ processOperands(llvm::IRBuilderBase &builder, flags.push_back(operandFlag); llvm::Constant *mapName = - createMappingInformation(data.getLoc(), *accBuilder); + mlir::LLVM::createMappingInformation(data.getLoc(), *accBuilder); names.push_back(mapName); ++index; } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 2cfdaa3f8730a..5469f4b0e1f79 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -15,10 +15,12 @@ #include "mlir/IR/IRMapping.h" #include "mlir/IR/Operation.h" #include "mlir/Support/LLVM.h" +#include "mlir/Target/LLVMIR/Dialect/OpenMPCommon.h" #include "mlir/Target/LLVMIR/ModuleTranslation.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/TypeSwitch.h" +#include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/IRBuilder.h" @@ -1351,6 +1353,195 @@ convertOmpThreadprivate(Operation &opInst, llvm::IRBuilderBase &builder, return success(); } +/// Process MapOperands for Target Data directives. +static LogicalResult processMapOperand( + llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation, + const SmallVector &mapOperands, const ArrayAttr &mapTypes, + SmallVector &mapTypeFlags, + SmallVectorImpl &mapNames, + struct llvm::OpenMPIRBuilder::MapperAllocas &mapperAllocas) { + auto numMapOperands = mapOperands.size(); + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + llvm::PointerType *i8PtrTy = builder.getInt8PtrTy(); + llvm::ArrayType *arrI8PtrTy = llvm::ArrayType::get(i8PtrTy, numMapOperands); + llvm::IntegerType *i64Ty = builder.getInt64Ty(); + llvm::ArrayType *arrI64Ty = llvm::ArrayType::get(i64Ty, numMapOperands); + + unsigned index = 0; + for (const auto &mapOp : mapOperands) { + const auto &mapTypeOp = mapTypes[index]; + + llvm::Value *mapOpValue = moduleTranslation.lookupValue(mapOp); + llvm::Value *mapOpPtrBase; + llvm::Value *mapOpPtr; + llvm::Value *mapOpSize; + + if (mapOp.getType().isa()) { + mapOpPtrBase = mapOpValue; + mapOpPtr = mapOpValue; + mapOpSize = ompBuilder->getSizeInBytes(mapOpValue); + } else { + return failure(); + } + + // Store base pointer extracted from operand into the i-th position of + // argBase. 
+ llvm::Value *ptrBaseGEP = builder.CreateInBoundsGEP( + arrI8PtrTy, mapperAllocas.ArgsBase, + {builder.getInt32(0), builder.getInt32(index)}); + llvm::Value *ptrBaseCast = builder.CreateBitCast( + ptrBaseGEP, mapOpPtrBase->getType()->getPointerTo()); + builder.CreateStore(mapOpPtrBase, ptrBaseCast); + + // Store pointer extracted from operand into the i-th position of args. + llvm::Value *ptrGEP = builder.CreateInBoundsGEP( + arrI8PtrTy, mapperAllocas.Args, + {builder.getInt32(0), builder.getInt32(index)}); + llvm::Value *ptrCast = + builder.CreateBitCast(ptrGEP, mapOpPtr->getType()->getPointerTo()); + builder.CreateStore(mapOpPtr, ptrCast); + + // Store size extracted from operand into the i-th position of argSizes. + llvm::Value *sizeGEP = builder.CreateInBoundsGEP( + arrI64Ty, mapperAllocas.ArgSizes, + {builder.getInt32(0), builder.getInt32(index)}); + builder.CreateStore(mapOpSize, sizeGEP); + + mapTypeFlags.push_back(mapTypeOp.dyn_cast().getInt()); + llvm::Constant *mapName = + mlir::LLVM::createMappingInformation(mapOp.getLoc(), *ompBuilder); + mapNames.push_back(mapName); + ++index; + } + + return success(); +} + +static LogicalResult +convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + unsigned numMapOperands; + bool mapperFunc; + llvm::Value *ifCond = nullptr; + int64_t deviceID = llvm::omp::OMP_DEVICEID_UNDEF; + SmallVector mapOperands; + ArrayAttr mapTypes; + + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + + LogicalResult result = + llvm::TypeSwitch(op) + .Case([&](omp::DataOp dataOp) { + if (dataOp.getUseDeviceAddr().size() || + dataOp.getUseDevicePtr().size()) + return failure(); + + if (auto ifExprVar = dataOp.getIfExpr()) + ifCond = moduleTranslation.lookupValue(ifExprVar); + + if (auto devId = dataOp.getDevice()) + if (auto constOp = mlir::dyn_cast( + devId.getDefiningOp())) + if (auto intAttr = + constOp.getValue().dyn_cast()) + deviceID = intAttr.getInt(); + + numMapOperands = dataOp.getMapOperands().size(); + mapOperands = dataOp.getMapOperands(); + mapTypes = dataOp.getMapTypes(); + return success(); + }) + .Case([&](omp::EnterDataOp enterDataOp) { + if (enterDataOp.getNowait()) + return failure(); + + if (auto ifExprVar = enterDataOp.getIfExpr()) + ifCond = moduleTranslation.lookupValue(ifExprVar); + + if (auto devId = enterDataOp.getDevice()) + if (auto constOp = mlir::dyn_cast( + devId.getDefiningOp())) + if (auto intAttr = + constOp.getValue().dyn_cast()) + deviceID = intAttr.getInt(); + + numMapOperands = enterDataOp.getMapOperands().size(); + mapOperands = enterDataOp.getMapOperands(); + mapTypes = enterDataOp.getMapTypes(); + mapperFunc = true; + return success(); + }) + .Case([&](omp::ExitDataOp exitDataOp) { + if (exitDataOp.getNowait()) + return failure(); + + if (auto ifExprVar = exitDataOp.getIfExpr()) + ifCond = moduleTranslation.lookupValue(ifExprVar); + + if (auto devId = exitDataOp.getDevice()) + if (auto constOp = mlir::dyn_cast( + devId.getDefiningOp())) + if (auto intAttr = + constOp.getValue().dyn_cast()) + deviceID = intAttr.getInt(); + + numMapOperands = exitDataOp.getMapOperands().size(); + mapOperands = exitDataOp.getMapOperands(); + mapTypes = exitDataOp.getMapTypes(); + mapperFunc = false; + return success(); + }) + .Default([&](Operation *op) { + return op->emitError("unsupported OpenMP operation: ") + << op->getName(); + }); + + if (failed(result)) + return failure(); + + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + 
llvm::OpenMPIRBuilder::InsertPointTy allocaIP = + findAllocaInsertPoint(builder, moduleTranslation); + + struct llvm::OpenMPIRBuilder::MapperAllocas mapperAllocas; + SmallVector mapTypeFlags; + SmallVector mapNames; + ompBuilder->createMapperAllocas(builder.saveIP(), allocaIP, numMapOperands, + mapperAllocas); + + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + LogicalResult processMapOpStatus = success(); + auto processMapOpCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + builder.restoreIP(codeGenIP); + processMapOpStatus = + processMapOperand(builder, moduleTranslation, mapOperands, mapTypes, + mapTypeFlags, mapNames, mapperAllocas); + }; + + LogicalResult bodyGenStatus = success(); + auto bodyCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { + // DataOp has only one region associated with it. + auto ®ion = cast(op).getRegion(); + builder.restoreIP(codeGenIP); + convertOmpOpRegions(region, "omp.data.region", builder, moduleTranslation, + bodyGenStatus); + }; + + if (isa(op)) { + builder.restoreIP(ompBuilder->createTargetData( + ompLoc, builder.saveIP(), mapTypeFlags, mapNames, mapperAllocas, + mapperFunc, deviceID, ifCond, processMapOpCB, bodyCB)); + } else { + builder.restoreIP(ompBuilder->createTargetData( + ompLoc, builder.saveIP(), mapTypeFlags, mapNames, mapperAllocas, + mapperFunc, deviceID, ifCond, processMapOpCB)); + } + + if (failed(processMapOpStatus)) + return processMapOpStatus; + return bodyGenStatus; +} + namespace { /// Implementation of the dialect interface that converts operations belonging @@ -1465,6 +1656,9 @@ LogicalResult OpenMPDialectLLVMIRTranslationInterface::convertOperation( .Case([&](omp::ThreadprivateOp) { return convertOmpThreadprivate(*op, builder, moduleTranslation); }) + .Case([&](auto op) { + return convertOmpTargetData(op, builder, moduleTranslation); + }) .Default([&](Operation *inst) { return inst->emitError("unsupported OpenMP operation: ") << inst->getName(); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMPCommon.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMPCommon.cpp new file mode 100644 index 0000000000000..fd739cf4d7e96 --- /dev/null +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMPCommon.cpp @@ -0,0 +1,41 @@ +//===- OpenMPCommon.cpp - Utils for translating MLIR dialect to LLVM IR----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines general utilities for MLIR Dialect translations to LLVM IR. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Target/LLVMIR/Dialect/OpenMPCommon.h" + +llvm::Constant * +mlir::LLVM::createSourceLocStrFromLocation(Location loc, + llvm::OpenMPIRBuilder &builder, + StringRef name, uint32_t &strLen) { + if (auto fileLoc = loc.dyn_cast()) { + StringRef fileName = fileLoc.getFilename(); + unsigned lineNo = fileLoc.getLine(); + unsigned colNo = fileLoc.getColumn(); + return builder.getOrCreateSrcLocStr(name, fileName, lineNo, colNo, strLen); + } + std::string locStr; + llvm::raw_string_ostream locOS(locStr); + locOS << loc; + return builder.getOrCreateSrcLocStr(locOS.str(), strLen); +} + +llvm::Constant * +mlir::LLVM::createMappingInformation(Location loc, + llvm::OpenMPIRBuilder &builder) { + uint32_t strLen; + if (auto nameLoc = loc.dyn_cast()) { + StringRef name = nameLoc.getName(); + return createSourceLocStrFromLocation(nameLoc.getChildLoc(), builder, name, + strLen); + } + return createSourceLocStrFromLocation(loc, builder, "unknown", strLen); +} diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir new file mode 100644 index 0000000000000..7d29ccb1e0da7 --- /dev/null +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -0,0 +1,176 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +llvm.func @_QPopenmp_target_data() { + %0 = llvm.mlir.constant(1 : i64) : i64 + %1 = llvm.alloca %0 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFopenmp_target_dataEi"} : (i64) -> !llvm.ptr + omp.target_data map((tofrom -> %1 : !llvm.ptr)) { + %2 = llvm.mlir.constant(99 : i32) : i32 + llvm.store %2, %1 : !llvm.ptr + omp.terminator + } + llvm.return +} + +// CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 3] +// CHECK-LABEL: define void @_QPopenmp_target_data() { +// CHECK: %[[VAL_0:.*]] = alloca [1 x ptr], align 8 +// CHECK: %[[VAL_1:.*]] = alloca [1 x ptr], align 8 +// CHECK: %[[VAL_2:.*]] = alloca [1 x i64], align 8 +// CHECK: %[[VAL_3:.*]] = alloca i32, i64 1, align 4 +// CHECK: br label %[[VAL_4:.*]] +// CHECK: entry: ; preds = %[[VAL_5:.*]] +// CHECK: %[[VAL_6:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 +// CHECK: store ptr %[[VAL_3]], ptr %[[VAL_6]], align 8 +// CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: store ptr %[[VAL_3]], ptr %[[VAL_7]], align 8 +// CHECK: %[[VAL_8:.*]] = getelementptr inbounds [1 x i64], ptr %[[VAL_2]], i32 0, i32 0 +// CHECK: store i64 ptrtoint (ptr getelementptr (ptr, ptr null, i32 1) to i64), ptr %[[VAL_8]], align 4 +// CHECK: %[[VAL_9:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 +// CHECK: %[[VAL_10:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: %[[VAL_11:.*]] = getelementptr inbounds [1 x i64], ptr %[[VAL_2]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_begin_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_9]], ptr %[[VAL_10]], ptr %[[VAL_11]], ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) +// CHECK: br label %[[VAL_12:.*]] +// CHECK: omp.data.region: ; preds = %[[VAL_4]] +// CHECK: store i32 99, ptr %[[VAL_3]], align 4 +// CHECK: br label %[[VAL_13:.*]] +// CHECK: omp.region.cont: ; preds = %[[VAL_12]] +// CHECK: %[[VAL_14:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 +// CHECK: %[[VAL_15:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: 
%[[VAL_16:.*]] = getelementptr inbounds [1 x i64], ptr %[[VAL_2]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_end_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_14]], ptr %[[VAL_15]], ptr %[[VAL_16]], ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) +// CHECK: ret void + +// ----- + +llvm.func @_QPopenmp_target_data_region(%1 : !llvm.ptr>) { + omp.target_data map((from -> %1 : !llvm.ptr>)) { + %2 = llvm.mlir.constant(99 : i32) : i32 + %3 = llvm.mlir.constant(1 : i64) : i64 + %4 = llvm.mlir.constant(1 : i64) : i64 + %5 = llvm.mlir.constant(0 : i64) : i64 + %6 = llvm.getelementptr %1[0, %5] : (!llvm.ptr>, i64) -> !llvm.ptr + llvm.store %2, %6 : !llvm.ptr + omp.terminator + } + llvm.return +} + +// CHECK: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 2] +// CHECK-LABEL: define void @_QPopenmp_target_data_region +// CHECK: (ptr %[[ARG_0:.*]]) { +// CHECK: %[[VAL_0:.*]] = alloca [1 x ptr], align 8 +// CHECK: %[[VAL_1:.*]] = alloca [1 x ptr], align 8 +// CHECK: %[[VAL_2:.*]] = alloca [1 x i64], align 8 +// CHECK: br label %[[VAL_3:.*]] +// CHECK: entry: ; preds = %[[VAL_4:.*]] +// CHECK: %[[VAL_5:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 +// CHECK: store ptr %[[VAL_6:.*]], ptr %[[VAL_5]], align 8 +// CHECK: %[[VAL_7:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: store ptr %[[VAL_6]], ptr %[[VAL_7]], align 8 +// CHECK: %[[VAL_8:.*]] = getelementptr inbounds [1 x i64], ptr %[[VAL_2]], i32 0, i32 0 +// CHECK: store i64 ptrtoint (ptr getelementptr (ptr, ptr null, i32 1) to i64), ptr %[[VAL_8]], align 4 +// CHECK: %[[VAL_9:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 +// CHECK: %[[VAL_10:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: %[[VAL_11:.*]] = getelementptr inbounds [1 x i64], ptr %[[VAL_2]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_begin_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_9]], ptr %[[VAL_10]], ptr %[[VAL_11]], ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) +// CHECK: br label %[[VAL_12:.*]] +// CHECK: omp.data.region: ; preds = %[[VAL_3]] +// CHECK: %[[VAL_13:.*]] = getelementptr [1024 x i32], ptr %[[VAL_6]], i32 0, i64 0 +// CHECK: store i32 99, ptr %[[VAL_13]], align 4 +// CHECK: br label %[[VAL_14:.*]] +// CHECK: omp.region.cont: ; preds = %[[VAL_12]] +// CHECK: %[[VAL_15:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_0]], i32 0, i32 0 +// CHECK: %[[VAL_16:.*]] = getelementptr inbounds [1 x ptr], ptr %[[VAL_1]], i32 0, i32 0 +// CHECK: %[[VAL_17:.*]] = getelementptr inbounds [1 x i64], ptr %[[VAL_2]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_end_mapper(ptr @2, i64 -1, i32 1, ptr %[[VAL_15]], ptr %[[VAL_16]], ptr %[[VAL_17]], ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) +// CHECK: ret void + +// ----- + +llvm.func @_QPomp_target_enter_exit(%1 : !llvm.ptr>, %3 : !llvm.ptr>) { + %4 = llvm.mlir.constant(1 : i64) : i64 + %5 = llvm.alloca %4 x i32 {bindc_name = "dvc", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_enter_exitEdvc"} : (i64) -> !llvm.ptr + %6 = llvm.mlir.constant(1 : i64) : i64 + %7 = llvm.alloca %6 x i32 {bindc_name = "i", in_type = i32, operand_segment_sizes = array, uniq_name = "_QFomp_target_enter_exitEi"} : (i64) -> !llvm.ptr + %8 = llvm.mlir.constant(5 : i32) : i32 + llvm.store %8, %7 : !llvm.ptr + %9 = llvm.mlir.constant(2 : i32) : i32 + llvm.store %9, %5 : !llvm.ptr + %10 = llvm.load %7 : !llvm.ptr + %11 = llvm.mlir.constant(10 : i32) : 
i32 + %12 = llvm.icmp "slt" %10, %11 : i32 + %13 = llvm.load %5 : !llvm.ptr + omp.target_enter_data if(%12 : i1) device(%13 : i32) map((to -> %1 : !llvm.ptr>), (alloc -> %3 : !llvm.ptr>)) + %14 = llvm.load %7 : !llvm.ptr + %15 = llvm.mlir.constant(10 : i32) : i32 + %16 = llvm.icmp "sgt" %14, %15 : i32 + %17 = llvm.load %5 : !llvm.ptr + omp.target_exit_data if(%16 : i1) device(%17 : i32) map((from -> %1 : !llvm.ptr>), (release -> %3 : !llvm.ptr>)) + llvm.return +} + +// CHECK: @.offload_maptypes = private unnamed_addr constant [2 x i64] [i64 1, i64 0] +// CHECK: @.offload_maptypes.1 = private unnamed_addr constant [2 x i64] [i64 2, i64 0] +// CHECK-LABEL: define void @_QPomp_target_enter_exit +// CHECK: (ptr %[[ARG_0:.*]], ptr %[[ARG_1:.*]]) { +// CHECK: %[[VAL_0:.*]] = alloca [2 x ptr], align 8 +// CHECK: %[[VAL_1:.*]] = alloca [2 x ptr], align 8 +// CHECK: %[[VAL_2:.*]] = alloca [2 x i64], align 8 +// CHECK: %[[VAL_3:.*]] = alloca [2 x ptr], align 8 +// CHECK: %[[VAL_4:.*]] = alloca [2 x ptr], align 8 +// CHECK: %[[VAL_5:.*]] = alloca [2 x i64], align 8 +// CHECK: %[[VAL_6:.*]] = alloca i32, i64 1, align 4 +// CHECK: %[[VAL_7:.*]] = alloca i32, i64 1, align 4 +// CHECK: store i32 5, ptr %[[VAL_7]], align 4 +// CHECK: store i32 2, ptr %[[VAL_6]], align 4 +// CHECK: %[[VAL_8:.*]] = load i32, ptr %[[VAL_7]], align 4 +// CHECK: %[[VAL_9:.*]] = icmp slt i32 %[[VAL_8]], 10 +// CHECK: %[[VAL_10:.*]] = load i32, ptr %[[VAL_6]], align 4 +// CHECK: br label %[[VAL_11:.*]] +// CHECK: entry: ; preds = %[[VAL_12:.*]] +// CHECK: br i1 %[[VAL_9]], label %[[VAL_13:.*]], label %[[VAL_14:.*]] +// CHECK: omp_if.then: ; preds = %[[VAL_11]] +// CHECK: %[[VAL_15:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 0 +// CHECK: store ptr %[[VAL_16:.*]], ptr %[[VAL_15]], align 8 +// CHECK: %[[VAL_17:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 0 +// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_17]], align 8 +// CHECK: %[[VAL_18:.*]] = getelementptr inbounds [2 x i64], ptr %[[VAL_5]], i32 0, i32 0 +// CHECK: store i64 ptrtoint (ptr getelementptr (ptr, ptr null, i32 1) to i64), ptr %[[VAL_18]], align 4 +// CHECK: %[[VAL_19:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 1 +// CHECK: store ptr %[[VAL_20:.*]], ptr %[[VAL_19]], align 8 +// CHECK: %[[VAL_21:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 1 +// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_21]], align 8 +// CHECK: %[[VAL_22:.*]] = getelementptr inbounds [2 x i64], ptr %[[VAL_5]], i32 0, i32 1 +// CHECK: store i64 ptrtoint (ptr getelementptr (ptr, ptr null, i32 1) to i64), ptr %[[VAL_22]], align 4 +// CHECK: %[[VAL_23:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_3]], i32 0, i32 0 +// CHECK: %[[VAL_24:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_4]], i32 0, i32 0 +// CHECK: %[[VAL_25:.*]] = getelementptr inbounds [2 x i64], ptr %[[VAL_5]], i32 0, i32 0 +// CHECK: call void @__tgt_target_data_begin_mapper(ptr @3, i64 -1, i32 2, ptr %[[VAL_23]], ptr %[[VAL_24]], ptr %[[VAL_25]], ptr @.offload_maptypes, ptr @.offload_mapnames, ptr null) +// CHECK: br label %[[VAL_14]] +// CHECK: omp_if.end: ; preds = %[[VAL_11]], %[[VAL_13]] +// CHECK: %[[VAL_26:.*]] = load i32, ptr %[[VAL_7]], align 4 +// CHECK: %[[VAL_27:.*]] = icmp sgt i32 %[[VAL_26]], 10 +// CHECK: %[[VAL_28:.*]] = load i32, ptr %[[VAL_6]], align 4 +// CHECK: br i1 %[[VAL_27]], label %[[VAL_29:.*]], label %[[VAL_30:.*]] +// CHECK: omp_if.then4: ; preds = %[[VAL_14]] +// CHECK: %[[VAL_31:.*]] = getelementptr inbounds [2 x 
ptr], ptr %[[VAL_0]], i32 0, i32 0
+// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_31]], align 8
+// CHECK: %[[VAL_32:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 0
+// CHECK: store ptr %[[VAL_16]], ptr %[[VAL_32]], align 8
+// CHECK: %[[VAL_33:.*]] = getelementptr inbounds [2 x i64], ptr %[[VAL_2]], i32 0, i32 0
+// CHECK: store i64 ptrtoint (ptr getelementptr (ptr, ptr null, i32 1) to i64), ptr %[[VAL_33]], align 4
+// CHECK: %[[VAL_34:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 1
+// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_34]], align 8
+// CHECK: %[[VAL_35:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 1
+// CHECK: store ptr %[[VAL_20]], ptr %[[VAL_35]], align 8
+// CHECK: %[[VAL_36:.*]] = getelementptr inbounds [2 x i64], ptr %[[VAL_2]], i32 0, i32 1
+// CHECK: store i64 ptrtoint (ptr getelementptr (ptr, ptr null, i32 1) to i64), ptr %[[VAL_36]], align 4
+// CHECK: %[[VAL_37:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_0]], i32 0, i32 0
+// CHECK: %[[VAL_38:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_1]], i32 0, i32 0
+// CHECK: %[[VAL_39:.*]] = getelementptr inbounds [2 x i64], ptr %[[VAL_2]], i32 0, i32 0
+// CHECK: call void @__tgt_target_data_end_mapper(ptr @3, i64 -1, i32 2, ptr %[[VAL_37]], ptr %[[VAL_38]], ptr %[[VAL_39]], ptr @.offload_maptypes.1, ptr @.offload_mapnames.2, ptr null)
+// CHECK: br label %[[VAL_30]]
+// CHECK: omp_if.end5: ; preds = %[[VAL_14]], %[[VAL_29]]
+// CHECK: ret void
+
+// -----

From 57bfe25574a0bfa9b6701e645d1f1ec966f838d9 Mon Sep 17 00:00:00 2001
From: Nathan Ridge
Date: Sun, 19 Mar 2023 04:04:13 -0400
Subject: [PATCH 062/691] [clangd] Remove redundant use of getSpellingLoc()

getFileLoc() is guaranteed to return a file loc, and getSpellingLoc()
on a file loc is a no-op.
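As a hedged illustration of that invariant (expository only, not part of the
patch), the following sketch states what SourceManager guarantees here:

    // For any SourceLocation Loc and SourceManager SM:
    SourceLocation FileLoc = SM.getFileLoc(Loc);   // walks out of macro expansions
    assert(FileLoc.isFileID());                    // getFileLoc() yields a file loc
    assert(SM.getSpellingLoc(FileLoc) == FileLoc); // spelling loc of a file loc is itself

so wrapping the getFileLoc() result in getSpellingLoc() can never change it.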
Differential Revision: https://reviews.llvm.org/D146377
---
 clang-tools-extra/clangd/FindSymbols.cpp | 4 ++--
 clang-tools-extra/clangd/XRefs.cpp       | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp
index 2eab7ca27033e..790ee9af8f4ac 100644
--- a/clang-tools-extra/clangd/FindSymbols.cpp
+++ b/clang-tools-extra/clangd/FindSymbols.cpp
@@ -223,8 +223,8 @@ std::string getSymbolDetail(ASTContext &Ctx, const NamedDecl &ND) {
 std::optional declToSym(ASTContext &Ctx, const NamedDecl &ND) {
   auto &SM = Ctx.getSourceManager();
-  SourceLocation BeginLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getBeginLoc()));
-  SourceLocation EndLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getEndLoc()));
+  SourceLocation BeginLoc = SM.getFileLoc(ND.getBeginLoc());
+  SourceLocation EndLoc = SM.getFileLoc(ND.getEndLoc());
   const auto SymbolRange = toHalfOpenFileRange(SM, Ctx.getLangOpts(), {BeginLoc, EndLoc});
   if (!SymbolRange)
diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp
index 1b1220c8aa7a0..d4442c11a8ed0 100644
--- a/clang-tools-extra/clangd/XRefs.cpp
+++ b/clang-tools-extra/clangd/XRefs.cpp
@@ -1611,8 +1611,8 @@ declToHierarchyItem(const NamedDecl &ND, llvm::StringRef TUPath) {
   ASTContext &Ctx = ND.getASTContext();
   auto &SM = Ctx.getSourceManager();
   SourceLocation NameLoc = nameLocation(ND, Ctx.getSourceManager());
-  SourceLocation BeginLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getBeginLoc()));
-  SourceLocation EndLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getEndLoc()));
+  SourceLocation BeginLoc = SM.getFileLoc(ND.getBeginLoc());
+  SourceLocation EndLoc = SM.getFileLoc(ND.getEndLoc());
   const auto DeclRange = toHalfOpenFileRange(SM, Ctx.getLangOpts(), {BeginLoc, EndLoc});
   if (!DeclRange)

From 5452d8607185a6328dae766330c023cbf1105c95 Mon Sep 17 00:00:00 2001
From: Eli Friedman
Date: Mon, 20 Mar 2023 11:56:59 -0700
Subject: [PATCH 063/691] [llvm-readobj] Pretty-print IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY.

---
 llvm/tools/llvm-readobj/COFFDumper.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp
index 5279e5853cc59..22b4c58acde9d 100644
--- a/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -544,9 +544,10 @@ const EnumEntry ImageDebugType[] = {
 static const EnumEntry WeakExternalCharacteristics[] = {
-  { "NoLibrary", COFF::IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY },
-  { "Library" , COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY },
-  { "Alias" , COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS }
+  { "NoLibrary" , COFF::IMAGE_WEAK_EXTERN_SEARCH_NOLIBRARY },
+  { "Library" , COFF::IMAGE_WEAK_EXTERN_SEARCH_LIBRARY },
+  { "Alias" , COFF::IMAGE_WEAK_EXTERN_SEARCH_ALIAS },
+  { "AntiDependency" , COFF::IMAGE_WEAK_EXTERN_ANTI_DEPENDENCY },
 };

 const EnumEntry SubSectionTypes[] = {

From 59ff9d3777701ebbe6a59ab2edb8792ef3d2873f Mon Sep 17 00:00:00 2001
From: Alexey Bataev
Date: Mon, 20 Mar 2023 10:21:12 -0700
Subject: [PATCH 064/691] [SLP]Fix PR61554: use of missing vectorized value in buildvector nodes.

If the buildvector node matches the vector node, it reuses the vector
value from this vector node, but its VectorizedValue field is not
updated. This field needs to be updated to avoid misses during the
analysis of the reused gather/buildvector nodes.
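To make the failure mode concrete, here is a minimal sketch of the fix's
effect, using the names from the diff below (illustrative only): when operand
NodeIdx of entry E reuses vector value V from a matching vectorized node, the
gather/buildvector TreeEntry that actually represents that operand must also
record V:

    // Find the gather entry standing in for operand NodeIdx of E ...
    auto *It = find_if(VectorizableTree,
                       [&](const std::unique_ptr<TreeEntry> &TE) {
                         return TE->State == TreeEntry::NeedToGather &&
                                TE->UserTreeIndices.front().UserTE == E &&
                                TE->UserTreeIndices.front().EdgeIdx == NodeIdx;
                       });
    // ... and publish V there; previously it was left null ("missing").
    (*It)->VectorizedValue = V;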
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 15 +++++++++++++++
 ...reused-buildvector-matching-vectorized-node.ll | 2 +-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 9da2fd4bbabe5..ba8e04538d383 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -9352,6 +9352,21 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx) {
       V = FinalShuffle(V, UniformMask);
     }
   }
+  // Need to update the operand gather node, if actually the operand is not a
+  // vectorized node, but the buildvector/gather node, which matches one of
+  // the vectorized nodes.
+  if (find_if(VE->UserTreeIndices, [&](const EdgeInfo &EI) {
+        return EI.UserTE == E && EI.EdgeIdx == NodeIdx;
+      }) == VE->UserTreeIndices.end()) {
+    auto *It = find_if(
+        VectorizableTree, [&](const std::unique_ptr &TE) {
+          return TE->State == TreeEntry::NeedToGather &&
+                 TE->UserTreeIndices.front().UserTE == E &&
+                 TE->UserTreeIndices.front().EdgeIdx == NodeIdx;
+        });
+    assert(It != VectorizableTree.end() && "Expected gather node operand.");
+    (*It)->VectorizedValue = V;
+  }
   return V;
 }
}
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll b/llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll
index d250fcfe5bf80..2b425ee624700 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reused-buildvector-matching-vectorized-node.ll
@@ -10,7 +10,7 @@ define void @blam(ptr %arg, double %load2, i1 %fcmp3) {
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i1> poison, i1 [[FCMP3]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[TMP2]], <2 x double> zeroinitializer, <2 x double> [[TMP0]]
-; CHECK-NEXT: [[TMP4:%.*]] = load <2 x double>, ptr poison, align 16
+; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[TMP0]], double [[LOAD2]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = fcmp olt <2 x double> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = select <2 x i1> [[TMP5]], <2 x double> zeroinitializer, <2 x double> [[TMP0]]
; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <2 x double> [[TMP3]], zeroinitializer

From f67b481098cc30567d0f50a2b21f8f57b92052bd Mon Sep 17 00:00:00 2001
From: "Nadeem, Usman"
Date: Mon, 20 Mar 2023 12:05:18 -0700
Subject: [PATCH 065/691] [Flang] Exit gracefully with a useful message when we fail to look up a target

Without this patch we were asserting with a generic message `Failed to
create Target`, even though a detailed error message was already stored
in the variable `error` after calling `lookupTarget()`; that error was
simply never used or printed. With this patch we emit the detailed
message instead of a stack dump with a generic one.
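The underlying pattern, as a minimal standalone sketch (diagnostic plumbing
omitted; the actual change below routes `error` through clang's diagnostics
engine rather than printing directly):

    std::string error;
    const llvm::Target *theTarget =
        llvm::TargetRegistry::lookupTarget(theTriple, error);
    if (!theTarget) {
      // `error` already explains the failure, e.g. "No available targets
      // are compatible with triple ..." -- report it instead of asserting.
      llvm::errs() << "error: unable to create target: '" << error << "'\n";
      return false;
    }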
Differential Revision: https://reviews.llvm.org/D146333 Change-Id: I7ddee917cf921a2133ca3e6b35791b2142f770a2 --- flang/include/flang/Frontend/FrontendActions.h | 2 +- flang/lib/Frontend/FrontendActions.cpp | 18 +++++++++++++----- flang/test/Driver/target-machine-error.f90 | 6 ++++++ 3 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 flang/test/Driver/target-machine-error.f90 diff --git a/flang/include/flang/Frontend/FrontendActions.h b/flang/include/flang/Frontend/FrontendActions.h index 7a5042b814a14..2b96125e41639 100644 --- a/flang/include/flang/Frontend/FrontendActions.h +++ b/flang/include/flang/Frontend/FrontendActions.h @@ -204,7 +204,7 @@ class CodeGenAction : public FrontendAction { /// Runs prescan, parsing, sema and lowers to MLIR. bool beginSourceFileAction() override; /// Sets up LLVM's TargetMachine. - void setUpTargetMachine(); + bool setUpTargetMachine(); /// Runs the optimization (aka middle-end) pipeline on the LLVM module /// associated with this action. void runOptimizationPipeline(llvm::raw_pwrite_stream &os); diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 64cabd87e6a41..b723fe89387cd 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -143,7 +143,8 @@ bool CodeGenAction::beginSourceFileAction() { } mlirModule = std::make_unique(module.release()); - setUpTargetMachine(); + if (!setUpTargetMachine()) + return false; const llvm::DataLayout &dl = tm->createDataLayout(); setMLIRDataLayout(*mlirModule, dl); return true; @@ -184,7 +185,8 @@ bool CodeGenAction::beginSourceFileAction() { *mlirModule, ci.getInvocation().getLangOpts().OpenMPIsDevice); } - setUpTargetMachine(); + if (!setUpTargetMachine()) + return false; const llvm::DataLayout &dl = tm->createDataLayout(); setMLIRDataLayout(*mlirModule, dl); @@ -585,7 +587,7 @@ void CodeGenAction::generateLLVMIR() { } -void CodeGenAction::setUpTargetMachine() { +bool CodeGenAction::setUpTargetMachine() { CompilerInstance &ci = this->getInstance(); const TargetOptions &targetOpts = ci.getInvocation().getTargetOpts(); @@ -595,7 +597,11 @@ void CodeGenAction::setUpTargetMachine() { std::string error; const llvm::Target *theTarget = llvm::TargetRegistry::lookupTarget(theTriple, error); - assert(theTarget && "Failed to create Target"); + if (!theTarget) { + ci.getDiagnostics().Report(clang::diag::err_fe_unable_to_create_target) + << error; + return false; + } // Create `TargetMachine` const auto &CGOpts = ci.getInvocation().getCodeGenOpts(); @@ -611,6 +617,7 @@ void CodeGenAction::setUpTargetMachine() { /*Reloc::Model=*/CGOpts.getRelocationModel(), /*CodeModel::Model=*/std::nullopt, OptLevel)); assert(tm && "Failed to create TargetMachine"); + return true; } static std::unique_ptr @@ -799,7 +806,8 @@ void CodeGenAction::executeAction() { // Set the triple based on the targetmachine (this comes compiler invocation // and the command-line target option if specified, or the default if not // given on the command-line). - setUpTargetMachine(); + if (!setUpTargetMachine()) + return; const std::string &theTriple = tm->getTargetTriple().str(); if (llvmModule->getTargetTriple() != theTriple) { diff --git a/flang/test/Driver/target-machine-error.f90 b/flang/test/Driver/target-machine-error.f90 new file mode 100644 index 0000000000000..8f6522de1f617 --- /dev/null +++ b/flang/test/Driver/target-machine-error.f90 @@ -0,0 +1,6 @@ +! RUN: not %flang --target=some-invalid-triple -S %s -o \ +! RUN: /dev/null 2>&1 | FileCheck %s +! 
RUN: not %flang_fc1 -triple some-invalid-triple -S %s -o \
! RUN: /dev/null 2>&1 | FileCheck %s
+
+! CHECK: error: unable to create target: 'No available targets are compatible with triple "some-invalid-triple"'

From 82ac02e4a86070cf9924c245ff340aba1f62b45b Mon Sep 17 00:00:00 2001
From: giuseros
Date: Mon, 20 Mar 2023 20:02:03 +0000
Subject: [PATCH 066/691] Add scalar support for amdgpu.raw_buffer_{load,store}

Allow loading and storing scalars via amdgpu.raw_buffer_{load,store}.

Reviewed By: krzysz00

Differential Revision: https://reviews.llvm.org/D146413
---
 mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td | 4 +--
 .../AMDGPUToROCDL/AMDGPUToROCDL.cpp | 5 ++-
 .../AMDGPUToROCDL/amdgpu-to-rocdl.mlir | 31 +++++++++++++++++++
 mlir/test/Dialect/AMDGPU/ops.mlir | 14 +++++++++
 4 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td b/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td
index 92ded941aa7c0..3589fa607b72d 100644
--- a/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td
+++ b/mlir/include/mlir/Dialect/AMDGPU/AMDGPU.td
@@ -87,7 +87,7 @@ def AMDGPU_RawBufferLoadOp :
   let assemblyFormat = [{
     attr-dict $memref `[` $indices `]`
       (`sgprOffset` $sgprOffset^)? `:`
-      type($memref) `,` type($indices) `->` type($value)
+      type($memref) (`,` type($indices)^)? `->` type($value)
   }];
   let hasCanonicalizer = 1;
   let hasVerifier = 1;
@@ -130,7 +130,7 @@ def AMDGPU_RawBufferStoreOp :
   let assemblyFormat = [{
     attr-dict $value `->` $memref `[` $indices `]`
       (`sgprOffset` $sgprOffset^)? `:`
-      type($value) `->` type($memref) `,` type($indices)
+      type($value) `->` type($memref) (`,` type($indices)^)?
   }];
   let hasCanonicalizer = 1;
   let hasVerifier = 1;
diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
index 749f569fa7868..d4ee6e7e5d347 100644
--- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
+++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp
@@ -199,7 +199,7 @@ struct RawBufferOpLowering : public ConvertOpToLLVMPattern {
     args.push_back(resource);

     // Indexing (voffset)
-    Value voffset;
+    Value voffset = createI32Constant(rewriter, loc, 0);
     for (auto pair : llvm::enumerate(adaptor.getIndices())) {
       size_t i = pair.index();
       Value index = pair.value();
@@ -212,8 +212,7 @@
           createI32Constant(rewriter, loc, strides[i] * elementByteWidth);
       }
       index = rewriter.create(loc, index, strideOp);
-      voffset =
-          voffset ?
rewriter.create(loc, voffset, index) : index; + voffset = rewriter.create(loc, voffset, index); } if (adaptor.getIndexOffset()) { int32_t indexOffset = *gpuOp.getIndexOffset() * elementByteWidth; diff --git a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir index 183fdb61db6cb..a26add21cefb6 100644 --- a/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir +++ b/mlir/test/Conversion/AMDGPUToROCDL/amdgpu-to-rocdl.mlir @@ -1,6 +1,26 @@ // RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx908 | FileCheck %s // RUN: mlir-opt %s -convert-amdgpu-to-rocdl=chipset=gfx1030 | FileCheck %s --check-prefix=RDNA +// CHECK-LABEL: func @gpu_gcn_raw_buffer_load_scalar_i32 +func.func @gpu_gcn_raw_buffer_load_scalar_i32(%buf: memref) -> i32 { + // CHECK: %[[ptr:.*]] = llvm.ptrtoint + // CHECK: %[[lowHalf:.*]] = llvm.trunc %[[ptr]] : i64 to i32 + // CHECK: %[[resource_1:.*]] = llvm.insertelement %[[lowHalf]] + // CHECK: %[[highHalfI64:.*]] = llvm.lshr %[[ptr]] + // CHECK: %[[highHalfI32:.*]] = llvm.trunc %[[highHalfI64]] : i64 to i32 + // CHECK: %[[highHalf:.*]] = llvm.and %[[highHalfI32]], %{{.*}} : i32 + // CHECK: %[[resource_2:.*]] = llvm.insertelement %[[highHalf]], %[[resource_1]] + // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(4 : i32) + // CHECK: %[[resource_3:.*]] = llvm.insertelement %[[numRecords]], %[[resource_2]] + // CHECK: %[[word3:.*]] = llvm.mlir.constant(159744 : i32) + // RDNA: %[[word3:.*]] = llvm.mlir.constant(822243328 : i32) + // CHECK: %[[resource:.*]] = llvm.insertelement %[[word3]], %[[resource_3]] + // CHECK: %[[ret:.*]] = rocdl.raw.buffer.load %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32 + // CHECK: return %[[ret]] + %0 = amdgpu.raw_buffer_load {boundsCheck = true} %buf[] : memref -> i32 + func.return %0 : i32 +} + // CHECK-LABEL: func @gpu_gcn_raw_buffer_load_i32 func.func @gpu_gcn_raw_buffer_load_i32(%buf: memref<64xi32>, %idx: i32) -> i32 { // CHECK: %[[ptr:.*]] = llvm.ptrtoint @@ -94,6 +114,17 @@ func.func @gpu_gcn_raw_buffer_load_4xf8E4M3FNUZ(%buf: memref<64xf8E4M3FNUZ>, %id } // Since the lowering logic is shared with loads, only bitcasts need to be rechecked +// CHECK-LABEL: func @gpu_gcn_raw_buffer_store_scalar_i32 +func.func @gpu_gcn_raw_buffer_store_scalar_i32(%value: i32, %buf: memref) { + // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(4 : i32) + // CHECK: llvm.insertelement{{.*}}%[[numRecords]] + // CHECK: %[[word3:.*]] = llvm.mlir.constant(159744 : i32) + // CHECK: %[[resource:.*]] = llvm.insertelement{{.*}}%[[word3]] + // CHECK: rocdl.raw.buffer.store %{{.*}} %[[resource]], %{{.*}}, %{{.*}}, %{{.*}} : i32 + amdgpu.raw_buffer_store {boundsCheck = true} %value -> %buf[] : i32 -> memref + func.return +} + // CHECK-LABEL: func @gpu_gcn_raw_buffer_store_i32 func.func @gpu_gcn_raw_buffer_store_i32(%value: i32, %buf: memref<64xi32>, %idx: i32) { // CHECK: %[[numRecords:.*]] = llvm.mlir.constant(256 : i32) diff --git a/mlir/test/Dialect/AMDGPU/ops.mlir b/mlir/test/Dialect/AMDGPU/ops.mlir index 0e13e1ecfd66c..80bd7d4655e8b 100644 --- a/mlir/test/Dialect/AMDGPU/ops.mlir +++ b/mlir/test/Dialect/AMDGPU/ops.mlir @@ -18,6 +18,13 @@ func.func @raw_buffer_load_f32_from_rank_4(%src : memref<128x64x32x16xf32>, %off func.return %0 : f32 } +// CHECK-LABEL: func @raw_buffer_load_scalar +func.func @raw_buffer_load_scalar(%src : memref) -> f32 { + // CHECK: amdgpu.raw_buffer_load {indexOffset = 1 : i32} %{{.*}}[] : memref -> f32 + %0 = amdgpu.raw_buffer_load {indexOffset = 1 : i32} %src[] : memref -> f32 + 
func.return %0 : f32 +} + // CHECK-LABEL: func @raw_buffer_load_4xf32_from_rank_4 func.func @raw_buffer_load_4xf32_from_rank_4(%src : memref<128x64x32x16xf32>, %offset : i32, %idx0 : i32, %idx1 : i32, %idx2 : i32, %idx3 : i32) -> vector<4xf32> { // CHECK: amdgpu.raw_buffer_load {indexOffset = 1 : i32} %{{.*}}[%{{.*}}, %{{.*}}, %{{.*}}] sgprOffset %{{.*}} : memref<128x64x32x16xf32>, i32, i32, i32, i32 -> vector<4xf32> @@ -46,6 +53,13 @@ func.func @raw_buffer_store_4xf32_to_rank_4(%value : vector<4xf32>, %dst : memre func.return } +// CHECK-LABEL: func @raw_buffer_store_scalar +func.func @raw_buffer_store_scalar(%value : f32, %dst : memref) { + // CHECK: amdgpu.raw_buffer_store {indexOffset = 1 : i32} %{{.*}} -> %{{.*}}[] : f32 -> memref + amdgpu.raw_buffer_store {indexOffset = 1 : i32} %value -> %dst[] : f32 -> memref + func.return +} + // CHECK-LABEL: func @raw_buffer_atomic_fadd_f32_to_rank_1 func.func @raw_buffer_atomic_fadd_f32_to_rank_1(%value : f32, %dst : memref<128xf32>, %offset : i32, %idx0 : i32) { // CHECK: amdgpu.raw_buffer_atomic_fadd {indexOffset = 1 : i32} %{{.*}} -> %{{.*}}[{{.*}}] sgprOffset %{{.*}} : f32 -> memref<128xf32>, i32 From da40f7e8b1b1c386cc4801f03082be582de93a65 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 20 Mar 2023 15:41:27 -0500 Subject: [PATCH 067/691] [PowerPC][NFC] Pre-commit a test case for upcoming patch --- llvm/test/CodeGen/PowerPC/pr61315.ll | 83 ++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/pr61315.ll diff --git a/llvm/test/CodeGen/PowerPC/pr61315.ll b/llvm/test/CodeGen/PowerPC/pr61315.ll new file mode 100644 index 0000000000000..8203de9cbc142 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pr61315.ll @@ -0,0 +1,83 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s +define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 { +; CHECK-LABEL: ConvertExtractedMaskBitsToVect: +; CHECK: # %bb.0: +; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha +; CHECK-NEXT: xxlxor v4, v4, v4 +; CHECK-NEXT: xxlxor v3, v3, v3 +; CHECK-NEXT: addi r3, r3, .LCPI0_0@toc@l +; CHECK-NEXT: lxv vs0, 0(r3) +; CHECK-NEXT: addis r3, r2, .LCPI0_1@toc@ha +; CHECK-NEXT: addi r3, r3, .LCPI0_1@toc@l +; CHECK-NEXT: xxperm v4, v2, vs0 +; CHECK-NEXT: lxv vs0, 0(r3) +; CHECK-NEXT: xxland v2, v4, vs0 +; CHECK-NEXT: vcmpequb v2, v2, v3 +; CHECK-NEXT: xxlnor v2, v2, v2 +; CHECK-NEXT: blr + %a4 = extractelement <16 x i8> %0, i64 7 + %a5 = zext i8 %a4 to i16 + %a6 = insertelement <8 x i16> poison, i16 %a5, i64 0 + %a7 = bitcast <8 x i16> %a6 to <16 x i8> + %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> + %a9 = and <16 x i8> %a8, + %a10 = icmp eq <16 x i8> %a9, + %a11 = sext <16 x i1> %a10 to <16 x i8> + ret <16 x i8> %a11 +} + +define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 { +; CHECK-LABEL: ConvertExtractedMaskBitsToVect2: +; CHECK: # %bb.0: +; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; CHECK-NEXT: xxlxor v4, v4, v4 +; CHECK-NEXT: xxlxor v3, v3, v3 +; CHECK-NEXT: addi r3, r3, .LCPI1_0@toc@l +; CHECK-NEXT: lxv vs0, 0(r3) +; CHECK-NEXT: addis r3, r2, .LCPI1_1@toc@ha +; CHECK-NEXT: addi r3, r3, .LCPI1_1@toc@l +; CHECK-NEXT: xxperm v4, v2, vs0 +; CHECK-NEXT: lxv vs0, 0(r3) +; CHECK-NEXT: xxland v2, v4, vs0 +; CHECK-NEXT: 
vcmpequb v2, v2, v3 +; CHECK-NEXT: xxlnor v2, v2, v2 +; CHECK-NEXT: blr + %a4 = extractelement <16 x i8> %0, i64 7 + %a5 = zext i8 %a4 to i32 + %a6 = insertelement <4 x i32> poison, i32 %a5, i64 0 + %a7 = bitcast <4 x i32> %a6 to <16 x i8> + %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> + %a9 = and <16 x i8> %a8, + %a10 = icmp eq <16 x i8> %a9, + %a11 = sext <16 x i1> %a10 to <16 x i8> + ret <16 x i8> %a11 +} + +define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 { +; CHECK-LABEL: ConvertExtractedMaskBitsToVect3: +; CHECK: # %bb.0: +; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-NEXT: xxlxor v4, v4, v4 +; CHECK-NEXT: xxlxor v3, v3, v3 +; CHECK-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-NEXT: lxv vs0, 0(r3) +; CHECK-NEXT: addis r3, r2, .LCPI2_1@toc@ha +; CHECK-NEXT: addi r3, r3, .LCPI2_1@toc@l +; CHECK-NEXT: xxperm v4, v2, vs0 +; CHECK-NEXT: lxv vs0, 0(r3) +; CHECK-NEXT: xxland v2, v4, vs0 +; CHECK-NEXT: vcmpequb v2, v2, v3 +; CHECK-NEXT: xxlnor v2, v2, v2 +; CHECK-NEXT: blr + %a4 = extractelement <8 x i16> %0, i64 3 + %a5 = zext i16 %a4 to i32 + %a6 = insertelement <4 x i32> poison, i32 %a5, i64 0 + %a7 = bitcast <4 x i32> %a6 to <16 x i8> + %a8 = shufflevector <16 x i8> %a7, <16 x i8> undef, <16 x i32> + %a9 = and <16 x i8> %a8, + %a10 = icmp eq <16 x i8> %a9, + %a11 = sext <16 x i1> %a10 to <16 x i8> + ret <16 x i8> %a11 +} From eecb8c5f06149baf970fa0943e9fb9a6afe00207 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 20 Mar 2023 13:42:56 -0700 Subject: [PATCH 068/691] [SampleProfile] Use LazyCallGraph instead of CallGraph The function order in some tests had to be changed because they relied on ordering of functions returned in an SCC which is consistent but unspecified. 
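For reference, the traversal the loader now performs looks roughly like this
(a sketch distilled from the diff below; `visit` is a placeholder for the
per-function work):

    CG.buildRefSCCs();
    for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs())
      for (LazyCallGraph::SCC &C : RC)
        for (LazyCallGraph::Node &N : C)
          visit(N.getFunction()); // post-order (callees first); the loader
                                  // then reverses the list for top-down order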
--- llvm/lib/Transforms/IPO/SampleProfile.cpp | 52 +++++++++---------- ...-pm-thinlto-postlink-samplepgo-defaults.ll | 4 +- ...w-pm-thinlto-prelink-samplepgo-defaults.ll | 4 +- .../SampleProfile/profile-context-order.ll | 38 +++++++------- .../SampleProfile/profile-topdown-order.ll | 18 +++---- 5 files changed, 58 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 3086cd6d16b63..ccccb37af42f5 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -35,9 +35,9 @@ #include "llvm/ADT/Twine.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" -#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ReplayInlineAdvisor.h" @@ -479,7 +479,7 @@ class SampleProfileLoader final bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); bool runOnModule(Module &M, ModuleAnalysisManager *AM, - ProfileSummaryInfo *_PSI, CallGraph *CG); + ProfileSummaryInfo *_PSI, LazyCallGraph &CG); protected: bool runOnFunction(Function &F, ModuleAnalysisManager *AM); @@ -520,8 +520,8 @@ class SampleProfileLoader final void promoteMergeNotInlinedContextSamples( MapVector NonInlinedCallSites, const Function &F); - std::vector buildFunctionOrder(Module &M, CallGraph *CG); - std::unique_ptr buildProfiledCallGraph(CallGraph &CG); + std::vector buildFunctionOrder(Module &M, LazyCallGraph &CG); + std::unique_ptr buildProfiledCallGraph(Module &M); void generateMDProfMetadata(Function &F); /// Map from function name to Function *. Used to find the function from @@ -1821,7 +1821,7 @@ bool SampleProfileLoader::emitAnnotations(Function &F) { } std::unique_ptr -SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { +SampleProfileLoader::buildProfiledCallGraph(Module &M) { std::unique_ptr ProfiledCG; if (FunctionSamples::ProfileIsCS) ProfiledCG = std::make_unique(*ContextTracker); @@ -1831,18 +1831,17 @@ SampleProfileLoader::buildProfiledCallGraph(CallGraph &CG) { // Add all functions into the profiled call graph even if they are not in // the profile. This makes sure functions missing from the profile still // gets a chance to be processed. 
- for (auto &Node : CG) { - const auto *F = Node.first; - if (!F || F->isDeclaration() || !F->hasFnAttribute("use-sample-profile")) + for (Function &F : M) { + if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile")) continue; - ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(*F)); + ProfiledCG->addProfiledFunction(FunctionSamples::getCanonicalFnName(F)); } return ProfiledCG; } std::vector -SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { +SampleProfileLoader::buildFunctionOrder(Module &M, LazyCallGraph &CG) { std::vector FunctionOrderList; FunctionOrderList.reserve(M.size()); @@ -1850,7 +1849,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { errs() << "WARNING: -use-profiled-call-graph ignored, should be used " "together with -sample-profile-top-down-load.\n"; - if (!ProfileTopDownLoad || CG == nullptr) { + if (!ProfileTopDownLoad) { if (ProfileMergeInlinee) { // Disable ProfileMergeInlinee if profile is not loaded in top down order, // because the profile for a function may be used for the profile @@ -1866,8 +1865,6 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { return FunctionOrderList; } - assert(&CG->getModule() == &M); - if (UseProfiledCallGraph || (FunctionSamples::ProfileIsCS && !UseProfiledCallGraph.getNumOccurrences())) { // Use profiled call edges to augment the top-down order. There are cases @@ -1918,7 +1915,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { // static call edges are not so important when they don't correspond to a // context in the profile. - std::unique_ptr ProfiledCG = buildProfiledCallGraph(*CG); + std::unique_ptr ProfiledCG = buildProfiledCallGraph(M); scc_iterator CGI = scc_begin(ProfiledCG.get()); while (!CGI.isAtEnd()) { auto Range = *CGI; @@ -1935,25 +1932,27 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { ++CGI; } } else { - scc_iterator CGI = scc_begin(CG); - while (!CGI.isAtEnd()) { - for (CallGraphNode *Node : *CGI) { - auto *F = Node->getFunction(); - if (F && !F->isDeclaration() && F->hasFnAttribute("use-sample-profile")) - FunctionOrderList.push_back(F); + CG.buildRefSCCs(); + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { + for (LazyCallGraph::SCC &C : RC) { + for (LazyCallGraph::Node &N : C) { + Function &F = N.getFunction(); + if (!F.isDeclaration() && F.hasFnAttribute("use-sample-profile")) + FunctionOrderList.push_back(&F); + } } - ++CGI; } } + std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); + LLVM_DEBUG({ dbgs() << "Function processing order:\n"; - for (auto F : reverse(FunctionOrderList)) { + for (auto F : FunctionOrderList) { dbgs() << F->getName() << "\n"; } }); - std::reverse(FunctionOrderList.begin(), FunctionOrderList.end()); return FunctionOrderList; } @@ -2205,7 +2204,8 @@ void SampleProfileMatcher::detectProfileMismatch() { } bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM, - ProfileSummaryInfo *_PSI, CallGraph *CG) { + ProfileSummaryInfo *_PSI, + LazyCallGraph &CG) { GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap); PSI = _PSI; @@ -2369,8 +2369,8 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, return PreservedAnalyses::all(); ProfileSummaryInfo *PSI = &AM.getResult(M); - CallGraph &CG = AM.getResult(M); - if (!SampleLoader.runOnModule(M, &AM, PSI, &CG)) + LazyCallGraph &CG = AM.getResult(M); + if (!SampleLoader.runOnModule(M, &AM, PSI, CG)) return PreservedAnalyses::all(); return PreservedAnalyses::none(); diff 
--git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index a275cf18cbf95..a65af442163d5 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -46,7 +46,7 @@ ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: SampleProfileLoaderPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis -; CHECK-O-NEXT: Running analysis: CallGraphAnalysis +; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion ; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis @@ -76,11 +76,11 @@ ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA +; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager ; CHECK-O-NEXT: Invalidating analysis: AAManager ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy -; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index ec4e1236abb6c..de3643bc35843 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -44,7 +44,7 @@ ; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass ; CHECK-O-NEXT: Running pass: SampleProfileLoaderPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis -; CHECK-O-NEXT: Running analysis: CallGraphAnalysis +; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: OpenMPOptPass ; CHECK-O-NEXT: Running pass: IPSCCPPass @@ -69,11 +69,11 @@ ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA ; CHECK-O-NEXT: Running analysis: GlobalsAA +; CHECK-O-NEXT: Running analysis: CallGraphAnalysis ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager ; CHECK-O-NEXT: Invalidating analysis: AAManager ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy -; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy<{{.*}}LazyCallGraph::SCC{{.*}}> ; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass diff --git a/llvm/test/Transforms/SampleProfile/profile-context-order.ll b/llvm/test/Transforms/SampleProfile/profile-context-order.ll index 0771345b4c366..db368bcdfb9ef 100644 --- a/llvm/test/Transforms/SampleProfile/profile-context-order.ll +++ b/llvm/test/Transforms/SampleProfile/profile-context-order.ll @@ -28,6 +28,25 @@ @factor = dso_local global i32 3, align 4, !dbg !0 @fp = dso_local global ptr null, align 8 +; INLINE: define dso_local i32 @_Z5funcAi +; INLINE-NOT: 
call i32 @_Z8funcLeafi +; NOINLINE: define dso_local i32 @_Z5funcAi +; NOINLINE: call i32 @_Z8funcLeafi +; ICALL-INLINE: define dso_local i32 @_Z5funcAi +; ICALL-INLINE: call i32 @_Z3foo +; INLINEB: define dso_local i32 @_Z5funcBi +; INLINEB-NOT: call i32 @_Z8funcLeafi +; NOINLINEB: define dso_local i32 @_Z5funcBi +; NOINLINEB: call i32 @_Z8funcLeafi +define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 { +entry: + %add = add nsw i32 %x, 100000, !dbg !44 + %0 = load ptr, ptr @fp, align 8 + %call = call i32 %0(i32 8), !dbg !45 + %call1 = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !46 + ret i32 %call, !dbg !46 +} + define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 { entry: store ptr @_Z3fibi, ptr @fp, align 8, !dbg !25 @@ -49,25 +68,6 @@ for.body: ; preds = %for.body, %entry br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25 } -; INLINE: define dso_local i32 @_Z5funcAi -; INLINE-NOT: call i32 @_Z8funcLeafi -; NOINLINE: define dso_local i32 @_Z5funcAi -; NOINLINE: call i32 @_Z8funcLeafi -; ICALL-INLINE: define dso_local i32 @_Z5funcAi -; ICALL-INLINE: call i32 @_Z3foo -; INLINEB: define dso_local i32 @_Z5funcBi -; INLINEB-NOT: call i32 @_Z8funcLeafi -; NOINLINEB: define dso_local i32 @_Z5funcBi -; NOINLINEB: call i32 @_Z8funcLeafi -define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 { -entry: - %add = add nsw i32 %x, 100000, !dbg !44 - %0 = load ptr, ptr @fp, align 8 - %call = call i32 %0(i32 8), !dbg !45 - %call1 = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !46 - ret i32 %call, !dbg !46 -} - ; INLINE: define dso_local i32 @_Z8funcLeafi ; NOINLINE: define dso_local i32 @_Z8funcLeafi ; ICALL-INLINE: define dso_local i32 @_Z8funcLeafi diff --git a/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll b/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll index fa24937f5e282..f85ab24e18100 100644 --- a/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll +++ b/llvm/test/Transforms/SampleProfile/profile-topdown-order.ll @@ -19,6 +19,15 @@ @factor = dso_local global i32 3, align 4, !dbg !0 @fp = dso_local global ptr null, align 8 +define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 { +entry: + %add = add nsw i32 %x, 100000, !dbg !44 + %0 = load ptr, ptr @fp, align 8 + %call = call i32 %0(i32 8), !dbg !45 + %call1 = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !46 + ret i32 %call, !dbg !46 +} + define dso_local i32 @main() local_unnamed_addr #0 !dbg !18 { entry: store ptr @_Z3fibi, ptr @fp, align 8, !dbg !25 @@ -40,15 +49,6 @@ for.body: ; preds = %for.body, %entry br i1 %cmp, label %for.cond.cleanup, label %for.body, !dbg !25 } -define dso_local i32 @_Z5funcAi(i32 %x) local_unnamed_addr #0 !dbg !40 { -entry: - %add = add nsw i32 %x, 100000, !dbg !44 - %0 = load ptr, ptr @fp, align 8 - %call = call i32 %0(i32 8), !dbg !45 - %call1 = tail call i32 @_Z8funcLeafi(i32 %add), !dbg !46 - ret i32 %call, !dbg !46 -} - ; INLINE: define dso_local i32 @_Z8funcLeafi ; NOINLINE: define dso_local i32 @_Z8funcLeafi ; ICALL-INLINE: define dso_local i32 @_Z8funcLeafi From 091422adc1d7478b126a967c795414840c5c0c97 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Mon, 20 Mar 2023 13:41:49 -0700 Subject: [PATCH 069/691] [LSR] Fix wrapping bug in lsr-term-fold logic The existing logic was unsound, in two ways. First, due to wrapping in the trip count computation, it could compute a value which converted a loop exiting on iteration 256 into one exiting on iteration 255. (With i8 trip counts.)
Second, it allowed rewriting when the trip count implies wrapping around the alternate IV. As a trivial example, it allowed rewriting an i128 exit test in terms of an i64 IV. This is obviously wrong. Note that the test change is fairly minimal - i.e. only the targeted test - but that's only because I precommitted a change which switched the test from 32 to 64 bit pointers. For 32 bit pointer architectures with 32 bit primary inductions, this transform is almost always unsound to perform. Differential Revision: https://reviews.llvm.org/D146429 --- .../Transforms/Scalar/LoopStrengthReduce.cpp | 20 ++++++++++++++++++- .../LoopStrengthReduce/lsr-term-fold.ll | 12 +++++------ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index e76ba2da22127..0a4d815e67206 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6762,7 +6762,25 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, continue; } - // FIXME: This does not properly account for overflow. + // Check that we can compute the value of AddRec on the exiting iteration + // without soundness problems. There are two cases to be worried about: + // 1) BECount could be 255 with type i8. Simply adding one would be + // incorrect. We may need one extra bit to represent the unsigned + // trip count. + // 2) The multiplication of stride by TC may wrap around. This is subtle + // because computing the result accounting for wrap is insufficient. + // In order to use the result in an exit test, we must also know that + // AddRec doesn't take the same value on any previous iteration. + // The simplest case to consider is a candidate IV which is narrower + // than the trip count (and thus original IV), but this can also + // happen due to non-unit strides on the candidate IVs. + ConstantRange StepCR = SE.getSignedRange(AddRec->getStepRecurrence(SE)); + ConstantRange BECountCR = SE.getUnsignedRange(BECount); + unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + 1 + StepCR.getMinSignedBits(); + unsigned ARBitWidth = SE.getTypeSizeInBits(AddRec->getType()); + if (NoOverflowBitWidth > ARBitWidth) + continue; + const SCEV *TermValueSLocal = SE.getAddExpr( AddRec->getOperand(0), SE.getTruncateOrZeroExtend( diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll index a72e859791574..bb6b74ea8c387 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll @@ -192,20 +192,18 @@ for.end: ; preds = %for.body ; In this case, the integer IV has a larger bitwidth than the pointer IV. ; This means that the smaller IV may wrap around multiple times before ; the original loop exit is taken.
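To see why the updated CHECK lines below are now correct, here is a worked instance of the new bound for the @iv_size test that follows (a sketch; the exact ranges depend on what ScalarEvolution can prove about %N):

    // The original IV is i128 with an unknown trip count %N, so
    // BECountCR = SE.getUnsignedRange(BECount) may span the full i128 range:
    //   BECountCR.getActiveBits()  -> up to 128
    //   NoOverflowBitWidth         -> at least 128 + 1 = 129 (before even
    //                                 adding the step's signed bits)
    // The candidate pointer AddRec is only 64 bits wide:
    //   ARBitWidth == 64
    // Since 129 > 64, canFoldTermCondOfLoop() now takes the new `continue`
    // and the miscompiled pointer-compare exit test is no longer generated.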
-; FIXME: miscompile define void @iv_size(ptr %a, i128 %N) { ; CHECK-LABEL: @iv_size( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[N:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i128 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N:%.*]], [[ENTRY]] ] ; CHECK-NEXT: store i32 1, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nsw i128 [[LSR_IV]], -1 ; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 -; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] -; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i128 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; From 3af1c48c66a3cf906bd35c2a6e9d96b28ad19438 Mon Sep 17 00:00:00 2001 From: Mahesh Ravishankar Date: Mon, 20 Mar 2023 18:58:39 +0000 Subject: [PATCH 070/691] Changes to `SCFFuseProducerOfSliceResult` to also return the operations created during fusion. This is a follow-up to https://reviews.llvm.org/D145133 that allows propagating information about the ops that are fused back to the caller. Reviewed By: hanchung Differential Revision: https://reviews.llvm.org/D146254 --- .../Dialect/SCF/Transforms/TileUsingInterface.h | 1 + .../Dialect/SCF/Transforms/TileUsingInterface.cpp | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h index 5e03eccfc2f3a..e7bcd062d9652 100644 --- a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h @@ -96,6 +96,7 @@ struct SCFTileAndFuseOptions { struct SCFFuseProducerOfSliceResult { OpResult origProducer; // Original untiled producer. Value tiledAndFusedProducer; // Tile and fused producer value. + SmallVector tiledOps; }; std::optional tileAndFuseProducerOfSlice(RewriterBase &rewriter, diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp index 6706f54662839..ec116df700aae 100644 --- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp @@ -604,7 +604,8 @@ mlir::scf::tileAndFuseProducerOfSlice(RewriterBase &rewriter, } } return scf::SCFFuseProducerOfSliceResult{fusableProducer, - tileAndFuseResult->tiledValues[0]}; + tileAndFuseResult->tiledValues[0], + tileAndFuseResult->tiledOps}; } /// Reconstruct the fused producer from within the tiled-and-fused code.
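The new `tiledOps` field lets callers post-process everything the fusion created, not just the fused value. A hypothetical caller sketch follows; the `rewriter`, `candidateSlice`, and `loops` values and the attribute name are assumptions made for illustration, not part of the patch:

    // Sketch only: consuming the ops surfaced by the fusion result.
    std::optional<scf::SCFFuseProducerOfSliceResult> fused =
        mlir::scf::tileAndFuseProducerOfSlice(rewriter, candidateSlice, loops);
    if (fused) {
      // Previously only `tiledAndFusedProducer` was visible; now every op
      // created during fusion can be inspected or annotated by the caller.
      for (Operation *tiledOp : fused->tiledOps)
        tiledOp->setAttr("fused_producer", rewriter.getUnitAttr());
    }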
@@ -612,7 +613,8 @@ void mlir::scf::yieldReplacementForFusedProducer( RewriterBase &rewriter, tensor::ExtractSliceOp sliceOp, scf::SCFFuseProducerOfSliceResult fusedProducerInfo, MutableArrayRef loops) { - auto [fusableProducer, fusedProducerValue] = fusedProducerInfo; + auto [fusableProducer, fusedProducerValue, tileAndFusedOps] = + fusedProducerInfo; SmallVector initValues; FailureOr initValue = tensor::getOrCreateDestination( rewriter, fusableProducer.getOwner()->getLoc(), fusableProducer); @@ -623,8 +625,11 @@ void mlir::scf::yieldReplacementForFusedProducer( yieldTiledValues(rewriter, initValue.value(), fusedProducerValue, resultOffsets, resultSizes, loops); } - if (auto dstStyleProducer = - fusedProducerValue.getDefiningOp()) { + for (auto tileAndFusedOp : tileAndFusedOps) { + auto dstStyleProducer = + dyn_cast(tileAndFusedOp); + if (!dstStyleProducer) + continue; Value dstValue = dstStyleProducer.getDpsInitOperand(fusableProducer.getResultNumber()) ->get(); From fb1b9945be7657a3a25b727eaf0eeb3f74525210 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 20 Mar 2023 20:50:27 +0000 Subject: [PATCH 071/691] [JITLink][ELF][AArch64] Add support for ELF R_AARCH64_ABS32 relocation. This relocation is commonly used in debug sections. Failure to handle it caused the test failure in https://lab.llvm.org/buildbot/#/builders/197/builds/4272, which forced the reversion, in f721fcb6ed0, of 57aeb305460 ("[JITLink][ELF] Don't skip debug info sections by default"). This fix should allow us to re-land 57aeb305460. --- .../ExecutionEngine/JITLink/ELF_aarch64.cpp | 9 ++ .../JITLink/AArch64/ELF_R_AARCH64_ABS32.yaml | 90 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 llvm/test/ExecutionEngine/JITLink/AArch64/ELF_R_AARCH64_ABS32.yaml diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp index 3eb7e1bccde7e..ab47f86859dd6 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp @@ -58,6 +58,7 @@ class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder { ELFMovwAbsG1, ELFMovwAbsG2, ELFMovwAbsG3, + ELFAbs32, ELFAbs64, ELFPrel32, ELFPrel64, @@ -98,6 +99,8 @@ class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder { return ELFMovwAbsG2; case ELF::R_AARCH64_MOVW_UABS_G3: return ELFMovwAbsG3; + case ELF::R_AARCH64_ABS32: + return ELFAbs32; case ELF::R_AARCH64_ABS64: return ELFAbs64; case ELF::R_AARCH64_PREL32: @@ -284,6 +287,10 @@ class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder { Kind = aarch64::MoveWide16; break; } + case ELFAbs32: { + Kind = aarch64::Pointer32; + break; + } case ELFAbs64: { Kind = aarch64::Pointer64; break; @@ -357,6 +364,8 @@ class ELFLinkGraphBuilder_aarch64 : public ELFLinkGraphBuilder { return "ELFMovwAbsG2"; case ELFMovwAbsG3: return "ELFMovwAbsG3"; + case ELFAbs32: + return "ELFAbs32"; case ELFAbs64: return "ELFAbs64"; case ELFPrel32: diff --git a/llvm/test/ExecutionEngine/JITLink/AArch64/ELF_R_AARCH64_ABS32.yaml b/llvm/test/ExecutionEngine/JITLink/AArch64/ELF_R_AARCH64_ABS32.yaml new file mode 100644 index 0000000000000..ca992adbba819 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/AArch64/ELF_R_AARCH64_ABS32.yaml @@ -0,0 +1,90 @@ +# Check success and failure cases of R_AARCH64_32 handling. +# This file contains a single R_AARCH64_ABS32 relocation. 
We expect the +# relocation to apply successfully when we assign x an address in the low +# 32-bits of the address space, and fail if we assign x an address outside that +# range. + +# RUN: yaml2obj -o %t.o %s +# RUN: llvm-jitlink -abs x=0x12345678 -noexec -check=%s %t.o +# RUN: not llvm-jitlink -abs x=0x123456789 -noexec %t.o 2>&1 | \ +# RUN: FileCheck -check-prefix=CHECK-ERROR %s + +# jitlink-check: *{8}xptr = x + +# CHECK-ERROR: relocation target "x" {{.*}} is out of range of Pointer32 fixup + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_AARCH64 + SectionHeaderStringTable: .strtab +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x4 + Content: E0031F2AC0035FD6 + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + AddressAlign: 0x8 + Content: '0000000000000000' + - Name: .comment + Type: SHT_PROGBITS + Flags: [ SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 00636C616E672076657273696F6E2031372E302E30202868747470733A2F2F6769746875622E636F6D2F6C6C766D2F6C6C766D2D70726F6A6563742E67697420333130616161633834363336656336633933383739633138643933653332663537623264383938362900 + - Name: .note.GNU-stack + Type: SHT_PROGBITS + AddressAlign: 0x1 + - Name: .rela.data + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .data + Relocations: + - Symbol: x + Type: R_AARCH64_ABS32 + - Name: .llvm_addrsig + Type: SHT_LLVM_ADDRSIG + Flags: [ SHF_EXCLUDE ] + Link: .symtab + AddressAlign: 0x1 + Symbols: [ x ] + - Type: SectionHeaderTable + Sections: + - Name: .strtab + - Name: .text + - Name: .data + - Name: .rela.data + - Name: .comment + - Name: .note.GNU-stack + - Name: .llvm_addrsig + - Name: .symtab +Symbols: + - Name: hw.c + Type: STT_FILE + Index: SHN_ABS + - Name: '$x.0' + Section: .text + - Name: '$d.1' + Section: .data + - Name: '$d.2' + Section: .comment + - Name: main + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Size: 0x8 + - Name: xptr + Type: STT_OBJECT + Section: .data + Binding: STB_GLOBAL + Size: 0x8 + - Name: x + Binding: STB_GLOBAL +... From c21e88cc02617e0f04807a8dcf164b405d67d5e4 Mon Sep 17 00:00:00 2001 From: Mahesh Ravishankar Date: Mon, 20 Mar 2023 20:56:41 +0000 Subject: [PATCH 072/691] [mlir][Tensor] Avoid dropping attributes for `tensor.pad` operations during canonicalization. Reviewed By: hanchung Differential Revision: https://reviews.llvm.org/D146440 --- .../include/mlir/Dialect/Linalg/Utils/Utils.h | 15 ++------ .../mlir/Dialect/Tensor/IR/TensorOps.td | 10 ++--- .../mlir/Dialect/Utils/StructuredOpsUtils.h | 5 +++ mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 38 ++++++++++--------- mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp | 14 +++++++ 5 files changed, 49 insertions(+), 33 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h index cc8bbd570ef66..3c3fa70e161f7 100644 --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -11,6 +11,7 @@ #include "mlir/Dialect/Linalg/IR/Linalg.h" #include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" #include "llvm/ADT/StringSet.h" #include @@ -461,18 +462,10 @@ struct GenerateLoopNest { /// Returns an attribute list that excludes pre-defined attributes. 
template SmallVector getPrunedAttributeList(OpTy op) { - llvm::StringSet<> elidedAttrs; - elidedAttrs.insert(op.getAttributeNames().begin(), - op.getAttributeNames().end()); + auto elidedAttrs = llvm::to_vector(op.getAttributeNames()); if (isa(op.getOperation())) - elidedAttrs.insert(LinalgDialect::kMemoizedIndexingMapsAttrName); - SmallVector attrs; - for (auto attr : op->getAttrs()) { - if (elidedAttrs.count(attr.getName())) - continue; - attrs.push_back(attr); - } - return attrs; + elidedAttrs.push_back(LinalgDialect::kMemoizedIndexingMapsAttrName); + return getPrunedAttributeList(op, elidedAttrs); } } // namespace linalg diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td index 09b7775dcaae4..66d6dcc7b27ed 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -1295,13 +1295,13 @@ def Tensor_PadOp : Tensor_Op<"pad", [ let builders = [ // Build a PadOp with mixed static and dynamic entries. - OpBuilder<(ins "Value":$source, "ArrayRef":$staticLow, - "ArrayRef":$staticHigh, "ValueRange":$low, "ValueRange":$high, - CArg<"bool", "false">:$nofold, + OpBuilder<(ins "Type":$resultType, "Value":$source, + "ArrayRef":$staticLow, "ArrayRef":$staticHigh, + "ValueRange":$low, "ValueRange":$high, CArg<"bool", "false">:$nofold, CArg<"ArrayRef", "{}">:$attrs)>, // Build a PadOp with all dynamic entries. - OpBuilder<(ins "Value":$source, "ValueRange":$low, "ValueRange":$high, - CArg<"bool", "false">:$nofold, + OpBuilder<(ins "Type":$resultType, "Value":$source, "ValueRange":$low, + "ValueRange":$high, CArg<"bool", "false">:$nofold, CArg<"ArrayRef", "{}">:$attrs)>, // Build a PadOp with mixed static and dynamic entries and custom // result type. If the type passed is nullptr, it is inferred. diff --git a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h index 1297e87714f79..c4f9fa8a6fe05 100644 --- a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h +++ b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h @@ -123,6 +123,11 @@ Operation *cloneWithoutRegions(OpBuilder &b, Operation *op, TypeRange newResultTypes, ValueRange newOperands); +// Get the list of attributes associated with the op, ignoring +// those with the provided name. 
+SmallVector +getPrunedAttributeList(Operation *op, ArrayRef elidedAttrs); + } // namespace mlir #endif // MLIR_DIALECT_UTILS_STRUCTUREDOPSUTILS_H diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index f2da1088eb04d..9d26e51e04fd5 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -2518,26 +2518,27 @@ RankedTensorType PadOp::inferResultType(RankedTensorType sourceType, return RankedTensorType::get(inferredShape, sourceType.getElementType()); } -void PadOp::build(OpBuilder &b, OperationState &result, Value source, - ArrayRef staticLow, ArrayRef staticHigh, - ValueRange low, ValueRange high, bool nofold, - ArrayRef attrs) { +void PadOp::build(OpBuilder &b, OperationState &result, Type resultType, + Value source, ArrayRef staticLow, + ArrayRef staticHigh, ValueRange low, ValueRange high, + bool nofold, ArrayRef attrs) { auto sourceType = source.getType().cast(); - auto resultType = inferResultType(sourceType, staticLow, staticHigh); + if (!resultType) + resultType = inferResultType(sourceType, staticLow, staticHigh); build(b, result, resultType, source, low, high, b.getDenseI64ArrayAttr(staticLow), b.getDenseI64ArrayAttr(staticHigh), nofold ? b.getUnitAttr() : UnitAttr()); result.addAttributes(attrs); } -void PadOp::build(OpBuilder &b, OperationState &result, Value source, - ValueRange low, ValueRange high, bool nofold, +void PadOp::build(OpBuilder &b, OperationState &result, Type resultType, + Value source, ValueRange low, ValueRange high, bool nofold, ArrayRef attrs) { auto sourceType = source.getType().cast(); unsigned rank = sourceType.getRank(); SmallVector staticVector(rank, ShapedType::kDynamic); - build(b, result, source, staticVector, staticVector, low, high, nofold, - attrs); + build(b, result, resultType, source, staticVector, staticVector, low, high, + nofold, attrs); } void PadOp::build(OpBuilder &b, OperationState &result, Type resultType, @@ -2635,9 +2636,9 @@ struct FoldSourceTensorCast : public OpRewritePattern { } else { auto newOp = rewriter.create( padTensorOp->getLoc(), newResultType, padTensorOp.getSource(), - padTensorOp.getLow(), padTensorOp.getHigh(), padTensorOp.getStaticLow(), padTensorOp.getStaticHigh(), - padTensorOp.getNofold()); + padTensorOp.getLow(), padTensorOp.getHigh(), padTensorOp.getNofold(), + getPrunedAttributeList(padTensorOp, PadOp::getAttributeNames())); IRMapping mapper; padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper); @@ -2667,9 +2668,10 @@ struct FoldTargetTensorCast : public OpRewritePattern { auto replacementOp = rewriter.create( padTensorOp.getLoc(), tensorCastOp.getDest().getType(), - padTensorOp.getSource(), padTensorOp.getLow(), padTensorOp.getHigh(), - padTensorOp.getStaticLow(), padTensorOp.getStaticHigh(), - padTensorOp.getNofold()); + padTensorOp.getSource(), padTensorOp.getStaticLow(), + padTensorOp.getStaticHigh(), padTensorOp.getLow(), + padTensorOp.getHigh(), padTensorOp.getNofold(), + getPrunedAttributeList(padTensorOp, PadOp::getAttributeNames())); replacementOp.getRegion().takeBody(padTensorOp.getRegion()); rewriter.replaceOp(padTensorOp, replacementOp.getResult()); @@ -2827,7 +2829,8 @@ struct FoldOrthogonalPaddings : public OpRewritePattern { innerSliceOp.getMixedStrides()); auto newPadOp = rewriter.create( padOp.getLoc(), padOp.getResultType(), newSliceOp.getResult(), - padOp.getMixedLowPad(), newHighPad, padOp.getNofold()); + padOp.getMixedLowPad(), newHighPad, padOp.getNofold(), + getPrunedAttributeList(padOp, 
PadOp::getAttributeNames())); rewriter.inlineRegionBefore(padOp.getRegion(), newPadOp.getRegion(), newPadOp.getRegion().begin()); rewriter.replaceOp(padOp, newPadOp.getResult()); @@ -2916,8 +2919,9 @@ struct FoldStaticPadding : public OpRewritePattern { auto newResultType = RankedTensorType::get( newOutDims, padTensorOp.getType().getElementType()); auto newOp = rewriter.create( - padTensorOp->getLoc(), newResultType, input, padTensorOp.getLow(), - padTensorOp.getHigh(), staticLow, staticHigh, padTensorOp.getNofold()); + padTensorOp->getLoc(), newResultType, input, staticLow, staticHigh, + padTensorOp.getLow(), padTensorOp.getHigh(), padTensorOp.getNofold(), + getPrunedAttributeList(padTensorOp, PadOp::getAttributeNames())); IRMapping mapper; padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper); diff --git a/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp b/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp index b22f42c09da59..49b49ef639708 100644 --- a/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp +++ b/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp @@ -11,6 +11,7 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/IRMapping.h" +#include "llvm/ADT/StringSet.h" #include "mlir/Dialect/Utils/DialectUtilsEnums.cpp.inc" @@ -114,3 +115,16 @@ Operation *mlir::cloneWithoutRegions(OpBuilder &b, Operation *op, state.addRegion(); return b.create(state); } + +SmallVector +mlir::getPrunedAttributeList(Operation *op, ArrayRef elidedAttrs) { + llvm::StringSet elidedAttrsSet; + elidedAttrsSet.insert(elidedAttrs.begin(), elidedAttrs.end()); + SmallVector attrs; + for (auto attr : op->getAttrs()) { + if (elidedAttrsSet.count(attr.getName())) + continue; + attrs.push_back(attr); + } + return attrs; +} From c71fe7c9e83c16c8304d5cbc0dc3936c41e3e703 Mon Sep 17 00:00:00 2001 From: Anlun Xu Date: Mon, 20 Mar 2023 13:58:45 -0700 Subject: [PATCH 073/691] [mlir][openMP] Add bazel deps Differential Revision: https://reviews.llvm.org/D146455 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 3bca5fcefb519..328df87ba2063 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -6754,6 +6754,19 @@ cc_library( ], ) +cc_library( + name = "OpenMPCommon", + srcs = ["lib/Target/LLVMIR/Dialect/OpenMPCommon.cpp"], + hdrs = ["include/mlir/Target/LLVMIR/Dialect/OpenMPCommon.h"], + includes = ["include"], + deps = [ + ":IR", + ":Support", + "//third_party/llvm/llvm-project/llvm:Core", + "//third_party/llvm/llvm-project/llvm:FrontendOpenMP", + ], +) + cc_library( name = "LLVMIRToLLVMTranslation", srcs = ["lib/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.cpp"], @@ -6780,6 +6793,7 @@ cc_library( ":LLVMDialect", ":OpenACCDialect", ":OpenACCToLLVM", + ":OpenMPCommon", ":Support", ":ToLLVMIRTranslation", "//llvm:Core", @@ -6795,6 +6809,7 @@ cc_library( includes = ["include"], deps = [ ":IR", + ":OpenMPCommon", ":OpenMPDialect", ":Support", ":ToLLVMIRTranslation", From d3d6a5ff184d4d9c7ac7bcd281281a3b53ed058b Mon Sep 17 00:00:00 2001 From: Ilyas Mustafazade Date: Mon, 20 Mar 2023 13:58:26 -0700 Subject: [PATCH 074/691] [NFC] Rename ArgSize to SpellingSize, and add ArgStringSize. 
Differential Revision: https://reviews.llvm.org/D146394 --- llvm/lib/Option/Option.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp index 1f1eb93bcca05..95edf32b2174f 100644 --- a/llvm/lib/Option/Option.cpp +++ b/llvm/lib/Option/Option.cpp @@ -108,20 +108,20 @@ bool Option::matches(OptSpecifier Opt) const { std::unique_ptr Option::acceptInternal(const ArgList &Args, StringRef Spelling, unsigned &Index) const { - size_t ArgSize = Spelling.size(); + const size_t SpellingSize = Spelling.size(); switch (getKind()) { case FlagClass: { - if (ArgSize != strlen(Args.getArgString(Index))) + if (SpellingSize != strlen(Args.getArgString(Index))) return nullptr; return std::make_unique(*this, Spelling, Index++); } case JoinedClass: { - const char *Value = Args.getArgString(Index) + ArgSize; + const char *Value = Args.getArgString(Index) + SpellingSize; return std::make_unique(*this, Spelling, Index++, Value); } case CommaJoinedClass: { // Always matches. - const char *Str = Args.getArgString(Index) + ArgSize; + const char *Str = Args.getArgString(Index) + SpellingSize; auto A = std::make_unique(*this, Spelling, Index++); // Parse out the comma separated values. @@ -150,7 +150,7 @@ std::unique_ptr Option::acceptInternal(const ArgList &Args, case SeparateClass: // Matches iff this is an exact match. // FIXME: Avoid strlen. - if (ArgSize != strlen(Args.getArgString(Index))) + if (SpellingSize != strlen(Args.getArgString(Index))) return nullptr; Index += 2; @@ -163,7 +163,7 @@ std::unique_ptr Option::acceptInternal(const ArgList &Args, case MultiArgClass: { // Matches iff this is an exact match. // FIXME: Avoid strlen. - if (ArgSize != strlen(Args.getArgString(Index))) + if (SpellingSize != strlen(Args.getArgString(Index))) return nullptr; Index += 1 + getNumArgs(); @@ -179,8 +179,8 @@ std::unique_ptr Option::acceptInternal(const ArgList &Args, case JoinedOrSeparateClass: { // If this is not an exact match, it is a joined arg. // FIXME: Avoid strlen. - if (ArgSize != strlen(Args.getArgString(Index))) { - const char *Value = Args.getArgString(Index) + ArgSize; + if (SpellingSize != strlen(Args.getArgString(Index))) { + const char *Value = Args.getArgString(Index) + SpellingSize; return std::make_unique(*this, Spelling, Index++, Value); } @@ -201,12 +201,12 @@ std::unique_ptr Option::acceptInternal(const ArgList &Args, return nullptr; return std::make_unique(*this, Spelling, Index - 2, - Args.getArgString(Index - 2) + ArgSize, + Args.getArgString(Index - 2) + SpellingSize, Args.getArgString(Index - 1)); case RemainingArgsClass: { // Matches iff this is an exact match. // FIXME: Avoid strlen. - if (ArgSize != strlen(Args.getArgString(Index))) + if (SpellingSize != strlen(Args.getArgString(Index))) return nullptr; auto A = std::make_unique(*this, Spelling, Index++); while (Index < Args.getNumInputArgStrings() && @@ -216,9 +216,9 @@ std::unique_ptr Option::acceptInternal(const ArgList &Args, } case RemainingArgsJoinedClass: { auto A = std::make_unique(*this, Spelling, Index); - if (ArgSize != strlen(Args.getArgString(Index))) { + if (SpellingSize != strlen(Args.getArgString(Index))) { // An inexact match means there is a joined arg. 
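The renamed quantities are easiest to read against a concrete invocation; the option spelling below is hypothetical and not taken from the patch:

    // Assume an option whose spelling is "-o", so SpellingSize == 2.
    //   argv[Index] == "-o"    -> strlen(argv[Index]) == 2 == SpellingSize:
    //                             exact match, the value is a separate arg.
    //   argv[Index] == "-ofoo" -> 5 != SpellingSize: inexact (joined) match;
    //                             the value "foo" starts at
    //                             Args.getArgString(Index) + SpellingSize.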
- A->getValues().push_back(Args.getArgString(Index) + ArgSize); + A->getValues().push_back(Args.getArgString(Index) + SpellingSize); } Index++; while (Index < Args.getNumInputArgStrings() && From 38fc680c2dc31ae7c397faa919e1881fb9e8f82a Mon Sep 17 00:00:00 2001 From: Ilyas Mustafazade Date: Mon, 20 Mar 2023 14:00:46 -0700 Subject: [PATCH 075/691] Replace strlen with StringRef::size Replace multiple strlen calls with a StringRef constructor and a StringRef::size call. Differential Revision: https://reviews.llvm.org/D146394 --- llvm/lib/Option/Option.cpp | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Option/Option.cpp b/llvm/lib/Option/Option.cpp index 95edf32b2174f..c570b02b08ce7 100644 --- a/llvm/lib/Option/Option.cpp +++ b/llvm/lib/Option/Option.cpp @@ -109,9 +109,10 @@ std::unique_ptr Option::acceptInternal(const ArgList &Args, StringRef Spelling, unsigned &Index) const { const size_t SpellingSize = Spelling.size(); + const size_t ArgStringSize = StringRef(Args.getArgString(Index)).size(); switch (getKind()) { case FlagClass: { - if (SpellingSize != strlen(Args.getArgString(Index))) + if (SpellingSize != ArgStringSize) return nullptr; return std::make_unique(*this, Spelling, Index++); } @@ -149,8 +150,7 @@ std::unique_ptr Option::acceptInternal(const ArgList &Args, } case SeparateClass: // Matches iff this is an exact match. - // FIXME: Avoid strlen. - if (SpellingSize != strlen(Args.getArgString(Index))) + if (SpellingSize != ArgStringSize) return nullptr; Index += 2; @@ -162,8 +162,7 @@ std::unique_ptr Option::acceptInternal(const ArgList &Args, Args.getArgString(Index - 1)); case MultiArgClass: { // Matches iff this is an exact match. - // FIXME: Avoid strlen. - if (SpellingSize != strlen(Args.getArgString(Index))) + if (SpellingSize != ArgStringSize) return nullptr; Index += 1 + getNumArgs(); @@ -178,8 +177,7 @@ std::unique_ptr Option::acceptInternal(const ArgList &Args, } case JoinedOrSeparateClass: { // If this is not an exact match, it is a joined arg. - // FIXME: Avoid strlen. - if (SpellingSize != strlen(Args.getArgString(Index))) { + if (SpellingSize != ArgStringSize) { const char *Value = Args.getArgString(Index) + SpellingSize; return std::make_unique(*this, Spelling, Index++, Value); } @@ -205,8 +203,7 @@ std::unique_ptr Option::acceptInternal(const ArgList &Args, Args.getArgString(Index - 1)); case RemainingArgsClass: { // Matches iff this is an exact match. - // FIXME: Avoid strlen. - if (SpellingSize != strlen(Args.getArgString(Index))) + if (SpellingSize != ArgStringSize) return nullptr; auto A = std::make_unique(*this, Spelling, Index++); while (Index < Args.getNumInputArgStrings() && @@ -216,7 +213,7 @@ std::unique_ptr Option::acceptInternal(const ArgList &Args, } case RemainingArgsJoinedClass: { auto A = std::make_unique(*this, Spelling, Index); - if (SpellingSize != strlen(Args.getArgString(Index))) { + if (SpellingSize != ArgStringSize) { // An inexact match means there is a joined arg. A->getValues().push_back(Args.getArgString(Index) + SpellingSize); } From d0e507f56990fa09fe9ce326419dda8d54486c0a Mon Sep 17 00:00:00 2001 From: Mahesh Ravishankar Date: Mon, 20 Mar 2023 21:20:54 +0000 Subject: [PATCH 076/691] [mlir][Tensor] Fix build error due to missing `<>` in D146440. 
Differential Revision: https://reviews.llvm.org/D146458 --- mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp b/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp index 49b49ef639708..aed39f8644008 100644 --- a/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp +++ b/mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp @@ -118,7 +118,7 @@ Operation *mlir::cloneWithoutRegions(OpBuilder &b, Operation *op, SmallVector mlir::getPrunedAttributeList(Operation *op, ArrayRef elidedAttrs) { - llvm::StringSet elidedAttrsSet; + llvm::StringSet<> elidedAttrsSet; elidedAttrsSet.insert(elidedAttrs.begin(), elidedAttrs.end()); SmallVector attrs; for (auto attr : op->getAttrs()) { From 411b1d8f079533860a990ee615abae3b0e6dbd8b Mon Sep 17 00:00:00 2001 From: Mahesh Ravishankar Date: Mon, 20 Mar 2023 21:04:55 +0000 Subject: [PATCH 077/691] [mlir][Tensor] Make `TilingInterface` implementation only return handle to the created `pad` operation. Pad tiling implementation only needs to return the tiled pad operation. The rest of the generated code is related to handling boundary conditions. Reviewed By: hanchung Differential Revision: https://reviews.llvm.org/D146439 --- mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp index 0faa29ade8047..cd3a6f680e972 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp @@ -648,7 +648,7 @@ FailureOr tensor::bubbleUpPadSlice(OpBuilder &b, elseOp = createPadOfExtractSlice(); b.create(loc, castResult(elseOp->getResult(0))); }); - return TilingResult{{result}, SmallVector(result->getResults())}; + return TilingResult{{elseOp}, SmallVector(result->getResults())}; } Operation *newPadOp = createPadOfExtractSlice(); From d03805f2ee0bdaa2513fbc3efb9e404e128bdbb3 Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Thu, 19 Jan 2023 21:39:11 +0000 Subject: [PATCH 078/691] [mlir][sparse] add merger/topo sort support for slice-based affine sparse index codegen Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D142928 --- .../mlir/Dialect/SparseTensor/Utils/Merger.h | 69 ++- .../SparseTensor/Transforms/CodegenEnv.h | 1 + .../Transforms/Sparsification.cpp | 414 ++++++++++++++---- .../lib/Dialect/SparseTensor/Utils/Merger.cpp | 12 +- 4 files changed, 392 insertions(+), 104 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h index 59c5b78fda7b8..0e6c2f1553f1c 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -435,19 +435,58 @@ class Merger { /// Iterates over a set of `TensorLoopId`s, invoking the callback /// for each `TensorLoopId` and passing it the corresponding tensor - /// identifier, level, and level-type. - void - foreachTensorLoopId(LatPointId p, - function_ref, DimLevelType)> - callback) const { - for (const TensorLoopId b : latPoints[p].bits.set_bits()) - callback(b, tensor(b), getLvl(b), getDimLevelType(b)); + /// identifier, level, and level-type, following with a boolean value + /// indicating whether it is a dependent index reduction loop condition. 
+ void foreachTensorLoopId( + LatPointId p, function_ref, DimLevelType, bool)> + callback) { + for (const TensorLoopId b : latPoints[p].bits.set_bits()) { + TensorId t = tensor(b); + if (isLvlWithNonTrivialIdxExp(b)) { + // This must be an undefined level. + assert(!getLvl(b).has_value()); + // Slice the tid along the dependent level to iterate current loop. + callback(b, t, loopToDependencies[loop(b)][t], getDimLevelType(b), + /*isIdxReduc=*/true); + } else { + callback(b, t, getLvl(b), getDimLevelType(b), /*isIdxReduc=*/false); + } + } } /// Sets whether the output tensor is sparse or not. void setHasSparseOut(bool s) { hasSparseOut = s; } + /// Establishes the two-way map that i <-> . + void setLoopDependentTensorLevel(LoopId i, TensorId t, Level lvl) { + assert(lvl < numLoops); + loopToDependencies[i][t] = lvl; + levelToDependentIdx[t][lvl].push_back(i); + } + + /// Whether the loop has dependent slice. + bool hasDependentLvl(LoopId i, TensorId tid) { + return loopToDependencies[i][tid].has_value(); + } + + /// Returns the list of loop indices which appear in the non-trivial index + /// expression on t_l, e.g., A[i+j] => {i, j} + std::vector &getDependentLoops(TensorId t, Level lvl) { + return levelToDependentIdx[t][lvl]; + } + + /// Returns the defining [tid, lvl] for the loop. + std::pair getLoopDefiningLvl(LoopId i) const { + return loopBounds[i]; + } + + /// Checks whether the TensorLoopId represents a tensor level with + /// non-trivial index expression on it. + bool isLvlWithNonTrivialIdxExp(TensorLoopId b) const { + return loopToDependencies[loop(b)][tensor(b)].has_value(); + } + /// Convenience getters to immediately access the stored nodes. /// Typically it is inadvisible to keep the reference around, as in /// `TensorExpr &te = merger.exp(e)`, since insertions into the merger @@ -511,6 +550,20 @@ class Merger { // Map that converts pair to the corresponding LoopId. std::vector>> lvlToLoop; + // Map from a loop to its dependencies if any. + // The dependencies of a loop is a set of (tensor, level) pairs. + // It is currently only set for non-trivial index expressions. + // E.g., A[i+j] => i and j will have dependencies {A0} to indicate that + // i and j are used in the non-trivial index expression on A0. + std::vector>> loopToDependencies; + // The inverse map of ldxToDependencies from tensor level -> dependent loop + // E.g., A[i+j], we have A0 => {i, j}, to indicate that A0 uses both {i, j} + // to compute its indices. + std::vector>> levelToDependentIdx; + + // Map from a loop to the [tid, lvl] pair that defines the loop boundary. 
+ std::vector> loopBounds; + llvm::SmallVector tensorExps; llvm::SmallVector latPoints; llvm::SmallVector> latSets; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h index 8c6a7bd6433db..776d7f7f47ece 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h @@ -99,6 +99,7 @@ class CodegenEnv { topSort.reserve(capacity); } + ArrayRef getTopSort() const { return topSort; }; ArrayRef getTopSortSlice(LoopOrd n, LoopOrd m) const; ArrayRef getLoopStackUpTo(LoopOrd n) const; ArrayRef getCurrentLoopStack() const; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index f119ac3ba7ae5..d7ce2b7f63f5c 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -109,6 +109,12 @@ class AffineDimFinder : public AffineExprVisitor { SmallVector iterTypes; }; +// Flattens an affine expression into a list of AffineDimExprs. +struct AffineDimCollector : public AffineExprVisitor { + void visitDimExpr(AffineDimExpr expr) { dims.push_back(expr); } + SmallVector dims; +}; + } // namespace //===----------------------------------------------------------------------===// @@ -254,6 +260,69 @@ static bool findAffine(Merger &merger, TensorId tid, Level lvl, AffineExpr a, } } +/// Helper method to inspect affine expressions for index variable reduction +/// based codegen. It finds the dependent index set for all tensor levels in the +/// current expression we are generating. +/// +/// For example, when handling A[i+j][j+k], we build the two way mapping in +/// merger between (tensor, level) pairs and their dependent index variable set: +/// A_0 <=> [i, j] and A_1 <=> [j, k] +/// +/// It rejects cases (returns false) +/// 1st, when the same index is used more than once, e.g., A[i+j][i] +/// 2nd, when multiplication is used in the non-trivial index expression. +/// 3rd, when a constant operand is used in the non-trivial index expression. +/// +/// TODO: constant should be easy to handle. +static bool findDepIdxSet(Merger &merger, TensorId tensor, Level lvl, + AffineExpr a, DimLevelType dlt, + bool isSubExp = false) { + switch (a.getKind()) { + case AffineExprKind::DimId: { + LoopId ldx = a.cast().getPosition(); + if (!isUndefDLT(merger.getDimLevelType(tensor, ldx))) + return false; // used more than once, e.g., A[i][i] + + // TODO: Generalizes the following two cases. A[i] (with trivial index + // expression) can be treated as a special affine index expression. We do + // not necessarily need to differentiate them. + if (!isSubExp) + merger.setLevelAndType(tensor, ldx, lvl, dlt); + + if (isSubExp) { + // The current loops appears in more than one affine expressions on the + // same tensor. We can not handle this case. e.g., A[i+j][i+k], `i` is + // used twice. + if (merger.hasDependentLvl(ldx, tensor)) { + // TODO: This can be supported by coiterate slices if the loop idx is + // appeared on affine index for different tensor, or take slice on + // mulitple dimensions when it is on the same tensor. + // E.g., + // `d0 + d1` for indexing t0[lvl0] and `d0 + d2` for indexing t1[lvl0] + // d0_1 = getNextSliceOffset t0 along lvl0 + // d0_2 = getNextSliceOffset t1 along lvl0 + // if d0_1 == d0_2 then d0 = d0_1 = d0_1 + // else increase min(d0_1, d0_2). 
+ return false; + } + merger.setLoopDependentTensorLevel(ldx, tensor, lvl); + } + return true; + } + case AffineExprKind::Constant: + case AffineExprKind::Mul: + // TODO: Support Mul and Constant AffineExp for slice-based codegen + return false; + case AffineExprKind::Add: { + auto binOp = a.cast(); + return findDepIdxSet(merger, tensor, lvl, binOp.getLHS(), dlt, true) && + findDepIdxSet(merger, tensor, lvl, binOp.getRHS(), dlt, true); + } + default: + return false; + } +} + /// Get the total number of compound affine expressions in the /// `getMatchingIndexingMap` for the given tensor. For the following inputs: /// /// Returns 1 (because the first level is compressed and its corresponding /// indexing-expression is `d0 + d1`) -static unsigned getNumCompoundAffineOnSparseLvls(AffineMap map, Value tensor) { +static unsigned getNumNonTrivialIdxExpOnSparseLvls(AffineMap map, + Value tensor) { // The `tensor` is not guaranted to have `RankedTensorType`, therefore // we can't use `getRankedTensorType`/`getSparseTensorType` here. // However, we don't need to handle `StorageSpecifierType`, so we @@ -305,20 +375,20 @@ static unsigned getNumCompoundAffineOnSparseLvls(AffineMap map, Value tensor) { /// Get the total number of sparse levels with compound affine /// expressions, summed over all operands of the `GenericOp`. -static unsigned getNumCompoundAffineOnSparseLvls(linalg::GenericOp op) { +static unsigned getNumNonTrivialIdxExpOnSparseLvls(linalg::GenericOp op) { unsigned num = 0; for (OpOperand &t : op->getOpOperands()) - num += getNumCompoundAffineOnSparseLvls(op.getMatchingIndexingMap(&t), - t.get()); + num += getNumNonTrivialIdxExpOnSparseLvls(op.getMatchingIndexingMap(&t), + t.get()); return num; } -static bool hasCompoundAffineOnSparseOut(linalg::GenericOp op) { +static bool hasNonTrivialAffineOnSparseOut(linalg::GenericOp op) { OpOperand *out = op.getDpsInitOperand(0); if (getSparseTensorType(out->get()).isAllDense()) return false; - return getNumCompoundAffineOnSparseLvls(op.getMatchingIndexingMap(out), - out->get()); + return getNumNonTrivialIdxExpOnSparseLvls(op.getMatchingIndexingMap(out), - out->get()); } /// Helper method to inspect sparse encodings in the tensor types. /// Returns true if the sparse annotations and affine subscript /// expressions of all tensors are admissible. Returns false if /// no annotations are found or inadmissible constructs occur. +/// We currently support two different ways to handle non-trivial index +/// expressions on sparse tensors, and they accept different affine expressions. +/// When using the filter-loop-based approach, it accepts (almost) arbitrary +/// affine index expressions on sparse tensors but is much less efficient, and +/// will be gradually removed from the codebase. +/// When using the dependent index reduction-based approach, it currently only +/// supports affine addition index expressions. -static bool findSparseAnnotations(CodegenEnv &env) { +static bool findSparseAnnotations(CodegenEnv &env, bool idxReducBased) { bool annotated = false; // `filterLdx` may be mutated by `findAffine`.
LoopId filterLdx = env.merger().getStartingFilterLoopId(); @@ -335,17 +412,30 @@ static bool findSparseAnnotations(CodegenEnv &env) { const auto enc = getSparseTensorEncoding(t.get().getType()); if (enc) annotated = true; + const Level lvlRank = map.getNumResults(); assert(!enc || lvlRank == enc.getLvlRank()); assert(static_cast(env.op().getRank(&t)) == lvlRank); + + // We only need to do index reduction if there is at least one non-trivial + // index expression on sparse levels. + // If all non-trivial index expression is on dense levels, we can + // efficiently rely on the random access to locate the element. + bool needIdxReduc = + enc && getNumNonTrivialIdxExpOnSparseLvls(map, t.get()) != 0; + // If then current tensor being inspected requires affine index, it need + // to be sliced. for (Level l = 0; l < lvlRank; l++) { const TensorId tid = t.getOperandNumber(); - // FIXME: `toOrigDim` is deprecated. - // FIXME: above we asserted that there are `lvlRank` many results, - // but this is assuming there are in fact `dimRank` many results instead. - const AffineExpr a = map.getResult(toOrigDim(enc, l)); - if (!findAffine(env.merger(), tid, l, a, enc.getLvlType(l), filterLdx)) - return false; // inadmissible affine expression + AffineExpr a = map.getResult(toOrigDim(enc, l)); + DimLevelType dlt = enc.getLvlType(l); + if (idxReducBased && needIdxReduc) { + if (!findDepIdxSet(env.merger(), tid, l, a, dlt)) + return false; // inadmissible affine expression + } else { + if (!findAffine(env.merger(), tid, l, a, dlt, filterLdx)) + return false; // inadmissible affine expression + } } } assert(filterLdx == env.merger().getNumLoops()); @@ -469,11 +559,11 @@ static void addAffineOrderings(std::vector> &adjM, } } -static void tryLoosenAffineDenseConstraints(linalg::GenericOp op, - std::optional &fldx, - AffineExpr &fa, - std::optional &tldx, - AffineExpr &ta) { +static void tryRelaxAffineConstraints(linalg::GenericOp op, + std::optional &fldx, + AffineExpr &fa, + std::optional &tldx, + AffineExpr &ta) { // We use a heuristic here to only pick one dim expression from each // compound affine expression to establish the order between two dense // dimensions. @@ -494,7 +584,7 @@ static void tryLoosenAffineDenseConstraints(linalg::GenericOp op, } if (!ta.isa()) { // Heuristic: we prefer reduction loop for rhs to reduce the chance - // addint reduce < parallel ordering. + // adding reduce < parallel ordering. finder.setPickedIterType(utils::IteratorType::reduction); finder.walkPostOrder(ta); ta = finder.getDimExpr(); @@ -503,14 +593,183 @@ static void tryLoosenAffineDenseConstraints(linalg::GenericOp op, } } +/// Makes target array's elements appear in the same order as the `order` array. +static void sortArrayBasedOnOrder(std::vector &target, + ArrayRef order) { + std::sort(target.begin(), target.end(), [&order](LoopId l, LoopId r) { + assert(l != r); + int idxL = -1, idxR = -1; + for (int i = 0, e = order.size(); i < e; i++) { + if (order[i] == l) + idxL = i; + if (order[i] == r) + idxR = i; + } + assert(idxL >= 0 && idxR >= 0); + return idxL < idxR; + }); +} + +static void addFilterLoopBasedConstraints(CodegenEnv &env, OpOperand &t, + OpOperand *skip, SortMask mask, + std::vector> &adjM, + std::vector &inDegree) { + // Get map and encoding. + auto map = env.op().getMatchingIndexingMap(&t); + auto enc = getSparseTensorEncoding(t.get().getType()); + + // Each tensor expression and optional dimension ordering (row-major + // by default) puts an ordering constraint on the loop indices. 
For + // example, the tensor expresion A_ijk forces the ordering i < j < k + // on the loop indices if no explicit dimension ordering is given. + for (Level l = 0, rank = map.getNumResults(); l < rank; l++) { + AffineExpr ta = map.getResult(toOrigDim(enc, l)); + std::optional tldx = + env.merger().getLoopId(t.getOperandNumber(), l); + // Filter loops should be constructed after all the dependent loops, + // i.e., d0 + d1 < filter_loop(d0 + d1) + if (tldx && env.merger().isFilterLoop(*tldx)) { + assert(!ta.isa() && !isDenseDLT(enc.getDimLevelType()[l])); + addAffineOrderings(adjM, inDegree, ta, AffineExpr(), std::nullopt, tldx); + // Now that the ordering of affine expression is captured by filter + // loop idx, we only need to ensure the affine ordering against filter + // loop. Thus, we reset the affine express to nil here to mark it as + // resolved. + ta = AffineExpr(); + } + + // Skip tensor during cycle resolution, though order between filter loop + // and dependent loops need to be guaranteed unconditionally. + if (&t == skip) + continue; + + if (l > 0) { + AffineExpr fa = map.getResult(toOrigDim(enc, l - 1)); + std::optional fldx = + env.merger().getLoopId(t.getOperandNumber(), l - 1); + + // Applying order constraints on every pair of dimExpr between two + // compound affine expressions can sometime too strict: + // E.g, for [dense, dense] -> (d0 + d1, d2 + d3). + // It is totally fine to have loop sequence d0->d2->d1->d3 instead of + // requiring d0 < d2, d1 < d2, d0 < d3, d1 < d3. + // We also relax the affine constraint when use slice-based algorithm + // as there is no filter loop for affine index on sparse dimension. + // TODO: do we really need the condition? + if (!includesDense(mask)) + tryRelaxAffineConstraints(env.op(), fldx, fa, tldx, ta); + + // (d0 + d1) < (d2 + d3), or + // filter_loop_d-1 < (d2 + d3), or + // (d0 + d1) < filter_loop_d, or + // filter_loop_d-1 < filter_loop_d depending on whether fa/ta is reset + // above. + addAffineOrderings(adjM, inDegree, fa, ta, fldx, tldx); + } + } +} + +static void addSliceBasedConstraints(CodegenEnv &env, OpOperand &t, + OpOperand *skip, SortMask mask, + std::vector> &adjM, + std::vector &inDegree) { + // Get map and encoding. + auto map = env.op().getMatchingIndexingMap(&t); + auto enc = getSparseTensorEncoding(t.get().getType()); + + // No special treatment for simple indices. + if (getNumNonTrivialIdxExpOnSparseLvls(map, t.get()) == 0) + return addFilterLoopBasedConstraints(env, t, skip, mask, adjM, inDegree); + + // Skip tensor during cycle resolution, though order between filter loop + // and dependent loops need to be guaranteed unconditionally. + if (&t == skip) + return; + + AffineDimFinder finder(env.op()); + finder.setPickedIterType(utils::IteratorType::reduction); + // To compute iteration graph for tensor[d0 + d1 + d3, d4 + d5 + d6], + // we requires there exist d_x \in {d0, d1, d3} and d_y \in {d4, d5, d6}, + // and d_x > d_y && {d0, d1, d3} - d_x > {d4, d5, d6} - d_y + for (Level lvl = 1, rank = map.getNumResults(); lvl < rank; lvl++) { + AffineExpr fa = map.getResult(toOrigDim(enc, lvl - 1)); + AffineExpr ta = map.getResult(toOrigDim(enc, lvl)); + + // This is a heuristic, we pick an abitrary reduction loop from lhs and + // rhs and use them as d_x and d_y. 
+ finder.walkPostOrder(fa); + AffineDimExpr fexp = finder.getDimExpr(); + LoopId fldx = fexp.getPosition(); + + finder.walkPostOrder(ta); + AffineDimExpr texp = finder.getDimExpr(); + LoopId tldx = texp.getPosition(); + + // d_x > d_y + if (!adjM[fldx][tldx]) { + adjM[fldx][tldx] = true; + inDegree[tldx]++; + } + + AffineDimCollector fCollector; + fCollector.walkPostOrder(fa); + AffineDimCollector tCollector; + tCollector.walkPostOrder(ta); + + // make sure dx and dy is the last; + for (auto fd : fCollector.dims) { + LoopId f = fd.getPosition(); + if (f == fldx) + continue; + if (!adjM[f][fldx]) { + adjM[f][fldx] = true; + inDegree[fldx]++; + } + } + for (auto td : tCollector.dims) { + LoopId t = td.getPosition(); + if (t == tldx) + continue; + if (!adjM[t][tldx]) { + adjM[t][tldx] = true; + inDegree[tldx]++; + } + } + // Since we only support affine addition, the order between two dim + // expression does not really matters. + // {d0, d1, d3} - d_x > {d4, d5, d6} - d_y + // This is to ensure that the affine expressions are reduced in sparse + // tensor level ordering. + // TODO: this ordering could probably be loosen if we support out-of-order + // reduction. + // TODO: the evaluation order need to be ensure to + // support affine multiplication. + for (auto fd : fCollector.dims) { + LoopId f = fd.getPosition(); + if (f == fldx) // skip d_x + continue; + + for (auto td : tCollector.dims) { + LoopId t = td.getPosition(); + if (t == tldx) // skip d_y + continue; + if (!adjM[f][t]) { + adjM[f][t] = true; + inDegree[t]++; + } + } + } + } +} + /// Computes a topologically sorted iteration graph for the linalg operation. -/// Ensures all tensors are visited in natural coordinate order. This is +/// Ensures all tensors are visited in natural index order. This is /// essential for sparse storage formats since these only support access -/// along fixed levels. Even for dense storage formats, however, the natural -/// coordinate order yields innermost unit-stride access with better spatial +/// along fixed dimensions. Even for dense storage formats, however, the natural +/// index order yields innermost unit-stride access with better spatial /// locality. static bool computeIterationGraph(CodegenEnv &env, SortMask mask, - OpOperand *skip = nullptr) { + OpOperand *skip, bool idxReducBased = false) { // Set up an n x n from/to adjacency matrix of the iteration graph // for the implicit loop indices i_0 .. i_n-1. const LoopId n = env.merger().getNumLoops(); @@ -522,7 +781,8 @@ static bool computeIterationGraph(CodegenEnv &env, SortMask mask, // Get map and encoding. const auto map = env.op().getMatchingIndexingMap(&t); const auto enc = getSparseTensorEncoding(t.get().getType()); - assert(map.getNumDims() + getNumCompoundAffineOnSparseLvls(env.op()) == n); + assert(map.getNumDims() + getNumNonTrivialIdxExpOnSparseLvls(env.op()) == + n); // Skips dense inputs/outputs when not requested. const bool isDenseInput = !enc && env.op().isDpsInput(&t); @@ -549,63 +809,12 @@ static bool computeIterationGraph(CodegenEnv &env, SortMask mask, } } } - - // Each tensor expression and optional dimension ordering (row-major - // by default) puts an ordering constraint on the loop indices. For - // example, the tensor expresion A_ijk forces the ordering i < j < k - // on the loop indices if no explicit dimension ordering is given. - const Level lvlRank = map.getNumResults(); - assert(!enc || lvlRank == enc.getLvlRank()); - for (Level l = 0; l < lvlRank; l++) { - // FIXME: `toOrigDim` is deprecated. 
- // FIXME: above we asserted that there are `lvlRank` many results, - // but this is assuming there are in fact `dimRank` many results instead. - AffineExpr ta = map.getResult(toOrigDim(enc, l)); - std::optional tldx = - env.merger().getLoopId(t.getOperandNumber(), l); - - // Filter loops should be constructed after all the dependent loops, - // i.e., d0 + d1 < filter_loop(d0 + d1) - if (tldx && env.merger().isFilterLoop(*tldx)) { - assert(!ta.isa() && !isDenseDLT(enc.getLvlType(l))); - addAffineOrderings(adjM, inDegree, ta, AffineExpr(), std::nullopt, - tldx); - // Now that the ordering of affine expression is captured by filter - // loop idx, we only need to ensure the affine ordering against filter - // loop. Thus, we reset the affine express to nil here to mark it as - // resolved. - ta = AffineExpr(); - } - - // Skip tensor during cycle resolution, though order between filter loop - // and dependent loops need to be guaranteed unconditionally. - if (&t == skip) - continue; - - if (l > 0) { - // FIXME: `toOrigDim` is deprecated. - // FIXME: above we asserted that there are `lvlRank` many results, - // but this is assuming there are in fact `dimRank` many results. - AffineExpr fa = map.getResult(toOrigDim(enc, l - 1)); - std::optional fldx = - env.merger().getLoopId(t.getOperandNumber(), l - 1); - - // Applying order constraints on every pair of dimExpr between two - // compound affine expressions can sometime too strict: - // E.g, for [dense, dense] -> (d0 + d1, d2 + d3). - // It is totally fine to have loop sequence d0->d2->d1->d3 instead of - // requiring d0 < d2, d1 < d2, d0 < d3, d1 < d3. - if (!includesDense(mask)) - tryLoosenAffineDenseConstraints(env.op(), fldx, fa, tldx, ta); - - // (d0 + d1) < (d2 + d3), or - // filter_loop_d-1 < (d2 + d3), or - // (d0 + d1) < filter_loop_d, or - // filter_loop_d-1 < filter_loop_d depending on whether fa/ta is reset - // above. - addAffineOrderings(adjM, inDegree, fa, ta, fldx, tldx); - } - } + // Push unrelated loops into sparse iteration space, so these + // will be skipped more often. + if (idxReducBased) + addSliceBasedConstraints(env, t, skip, mask, adjM, inDegree); + else + addFilterLoopBasedConstraints(env, t, skip, mask, adjM, inDegree); } // Topologically sort the iteration graph to determine loop order. // Report failure for a cyclic iteration graph. @@ -1275,7 +1484,7 @@ static bool startLoopSeq(CodegenEnv &env, OpBuilder &builder, ExprId exp, SmallVector lvls; env.merger().foreachTensorLoopId(l0, [&](TensorLoopId b, TensorId tid, std::optional lvl, - DimLevelType dlt) { + DimLevelType dlt, bool) { assert(env.merger().loop(b) == idx); if (isDenseDLT(dlt) || isUndefDLT(dlt)) { needsUniv = true; @@ -1350,7 +1559,7 @@ static bool translateBitsToTidLvlPairs( bool hasNonUnique = false; env.merger().foreachTensorLoopId(li, [&, ldx](TensorLoopId b, TensorId tid, std::optional lvl, - DimLevelType dlt) { + DimLevelType dlt, bool) { if (simple.test(b)) { if (isUndefDLT(dlt)) { // An undefined dlt in the lattices, we probably mean to @@ -1596,21 +1805,25 @@ struct GenericOpSparsifier : public OpRewritePattern { PatternRewriter &rewriter) const override { // Only accept single output operations without affine index on sparse // output. - if (op.getNumDpsInits() != 1 || hasCompoundAffineOnSparseOut(op)) + if (op.getNumDpsInits() != 1 || hasNonTrivialAffineOnSparseOut(op)) return failure(); - if (options.enableIndexReduction) - llvm_unreachable("not yet implemented"); - // Sets up a code generation environment. 
const unsigned numTensors = op->getNumOperands(); const unsigned numLoops = op.getNumLoops(); - const unsigned numFilterLoops = getNumCompoundAffineOnSparseLvls(op); - CodegenEnv env(op, options, numTensors, numLoops, numFilterLoops); + const unsigned numFilterLoops = getNumNonTrivialIdxExpOnSparseLvls(op); + // TODO: we should probably always use slice-based codegen whenever + // possible, we can even intermix slice-based and filter-loop based codegen. + bool idxReducBased = options.enableIndexReduction && numFilterLoops != 0; + + // If we uses slice based algorithm for affine index, we do not need filter + // loop. + CodegenEnv env(op, options, numTensors, numLoops, + /*numFilterLoops=*/idxReducBased ? 0 : numFilterLoops); // Detects sparse annotations and translates the per-level sparsity // information for all tensors to loop indices in the kernel. - if (!findSparseAnnotations(env)) + if (!findSparseAnnotations(env, idxReducBased)) return failure(); // Constructs the tensor expressions tree from `op`, returns failure if the @@ -1635,7 +1848,7 @@ struct GenericOpSparsifier : public OpRewritePattern { SortMask::kIncludeDenseInput, SortMask::kIncludeDenseOutput, SortMask::kIncludeUndef, SortMask::kSparseOnly}; for (const SortMask mask : allMasks) { - if (computeIterationGraph(env, mask)) { + if (computeIterationGraph(env, mask, nullptr, idxReducBased)) { hasCycle = false; if (env.isAdmissibleTopoOrder()) { isAdmissible = true; @@ -1644,11 +1857,24 @@ struct GenericOpSparsifier : public OpRewritePattern { // else try a set of less strict constraints. } } - if (hasCycle) - return resolveCycle(env, rewriter); // one last shot + if (hasCycle) { + return idxReducBased + ? failure() // TODO: should cycle be resolved differently? + : resolveCycle(env, rewriter); // one last shot + } + if (!isAdmissible) return failure(); // inadmissible expression, reject + for (OpOperand &t : env.op()->getOpOperands()) { + Level rank = env.op().getMatchingIndexingMap(&t).getNumResults(); + for (Level lvl = 0; lvl < rank; lvl++) { + sortArrayBasedOnOrder( + env.merger().getDependentLoops(t.getOperandNumber(), lvl), + env.getTopSort()); + } + } + // Recursively generates code if admissible. env.startEmit(); genBuffers(env, rewriter); diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 029ce3f3f91ec..7f4400188cf14 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -220,7 +220,12 @@ Merger::Merger(unsigned numInputOutputTensors, unsigned numNativeLoops, loopToLvl(numTensors, std::vector>(numLoops, std::nullopt)), lvlToLoop(numTensors, - std::vector>(numLoops, std::nullopt)) {} + std::vector>(numLoops, std::nullopt)), + loopToDependencies(numLoops, std::vector>( + numTensors, std::nullopt)), + levelToDependentIdx(numTensors, std::vector>( + numLoops, std::vector())), + loopBounds(numLoops, std::make_pair(numTensors, numLoops)) {} //===----------------------------------------------------------------------===// // Lattice methods. 
@@ -762,7 +767,10 @@ void Merger::dumpBits(const BitVector &bits) const { const TensorId t = tensor(b); const LoopId i = loop(b); const auto dlt = lvlTypes[t][i]; - llvm::dbgs() << " i_" << t << "_" << i << "_" << toMLIRString(dlt); + if (isLvlWithNonTrivialIdxExp(b)) + llvm::dbgs() << " DEP_" << t << "_" << i; + else + llvm::dbgs() << " i_" << t << "_" << i << "_" << toMLIRString(dlt); } } } From 6ee4ea8e2fe60647d9aecf42caed417cab13b091 Mon Sep 17 00:00:00 2001 From: Nemanja Ivanovic Date: Mon, 20 Mar 2023 16:43:30 -0500 Subject: [PATCH 079/691] [PowerPC][NFC] Test needs to include constant pool values --- llvm/test/CodeGen/PowerPC/pr61315.ll | 52 +++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/PowerPC/pr61315.ll b/llvm/test/CodeGen/PowerPC/pr61315.ll index 8203de9cbc142..9a763e16122cb 100644 --- a/llvm/test/CodeGen/PowerPC/pr61315.ll +++ b/llvm/test/CodeGen/PowerPC/pr61315.ll @@ -1,7 +1,23 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) local_unnamed_addr #0 { +; CHECK: .LCPI0_0: +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect: ; CHECK: # %bb.0: ; CHECK-NEXT: addis r3, r2, .LCPI0_0@toc@ha @@ -29,6 +45,23 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect(<16 x i8> noundef %0) } define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0) local_unnamed_addr #0 { +; CHECK: .LCPI1_0: +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 +; CHECK-NEXT: .byte 7 # 0x7 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect2: ; CHECK: # %bb.0: ; CHECK-NEXT: addis r3, r2, .LCPI1_0@toc@ha @@ -56,6 +89,23 @@ define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect2(<16 x i8> noundef %0 } define dso_local <16 x i8> @ConvertExtractedMaskBitsToVect3(<8 x i16> noundef %0) local_unnamed_addr #0 { +; CHECK: .LCPI2_0: +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 17 # 0x11 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 17 # 0x11 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 17 # 0x11 +; CHECK-NEXT: .byte 16 # 0x10 +; CHECK-NEXT: .byte 17 # 0x11 +; CHECK-NEXT: .byte 6 # 0x6 +; CHECK-NEXT: .byte 6 # 0x6 +; CHECK-NEXT: .byte 6 # 0x6 +; CHECK-NEXT: .byte 6 # 0x6 +; CHECK-NEXT: .byte 6 # 0x6 +; CHECK-NEXT: .byte 6 # 0x6 +; CHECK-NEXT: .byte 6 # 0x6 +; CHECK-NEXT: .byte 6 # 0x6 ; CHECK-LABEL: ConvertExtractedMaskBitsToVect3: ; CHECK: # %bb.0: ; CHECK-NEXT: addis r3, r2, .LCPI2_0@toc@ha 
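Aside on the iteration-graph machinery in the sparse-tensor patches above: every constraint helper ultimately fills an n x n adjacency matrix plus an in-degree vector, and the loop order falls out of a plain topological sort. The following standalone C++ sketch is not taken from any patch and every name in it is illustrative; it shows the Kahn-style sort this corresponds to, including the cycle case that makes computeIterationGraph report failure so the caller can retry with a weaker SortMask.

  // Kahn's algorithm over the same adjM/inDegree representation used above.
  #include <iostream>
  #include <queue>
  #include <vector>

  // Returns the loop order, or an empty vector when the graph is cyclic.
  static std::vector<unsigned>
  topoSort(const std::vector<std::vector<bool>> &adjM,
           std::vector<unsigned> inDegree) {
    const unsigned n = static_cast<unsigned>(inDegree.size());
    std::queue<unsigned> ready;
    for (unsigned i = 0; i < n; i++)
      if (inDegree[i] == 0)
        ready.push(i);
    std::vector<unsigned> order;
    while (!ready.empty()) {
      unsigned i = ready.front();
      ready.pop();
      order.push_back(i);
      for (unsigned j = 0; j < n; j++)
        if (adjM[i][j] && --inDegree[j] == 0)
          ready.push(j);
    }
    if (order.size() != n) // a cycle keeps some in-degree above zero
      order.clear();
    return order;
  }

  int main() {
    // Constraints i0 < i1 and i0 < i2, as an indexing map like
    // (d0, d1) -> A[d0][d1] together with an output on (d0, d2) might induce.
    std::vector<std::vector<bool>> adjM = {{false, true, true},
                                           {false, false, false},
                                           {false, false, false}};
    std::vector<unsigned> inDegree = {0, 1, 1};
    for (unsigned l : topoSort(adjM, inDegree))
      std::cout << "loop i" << l << "\n";
    return 0;
  }

The filter-loop and slice-based helpers differ only in which edges they add to adjM before this sort runs.
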
From 919a3f1c751bf27c3c28018d8ff6cb55a81a5164 Mon Sep 17 00:00:00 2001 From: Dave MacLachlan Date: Mon, 20 Mar 2023 14:36:07 -0700 Subject: [PATCH 080/691] Add declaration for `__tsan_default_options` to tsan_interface.h `__tsan_default_options` is part of the tsan interface so should be exposed in tsan_interface.h. Differential Revision: https://reviews.llvm.org/D146259 --- compiler-rt/lib/tsan/rtl/tsan_flags.cpp | 10 ++++++---- compiler-rt/lib/tsan/rtl/tsan_interface.h | 3 +++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp index ee78f25cc65c4..3fd58f46983fd 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_flags.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_flags.cpp @@ -10,19 +10,21 @@ // //===----------------------------------------------------------------------===// -#include "sanitizer_common/sanitizer_flags.h" +#include "tsan_flags.h" + #include "sanitizer_common/sanitizer_flag_parser.h" +#include "sanitizer_common/sanitizer_flags.h" #include "sanitizer_common/sanitizer_libc.h" -#include "tsan_flags.h" -#include "tsan_rtl.h" +#include "tsan_interface.h" #include "tsan_mman.h" +#include "tsan_rtl.h" #include "ubsan/ubsan_flags.h" namespace __tsan { // Can be overriden in frontend. #ifdef TSAN_EXTERNAL_HOOKS -extern "C" const char* __tsan_default_options(); +extern "C" const char *__tsan_default_options(); #else SANITIZER_WEAK_DEFAULT_IMPL const char *__tsan_default_options() { diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.h b/compiler-rt/lib/tsan/rtl/tsan_interface.h index 5b9d664e503fc..b32fb657adc3e 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interface.h +++ b/compiler-rt/lib/tsan/rtl/tsan_interface.h @@ -32,6 +32,9 @@ extern "C" { // before any instrumented code is executed and before any call to malloc. SANITIZER_INTERFACE_ATTRIBUTE void __tsan_init(); +SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE const char * +__tsan_default_options(); + SANITIZER_INTERFACE_ATTRIBUTE void __tsan_flush_memory(); SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read1(void *addr); From f2c3a9cbd229791b63d58f7499fe9ebf74c3303b Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Mon, 20 Mar 2023 15:04:48 -0700 Subject: [PATCH 081/691] [libc][obvious] fix missing memory_check_utils memmove and bcopy tests were missing a dependancy on memory_check_utils in the bazel build, causing build failures. Differential Revision: https://reviews.llvm.org/D146462 --- .../bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel index 78e8b3a89f348..72b75a3975026 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel @@ -144,6 +144,7 @@ libc_test( deps = [ "//libc:__support_cpp_span", "//libc/test/UnitTest:memory_matcher", + ":memory_check_utils", ], ) @@ -156,6 +157,7 @@ libc_test( deps = [ "//libc:__support_cpp_span", "//libc/test/UnitTest:memory_matcher", + ":memory_check_utils", ], ) From 99047c0501e0fe5c60bb583185f3b45bdc112199 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 14 Mar 2023 15:08:56 -0700 Subject: [PATCH 082/691] [libc][bazel] add targets for sprintf The bazel build is currently overlay mode only, so the FILE functions are still out of reach for it, but sprintf only uses strings. 
This adds targets for sprintf, snprintf, and all the interal printf pieces, as well as tests. Reviewed By: sivachandra, lntue Differential Revision: https://reviews.llvm.org/D146100 --- libc/src/stdio/printf_core/CMakeLists.txt | 4 + .../llvm-project-overlay/libc/BUILD.bazel | 275 ++++++++++++++++-- .../libc/test/UnitTest/BUILD.bazel | 16 + .../libc/test/src/stdio/BUILD.bazel | 77 +++++ 4 files changed, 353 insertions(+), 19 deletions(-) create mode 100644 utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt index 54a5cbabc38a7..31db8ad3c524c 100644 --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -20,6 +20,7 @@ add_object_library( libc.src.__support.ctype_utils libc.src.__support.str_to_integer libc.src.__support.CPP.bit + libc.src.__support.CPP.optional libc.src.__support.CPP.string_view libc.src.__support.CPP.type_traits libc.src.__support.common @@ -37,6 +38,7 @@ add_object_library( libc.src.__support.ctype_utils libc.src.__support.str_to_integer libc.src.__support.CPP.bit + libc.src.__support.CPP.optional libc.src.__support.CPP.string_view libc.src.__support.CPP.type_traits libc.src.__support.common @@ -63,6 +65,8 @@ add_object_library( writer.cpp HDRS writer.h + DEPENDS + libc.src.__support.CPP.string_view ) add_object_library( diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 23c288db31ada..5e563fe71a1dd 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -214,6 +214,30 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_arg_list", + hdrs = ["src/__support/arg_list.h"], + deps = [ + ":libc_root", + ], +) + +libc_support_library( + name = "__support_float_to_string", + hdrs = [ + "src/__support/float_to_string.h", + "src/__support/ryu_constants.h", + ], + deps = [ + ":__support_cpp_type_traits", + ":__support_fputil_fp_bits", + ":__support_uint", + ":__support_libc_assert", + ":__support_common", + ":libc_root", + ], +) + libc_support_library( name = "__support_number_pair", hdrs = ["src/__support/number_pair.h"], @@ -276,6 +300,31 @@ libc_support_library( deps = [":__support_cpp_type_traits"], ) +libc_support_library( + name = "__support_integer_to_string", + hdrs = ["src/__support/integer_to_string.h"], + deps = [ + ":__support_cpp_type_traits", + ":__support_cpp_optional", + ":__support_cpp_span", + ":__support_cpp_string_view", + ":__support_common", + ":libc_root", + ], +) + +libc_support_library( + name = "__support_libc_assert", + hdrs = ["src/__support/libc_assert.h"], + deps = [ + ":__support_osutil_io", + ":__support_osutil_quick_exit", + ":__support_integer_to_string", + ":__support_macros_attributes", + ":libc_root", + ], +) + libc_support_library( name = "__support_ctype_utils", hdrs = ["src/__support/ctype_utils.h"], @@ -588,7 +637,7 @@ libc_support_library( ) libc_support_library( - name = "__support_osutil", + name = "__support_osutil_syscall", hdrs = ["src/__support/OSUtil/syscall.h"], textual_hdrs = [ "src/__support/OSUtil/linux/syscall.h", @@ -600,6 +649,35 @@ libc_support_library( ":libc_root", ], ) + +libc_support_library( + name = "__support_osutil_io", + hdrs = ["src/__support/OSUtil/io.h"], + textual_hdrs = [ + "src/__support/OSUtil/linux/io.h", + ], + deps = [ + ":__support_common", + ":__support_osutil_syscall", + ":string_utils", + 
":libc_root", + ], +) + +libc_support_library( + name = "__support_osutil_quick_exit", + hdrs = ["src/__support/OSUtil/quick_exit.h"], + textual_hdrs = [ + "src/__support/OSUtil/linux/quick_exit.h", + #TODO: add support for GPU quick_exit (isn't just in a header.) + ], + deps = [ + ":__support_osutil_syscall", + ":libc_root", + ], +) + + ############################### errno targets ################################ libc_function( @@ -1780,7 +1858,7 @@ libc_function( hdrs = ["src/unistd/chdir.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1791,7 +1869,7 @@ libc_function( hdrs = ["src/unistd/close.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1802,7 +1880,7 @@ libc_function( hdrs = ["src/unistd/fchdir.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1813,7 +1891,7 @@ libc_function( hdrs = ["src/unistd/fsync.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1824,7 +1902,7 @@ libc_function( hdrs = ["src/unistd/ftruncate.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1835,7 +1913,7 @@ libc_function( hdrs = ["src/unistd/link.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1846,7 +1924,7 @@ libc_function( hdrs = ["src/unistd/linkat.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1857,7 +1935,7 @@ libc_function( hdrs = ["src/unistd/lseek.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1868,7 +1946,7 @@ libc_function( hdrs = ["src/unistd/read.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1879,7 +1957,7 @@ libc_function( hdrs = ["src/unistd/readlink.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1890,7 +1968,7 @@ libc_function( hdrs = ["src/unistd/readlinkat.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1901,7 +1979,7 @@ libc_function( hdrs = ["src/unistd/rmdir.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1912,7 +1990,7 @@ libc_function( hdrs = ["src/unistd/symlink.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1923,7 +2001,7 @@ libc_function( hdrs = ["src/unistd/symlinkat.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1934,7 +2012,7 @@ libc_function( hdrs = ["src/unistd/truncate.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1945,7 +2023,7 @@ libc_function( hdrs = ["src/unistd/unlink.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1956,7 +2034,7 @@ libc_function( hdrs = ["src/unistd/unlinkat.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", ":errno", ], ) @@ -1967,7 +2045,166 @@ libc_function( hdrs = ["src/unistd/write.h"], deps = [ ":__support_common", - ":__support_osutil", + ":__support_osutil_syscall", + ":errno", + ], +) + +################################ stdio targets 
################################# + +libc_support_library( + name = "printf_core_structs", + hdrs = ["src/stdio/printf_core/core_structs.h"], + deps = [ + ":__support_cpp_string_view", + ":__support_fputil_fp_bits", + ":libc_root", + ], +) + +libc_support_library( + name = "printf_config", + hdrs = ["src/stdio/printf_core/printf_config.h"], + deps = [ + ":libc_root", + ], +) + + +libc_support_library( + name = "printf_parser", + hdrs = ["src/stdio/printf_core/parser.h"], + srcs = ["src/stdio/printf_core/parser.cpp"], + deps = [ + ":printf_core_structs", + ":printf_config", + ":__support_cpp_string_view", + ":__support_cpp_type_traits", + ":__support_cpp_optional", + ":__support_cpp_bit", + ":__support_fputil_fp_bits", + ":__support_arg_list", + ":__support_ctype_utils", + ":__support_str_to_integer", + ":__support_common", + ":libc_root", + ], +) + +# Only used for testing. +libc_support_library( + name = "printf_mock_parser", + hdrs = ["src/stdio/printf_core/parser.h"], + srcs = ["src/stdio/printf_core/parser.cpp"], + deps = [ + ":printf_core_structs", + ":printf_config", + ":__support_cpp_string_view", + ":__support_cpp_type_traits", + ":__support_cpp_optional", + ":__support_cpp_bit", + ":__support_fputil_fp_bits", + ":__support_arg_list", + ":__support_ctype_utils", + ":__support_str_to_integer", + ":__support_common", + ":libc_root", + ], + copts = ["-DLIBC_COPT_MOCK_ARG_LIST"], +) + +libc_support_library( + name = "printf_string_writer", + hdrs = ["src/stdio/printf_core/string_writer.h"], + srcs = ["src/stdio/printf_core/string_writer.cpp"], + deps = [ + ":__support_cpp_string_view", + ":string_memory_utils", + ":printf_core_structs", + ":libc_root", + ], +) + +libc_support_library( + name = "printf_writer", + hdrs = ["src/stdio/printf_core/writer.h"], + srcs = ["src/stdio/printf_core/writer.cpp"], + deps = [ + ":__support_cpp_string_view", + ":libc_root", + ], +) + +libc_support_library( + name = "printf_converter", + hdrs = [ + "src/stdio/printf_core/converter.h", + "src/stdio/printf_core/converter_utils.h", + "src/stdio/printf_core/converter_atlas.h", + "src/stdio/printf_core/string_converter.h", + "src/stdio/printf_core/char_converter.h", + "src/stdio/printf_core/int_converter.h", + "src/stdio/printf_core/ptr_converter.h", + "src/stdio/printf_core/write_int_converter.h", + "src/stdio/printf_core/float_inf_nan_converter.h", + "src/stdio/printf_core/float_hex_converter.h", + "src/stdio/printf_core/float_dec_converter.h", + ], + srcs = ["src/stdio/printf_core/converter.cpp"], + deps = [ + ":printf_writer", + ":printf_core_structs", + ":__support_cpp_string_view", + ":__support_cpp_limits", + ":__support_cpp_span", + ":__support_fputil_fp_bits", + ":__support_fputil_fenv_impl", + ":__support_libc_assert", + ":__support_uint", + ":__support_uint128", + ":__support_integer_to_string", + ":__support_float_to_string", + ":__support_common", + ":libc_root", + ], +) + +libc_support_library( + name = "printf_main", + hdrs = ["src/stdio/printf_core/printf_main.h"], + srcs = ["src/stdio/printf_core/printf_main.cpp"], + deps = [ + ":printf_parser", + ":printf_converter", + ":printf_writer", + ":printf_core_structs", + ":__support_arg_list", + ":libc_root", + ], +) + +libc_function( + name = "sprintf", + srcs = ["src/stdio/sprintf.cpp"], + hdrs = ["src/stdio/sprintf.h"], + deps = [ + ":__support_arg_list", + ":printf_main", + ":printf_string_writer", + ":printf_writer", + ":errno", + ], +) + +libc_function( + name = "snprintf", + srcs = ["src/stdio/snprintf.cpp"], + hdrs = 
["src/stdio/snprintf.h"], + deps = [ + ":__support_arg_list", + ":printf_main", + ":printf_string_writer", + ":printf_writer", ":errno", ], ) diff --git a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel index bf01bd463129e..297b98c1193b0 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/UnitTest/BUILD.bazel @@ -73,6 +73,22 @@ cc_library( ], ) +cc_library( + name = "printf_matcher", + srcs = [ + "PrintfMatcher.cpp", + ], + hdrs = [ + "PrintfMatcher.h", + ], + deps = [ + ":LibcUnitTest", + ":string_utils", + "//libc:__support_fputil_fp_bits", + "//libc:printf_core_structs", + ], +) + cc_library( name = "string_utils", hdrs = [ diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel new file mode 100644 index 0000000000000..4f49ec7e53cb1 --- /dev/null +++ b/utils/bazel/llvm-project-overlay/libc/test/src/stdio/BUILD.bazel @@ -0,0 +1,77 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# Tests for LLVM libc stdio.h functions. + +load("//libc/test:libc_test_rules.bzl", "libc_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +libc_test( + name = "printf_parser_test", + srcs = ["printf_core/parser_test.cpp"], + libc_function_deps = [ + ], + deps = [ + "//libc:printf_parser", + "//libc:printf_core_structs", + "//libc:__support_cpp_string_view", + "//libc:__support_cpp_bit", + "//libc:__support_arg_list", + "//libc/utils/testutils:libc_test_utils", + "//libc/test/UnitTest:printf_matcher", + ], +) + +libc_test( + name = "printf_string_writer_test", + srcs = ["printf_core/string_writer_test.cpp"], + libc_function_deps = [ + ], + deps = [ + "//libc:printf_string_writer", + "//libc:printf_writer", + "//libc:printf_core_structs", + "//libc:__support_cpp_string_view", + "//libc:__support_arg_list", + ], +) + +libc_test( + name = "printf_converter_test", + srcs = ["printf_core/converter_test.cpp"], + libc_function_deps = [ + ], + deps = [ + "//libc:printf_converter", + "//libc:printf_string_writer", + "//libc:printf_writer", + "//libc:printf_core_structs", + "//libc:__support_cpp_string_view", + "//libc:__support_arg_list", + ], +) + +libc_test( + name = "sprintf_test", + srcs = ["sprintf_test.cpp"], + libc_function_deps = [ + "//libc:sprintf", + ], + deps = [ + "//libc:__support_fputil_fp_bits", + "//libc:__support_fputil_platform_defs", + "//libc/utils/testutils:libc_test_utils", + ], +) + +libc_test( + name = "snprintf_test", + srcs = ["snprintf_test.cpp"], + libc_function_deps = [ + "//libc:snprintf", + ], +) From 1187d8a62ba288e2221731f1795fa184571cd854 Mon Sep 17 00:00:00 2001 From: Ian Anderson Date: Tue, 31 Jan 2023 00:56:27 -0800 Subject: [PATCH 083/691] [libunwind][Modules] Add unwind_arm_ehabi.h and unwind_itanium.h to the unwind module) Add unwind_arm_ehabi.h and unwind_itanium.h to the unwind module and use angle includes to include them. 
Reviewed By: ldionne, #libunwind

Differential Revision: https://reviews.llvm.org/D144323
---
 libunwind/include/libunwind.modulemap | 3 +++
 libunwind/include/unwind.h            | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/libunwind/include/libunwind.modulemap b/libunwind/include/libunwind.modulemap
index 162fe1d279a3c..775841ecb5f18 100644
--- a/libunwind/include/libunwind.modulemap
+++ b/libunwind/include/libunwind.modulemap
@@ -6,5 +6,8 @@ module libunwind [system] {
 module unwind [system] {
   header "__libunwind_config.h"
   header "unwind.h"
+  private textual header "unwind_arm_ehabi.h"
+  private textual header "unwind_itanium.h"
+
   export *
 }
diff --git a/libunwind/include/unwind.h b/libunwind/include/unwind.h
index 26cdef22207ed..b1775d3a3decc 100644
--- a/libunwind/include/unwind.h
+++ b/libunwind/include/unwind.h
@@ -56,9 +56,9 @@ typedef enum {
 typedef struct _Unwind_Context _Unwind_Context;   // opaque
 
 #if defined(_LIBUNWIND_ARM_EHABI)
-#include "unwind_arm_ehabi.h"
+#include <unwind_arm_ehabi.h>
 #else
-#include "unwind_itanium.h"
+#include <unwind_itanium.h>
 #endif
 
 typedef _Unwind_Reason_Code (*_Unwind_Stop_Fn)

From b333f3393934937e1d857873934325ae0a9af30e Mon Sep 17 00:00:00 2001
From: Michal Paszkowski
Date: Mon, 20 Mar 2023 23:08:23 +0100
Subject: [PATCH 084/691] [SPIR-V] Add Machine Value Type for SPIR-V builtins

Differential Revision: https://reviews.llvm.org/D145703
---
 llvm/include/llvm/CodeGen/ValueTypes.td      |  1 +
 llvm/include/llvm/Support/MachineValueType.h |  7 +++++--
 llvm/lib/CodeGen/ValueTypes.cpp              | 10 ++++++++--
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 934800f107473..336b2a49b131e 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -238,6 +238,7 @@ def x86amx : ValueType<8192, 194>; // X86 AMX value
 def i64x8 : ValueType<512, 195>; // 8 Consecutive GPRs (AArch64)
 def aarch64svcount : ValueType<16, 196>; // AArch64 predicate-as-counter
+def spirvbuiltin : ValueType<0, 197>; // SPIR-V's builtin type
 
 def token : ValueType<0, 248>; // TokenTy
 def MetadataVT : ValueType<0, 249>; // Metadata
diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
index 58f294d1b8731..eb97239612c5f 100644
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -292,9 +292,10 @@ namespace llvm {
       x86amx         = 194,   // This is an X86 AMX value
       i64x8          = 195,   // 8 Consecutive GPRs (AArch64)
       aarch64svcount = 196, // AArch64 predicate-as-counter
+      spirvbuiltin   = 197, // SPIR-V's builtin type
 
       FIRST_VALUETYPE = 1,            // This is always the beginning of the list.
-      LAST_VALUETYPE = aarch64svcount, // This always remains at the end of the list.
+      LAST_VALUETYPE = spirvbuiltin,   // This always remains at the end of the list.
 
       VALUETYPE_SIZE = LAST_VALUETYPE + 1,
       // This is the current maximum for LAST_VALUETYPE.
@@ -1143,7 +1144,9 @@ namespace llvm { case v2048i32: case v2048f32: return TypeSize::Fixed(65536); case funcref: - case externref: return TypeSize::Fixed(0); // opaque type + case externref: + case spirvbuiltin: + return TypeSize::Fixed(0); // opaque type } } diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index b4c873c0b1abd..1b317bd0d7902 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -176,6 +176,8 @@ std::string EVT::getEVTString() const { case MVT::externref: return "externref"; case MVT::aarch64svcount: return "aarch64svcount"; + case MVT::spirvbuiltin: + return "spirvbuiltin"; } } @@ -583,12 +585,16 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){ case Type::DoubleTyID: return MVT(MVT::f64); case Type::X86_FP80TyID: return MVT(MVT::f80); case Type::X86_MMXTyID: return MVT(MVT::x86mmx); - case Type::TargetExtTyID: - if (cast(Ty)->getName() == "aarch64.svcount") + case Type::TargetExtTyID: { + TargetExtType *TargetExtTy = cast(Ty); + if (TargetExtTy->getName() == "aarch64.svcount") return MVT(MVT::aarch64svcount); + else if (TargetExtTy->getName().starts_with("spirv.")) + return MVT(MVT::spirvbuiltin); if (HandleUnknown) return MVT(MVT::Other); llvm_unreachable("Unknown target ext type!"); + } case Type::X86_AMXTyID: return MVT(MVT::x86amx); case Type::FP128TyID: return MVT(MVT::f128); case Type::PPC_FP128TyID: return MVT(MVT::ppcf128); From 8d024a79ea783ed3fbb5691aeaf186ad3f0a4ae9 Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Mon, 20 Mar 2023 15:12:38 -0700 Subject: [PATCH 085/691] Fix a problem with "watchpoint triggers before" watchpoint handling. We need to step the watchpoint instruction in these cases, but the when we queued the ThreadPlanStepOverWatchpoint to do this, we didn't make it a Controlling plan. So if you are stepping, this plan returns as though it were a utility plan, and the stepping plan keeps going. This only partially fixes the problem on Darwin; there's another bug with reporting a watchpoint when we're instruction single stepping over an instruction that triggers a watchpoint. The kernel reports the "single step completed" but not the watchpoint hit. So this commit also refactors the test into a part that works (at least on Darwin) and a part that still fails. We may have to adjust the test result expectations for other systems after this fix. 
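One part of the diff below is easy to miss: alongside the controlling-plan fix, the patch adds a process-wide "safe to call functions" gate that a fatal EXC_GUARD trips. The standalone sketch that follows is not LLDB code; the class is invented for illustration. It models the design choice that the gate lives on the process, not the thread: one fatal event flips an atomic flag, and every later SafeToCallFunctions-style query, from any thread, sees the process as off-limits.

  #include <atomic>
  #include <iostream>

  class ProcessModel {
    std::atomic<bool> Safe{true};

  public:
    // Called when a fatal exception (e.g. EXC_GUARD) is delivered.
    void onFatalException() { Safe = false; }
    // Consulted before running any code in the target.
    bool safeToCallFunctions() const { return Safe.load(); }
  };

  int main() {
    ProcessModel P;
    std::cout << P.safeToCallFunctions() << "\n"; // 1: expressions may run
    P.onFatalException();
    std::cout << P.safeToCallFunctions() << "\n"; // 0: no thread may run code
    return 0;
  }
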
Differential Revision: https://reviews.llvm.org/D146337 --- lldb/include/lldb/Target/Process.h | 19 +++++- .../Process/Utility/StopInfoMachException.cpp | 13 ++++ lldb/source/Target/StopInfo.cpp | 5 ++ lldb/source/Target/Thread.cpp | 3 + .../TestStepOverWatchpoint.py | 68 ++++++++++--------- .../watchpoints/step_over_watchpoint/main.c | 4 +- 6 files changed, 78 insertions(+), 34 deletions(-) diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index 3ffacb52299b9..6ce38f63cd249 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -311,6 +311,14 @@ class ProcessModID { return m_last_natural_stop_event; return lldb::EventSP(); } + + void SetSafeToCallFunctions(bool safe) { + m_safe = safe; + } + + bool GetSafeToCallFunctions() { + return m_safe; + } private: uint32_t m_stop_id = 0; @@ -321,6 +329,7 @@ class ProcessModID { uint32_t m_running_user_expression = false; uint32_t m_running_utility_function = 0; lldb::EventSP m_last_natural_stop_event; + std::atomic m_safe = true; }; inline bool operator==(const ProcessModID &lhs, const ProcessModID &rhs) { @@ -459,7 +468,7 @@ class Process : public std::enable_shared_from_this, void SetRestarted(bool new_value) { m_restarted = new_value; } void SetInterrupted(bool new_value) { m_interrupted = new_value; } - + void AddRestartedReason(const char *reason) { m_restarted_reasons.push_back(reason); } @@ -1250,6 +1259,14 @@ class Process : public std::enable_shared_from_this, DiagnosticManager &diagnostic_manager); static const char *ExecutionResultAsCString(lldb::ExpressionResults result); + + void SetSafeToCallFunctions(bool safe) { + GetModID().SetSafeToCallFunctions(safe); + } + + bool GetSafeToCallFunctions() { + return GetModID().GetSafeToCallFunctions(); + } void GetStatus(Stream &ostrm); diff --git a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp index aae15b2ef4624..458d44f6feb33 100644 --- a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp +++ b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp @@ -795,6 +795,19 @@ StopInfoSP StopInfoMachException::CreateStopReasonWithMachException( case 9: // EXC_RPC_ALERT case 10: // EXC_CRASH break; + case 12: // EXC_GUARD + { + // Some EXC_GUARD exceptions are fatal, and the process will go away + // the next time you allow it to run. When we get one of those + // exceptions we have to make sure SafeToCallFunctions returns false to + // prevent us or other agents running the process. This has to be set + // on the process because even the threads that didn't get the exception + // can't run. + ProcessSP process_sp(thread.GetProcess()); + if (process_sp) + process_sp->SetSafeToCallFunctions(false); + + } } return StopInfoSP(new StopInfoMachException(thread, exc_type, exc_data_count, diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index 9fdb29f9e4273..ebc355c90d0ab 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -831,6 +831,11 @@ class StopInfoWatchpoint : public StopInfo { = std::static_pointer_cast(shared_from_this()); ThreadPlanSP step_over_wp_sp(new ThreadPlanStepOverWatchpoint( *(thread_sp.get()), me_as_siwp_sp, wp_sp)); + // When this plan is done we want to stop, so set this as a Controlling + // plan. 
+ step_over_wp_sp->SetIsControllingPlan(true); + step_over_wp_sp->SetOkayToDiscard(false); + Status error; error = thread_sp->QueueThreadPlan(step_over_wp_sp, false); // If we couldn't push the thread plan, just stop here: diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index d620f746339e7..df8bff5102b83 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -1664,6 +1664,9 @@ addr_t Thread::GetThreadLocalData(const ModuleSP module, bool Thread::SafeToCallFunctions() { Process *process = GetProcess().get(); if (process) { + if (!process->SafeToCallFunctions()) + return false; + DynamicLoader *loader = GetProcess()->GetDynamicLoader(); if (loader && loader->IsFullyInitialized() == false) return false; diff --git a/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py b/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py index 7d54156aebb5b..52fc899b13e61 100644 --- a/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py +++ b/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py @@ -11,36 +11,11 @@ class TestStepOverWatchpoint(TestBase): NO_DEBUG_INFO_TESTCASE = True - @expectedFailureAll( - oslist=["freebsd", "linux"], - archs=[ - 'aarch64', - 'arm'], - bugnumber="llvm.org/pr26031") - # Read-write watchpoints not supported on SystemZ - @expectedFailureAll(archs=['s390x']) - @expectedFailureAll( - oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], - archs=['aarch64', 'arm'], - bugnumber="") - @add_test_categories(["basic_process"]) - def test(self): + def get_to_start(self, bkpt_text): """Test stepping over watchpoints.""" self.build() - target = self.createTestTarget() - - lldbutil.run_break_set_by_symbol(self, 'main') - - process = target.LaunchSimple(None, None, - self.get_process_working_directory()) - self.assertTrue(process.IsValid(), PROCESS_IS_VALID) - self.assertState(process.GetState(), lldb.eStateStopped, - PROCESS_STOPPED) - - thread = lldbutil.get_stopped_thread(process, - lldb.eStopReasonBreakpoint) - self.assertTrue(thread.IsValid(), "Failed to get thread.") - + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint(self, bkpt_text, + lldb.SBFileSpec("main.c")) frame = thread.GetFrameAtIndex(0) self.assertTrue(frame.IsValid(), "Failed to get frame.") @@ -55,14 +30,45 @@ def test(self): self.assertSuccess(error, "Error while setting watchpoint") self.assertTrue(read_watchpoint, "Failed to set read watchpoint.") + # Disable the breakpoint we hit so we don't muddy the waters with + # stepping off from the breakpoint: + bkpt.SetEnabled(False) + + return (target, process, thread, read_watchpoint) + + @expectedFailureAll( + oslist=["freebsd", "linux"], + archs=[ + 'aarch64', + 'arm'], + bugnumber="llvm.org/pr26031") + # Read-write watchpoints not supported on SystemZ + @expectedFailureAll(archs=['s390x']) + @add_test_categories(["basic_process"]) + def test_step_over(self): + target, process, thread, wp = self.get_to_start("Set a breakpoint here") + thread.StepOver() self.assertStopReason(thread.GetStopReason(), lldb.eStopReasonWatchpoint, STOPPED_DUE_TO_WATCHPOINT) self.assertEquals(thread.GetStopDescription(20), 'watchpoint 1') - process.Continue() - self.assertState(process.GetState(), lldb.eStateStopped, - PROCESS_STOPPED) + @expectedFailureAll( + oslist=["freebsd", "linux"], + archs=[ + 'aarch64', + 'arm'], + bugnumber="llvm.org/pr26031") + # Read-write watchpoints not supported on SystemZ + 
@expectedFailureAll(archs=['s390x']) + @expectedFailureAll( + oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], + archs=['aarch64', 'arm'], + bugnumber="") + @add_test_categories(["basic_process"]) + def test_step_instruction(self): + target, process, thread, wp = self.get_to_start("Set breakpoint after call") + self.assertEquals(thread.GetStopDescription(20), 'step over') self.step_inst_for_watchpoint(1) diff --git a/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c b/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c index 2d87d9a2f73fe..e48d43cb7a974 100644 --- a/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c +++ b/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c @@ -11,8 +11,8 @@ void watch_write() { } int main() { - watch_read(); - g_temp = g_watch_me_read; + watch_read(); // Set a breakpoint here + g_temp = g_watch_me_read; // Set breakpoint after call watch_write(); g_watch_me_write = g_temp; return 0; From 1328bb6ef1645951606ee3e8fa6acbbff6b2438f Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Fri, 20 Jan 2023 22:51:26 +0000 Subject: [PATCH 086/691] [mlir][sparse] extend loop emitter and optimize lattices with the awareness of slice based iteration Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D142929 --- .../mlir/Dialect/SparseTensor/Utils/Merger.h | 6 + .../SparseTensor/Transforms/CodegenEnv.cpp | 58 +++++- .../SparseTensor/Transforms/CodegenEnv.h | 1 - .../SparseTensor/Transforms/LoopEmitter.cpp | 23 ++- .../SparseTensor/Transforms/LoopEmitter.h | 27 ++- .../Transforms/Sparsification.cpp | 193 ++++++++---------- .../lib/Dialect/SparseTensor/Utils/Merger.cpp | 17 +- 7 files changed, 201 insertions(+), 124 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h index 0e6c2f1553f1c..4a83237fb1634 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -399,11 +399,17 @@ class Merger { /// to sparse level-type. bool hasAnySparse(const BitVector &bits) const; + /// Returns true if bits contains a dependent index reduction condition on + /// sparse levels. + bool hasSparseIdxReduction(const BitVector &bits) const; + /// Gets the level-type of the `t`th tensor on `i`th loop. DimLevelType getDimLevelType(TensorId t, LoopId i) const { assert(t < numTensors && i < numLoops); return lvlTypes[t][i]; } + + /// Gets the level-type of the TensorLoopId. DimLevelType getDimLevelType(TensorLoopId b) const { return getDimLevelType(tensor(b), loop(b)); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp index 8e4904ad3a592..f326d5b950a31 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp @@ -28,6 +28,23 @@ static bool isMaterializing(Value val) { val.getDefiningOp(); } +/// Makes target array's elements sorted according to the `order` array. 
+static void sortArrayBasedOnOrder(std::vector<LoopId> &target,
+                                  ArrayRef<LoopId> order) {
+  std::sort(target.begin(), target.end(), [&order](LoopId l, LoopId r) {
+    assert(l != r);
+    int idxL = -1, idxR = -1;
+    for (int i = 0, e = order.size(); i < e; i++) {
+      if (order[i] == l)
+        idxL = i;
+      if (order[i] == r)
+        idxR = i;
+    }
+    assert(idxL >= 0 && idxR >= 0);
+    return idxL < idxR;
+  });
+}
+
 //===----------------------------------------------------------------------===//
 // Code generation environment constructor and general methods
 //===----------------------------------------------------------------------===//
@@ -57,15 +74,42 @@ void CodegenEnv::startEmit() {
     insChain = sparseOut->get();
     latticeMerger.setHasSparseOut(true);
   }
+
+  // Sort the related loop arrays such that they are in the same order as
+  // they appear in the topological order.
+  // TODO: since we only handle affine addition for slice-based codegen, and
+  // addition is associative, the order in which we evaluate the expression
+  // does not matter. However, to support multiplication, the order of the
+  // loop indices should match the evaluation order of the affine expression
+  // AST.
+
   // Initialize loop emitter.
-  SmallVector<Value> tensors;
-  for (OpOperand &t : linalgOp->getOpOperands())
+  SmallVector<Value> tensors; // input tensors passed to loop emitter
+  for (OpOperand &t : linalgOp->getOpOperands()) {
     tensors.push_back(t.get());
-  loopEmitter.initialize(tensors,
-                         StringAttr::get(linalgOp.getContext(),
-                                         linalg::GenericOp::getOperationName()),
-                         /*hasOutput=*/true,
-                         /*isSparseOut=*/sparseOut != nullptr, topSort);
+    Level rank = linalgOp.getMatchingIndexingMap(&t).getNumResults();
+    for (Level lvl = 0; lvl < rank; lvl++) {
+      sortArrayBasedOnOrder(
+          latticeMerger.getDependentLoops(t.getOperandNumber(), lvl), topSort);
+    }
+  }
+
+  loopEmitter.initialize(
+      tensors,
+      StringAttr::get(linalgOp.getContext(),
+                      linalg::GenericOp::getOperationName()),
+      /*hasOutput=*/true,
+      /*isSparseOut=*/sparseOut != nullptr, topSort,
+      // TODO: compute the map and pass it to loop emitter directly instead of
+      // passing in a callback.
+      [this](TensorId t, Level lvl) -> std::vector<std::pair<TensorId, Level>> {
+        // Translates from a list of loop indices to a list of [tid, dim] pairs.
+ std::vector rLoops = this->merger().getDependentLoops(t, lvl); + std::vector> ret; + ret.reserve(rLoops.size()); + for (LoopId l : rLoops) + ret.emplace_back(this->merger().getLoopDefiningLvl(l)); + return ret; + }); } std::optional CodegenEnv::genLoopBoundary( diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h index 776d7f7f47ece..8c6a7bd6433db 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h @@ -99,7 +99,6 @@ class CodegenEnv { topSort.reserve(capacity); } - ArrayRef getTopSort() const { return topSort; }; ArrayRef getTopSortSlice(LoopOrd n, LoopOrd m) const; ArrayRef getLoopStackUpTo(LoopOrd n) const; ArrayRef getCurrentLoopStack() const; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp index c3823c0f204d9..459a1b38e03de 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp @@ -208,12 +208,14 @@ Value LoopEmitter::genSparseCrd(OpBuilder &builder, Location loc, TensorId tid, } LoopEmitter::LoopEmitter(ValueRange tensors, StringAttr loopTag, bool hasOutput, - bool isSparseOut, ArrayRef topSort) { - initialize(tensors, loopTag, hasOutput, isSparseOut, topSort); + bool isSparseOut, ArrayRef topSort, + DependentLvlGetter dimGetter) { + initialize(tensors, loopTag, hasOutput, isSparseOut, topSort, dimGetter); } void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput, - bool isSparseOut, ArrayRef topSort) { + bool isSparseOut, ArrayRef topSort, + DependentLvlGetter dimGetter) { // First initialize the top-level type of the fields. this->loopTag = loopTag; this->hasOutput = hasOutput; @@ -242,6 +244,9 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput, this->loopStack.reserve(numLoops); this->loopSeqStack.reserve(numLoops); + this->dependentLvlMap.assign( + numTensors, std::vector>>()); + // Initialize nested types of `TensorId`-indexed fields. for (TensorId tid = 0; tid < numTensors; tid++) { const Value t = tensors[tid]; @@ -283,6 +288,12 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput, coordinatesBuffers[tid].assign(lvlRank, Value()); sliceOffsets[tid].assign(lvlRank, Value()); sliceStrides[tid].assign(lvlRank, Value()); + + dependentLvlMap[tid].assign(lvlRank, + std::vector>()); + if (dimGetter) + for (Level l = 0; l < lvlRank; l++) + dependentLvlMap[tid][l] = dimGetter(tid, l); } // Construct the inverse of the `topSort` from the sparsifier. 
@@ -997,8 +1008,8 @@ void LoopEmitter::exitForLoop(RewriterBase &rewriter, Location loc, } } -void LoopEmitter::exitCoIterationLoop(OpBuilder &builder, Location loc, - MutableArrayRef reduc) { +void LoopEmitter::exitWhileLoop(OpBuilder &builder, Location loc, + MutableArrayRef reduc) { const LoopInfo &loopInfo = loopStack.back(); auto whileOp = llvm::cast(loopInfo.loop); builder.setInsertionPointToEnd(loopInfo.userCodeBlock); @@ -1082,7 +1093,7 @@ void LoopEmitter::exitCurrentLoop(RewriterBase &rewriter, Location loc, assert(loopInfo.tids.size() == loopInfo.lvls.size()); SmallVector red; if (llvm::isa(loopInfo.loop)) { - exitCoIterationLoop(rewriter, loc, reduc); + exitWhileLoop(rewriter, loc, reduc); } else { exitForLoop(rewriter, loc, reduc); } diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h index 8e6c65fd96c92..8cfe00100eba8 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h @@ -76,6 +76,14 @@ class LoopEmitter { /// initializing the loop emitter (e.g., to fill a dense output with zeros). using OutputUpdater = function_ref; + // Map from [tid, dim] to a list of dependent [tid, dim] for affine expression + // index on sparse tensors. + // E.g., for affine index (d0 + d1), it depends on two [tid, dim] that defines + // d0 and d1 (for affine expression reduction). + // If the list is empty, it means that there is no affine expression on the + // input [tid, dim]. + using DependentLvlGetter = + function_ref>(TensorId, Level)>; LoopEmitter() = default; @@ -89,11 +97,13 @@ class LoopEmitter { /// to `LoopId`. void initialize(ValueRange tensors, StringAttr loopTag = nullptr, bool hasOutput = false, bool isSparseOut = false, - ArrayRef topSort = {}); + ArrayRef topSort = {}, + DependentLvlGetter getter = nullptr); explicit LoopEmitter(ValueRange tensors, StringAttr loopTag = nullptr, bool hasOutput = false, bool isSparseOut = false, - ArrayRef topSort = {}); + ArrayRef topSort = {}, + DependentLvlGetter getter = nullptr); /// Starts a loop emitting session by generating all the buffers needed /// for iterating over the tensors. @@ -295,8 +305,8 @@ class LoopEmitter { MutableArrayRef reduc); /// Exits a while loop, returns the reduction results. - void exitCoIterationLoop(OpBuilder &builder, Location loc, - MutableArrayRef reduc); + void exitWhileLoop(OpBuilder &builder, Location loc, + MutableArrayRef reduc); // // View-based-reshape methods. @@ -380,6 +390,15 @@ class LoopEmitter { std::vector> sliceOffsets; std::vector> sliceStrides; + // Map from [tid, level] to a list of dependent [tid, level]. + // See comments for `DependentDimGetter`. + std::vector>>> + dependentLvlMap; + + // + // View based reshape related-fields and methods + // + /// Collapse Reassociations related to a specific tensor // TODO: support expand. std::vector collapseReassoc; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index d7ce2b7f63f5c..f189b14c60c7e 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -593,23 +593,6 @@ static void tryRelaxAffineConstraints(linalg::GenericOp op, } } -/// Makes target array's elements appear in the same order as the `order` array. 
-static void sortArrayBasedOnOrder(std::vector &target, - ArrayRef order) { - std::sort(target.begin(), target.end(), [&order](LoopId l, LoopId r) { - assert(l != r); - int idxL = -1, idxR = -1; - for (int i = 0, e = order.size(); i < e; i++) { - if (order[i] == l) - idxL = i; - if (order[i] == r) - idxR = i; - } - assert(idxL >= 0 && idxR >= 0); - return idxL < idxR; - }); -} - static void addFilterLoopBasedConstraints(CodegenEnv &env, OpOperand &t, OpOperand *skip, SortMask mask, std::vector> &adjM, @@ -1484,9 +1467,10 @@ static bool startLoopSeq(CodegenEnv &env, OpBuilder &builder, ExprId exp, SmallVector lvls; env.merger().foreachTensorLoopId(l0, [&](TensorLoopId b, TensorId tid, std::optional lvl, - DimLevelType dlt, bool) { + DimLevelType dlt, bool isIdxReduc) { assert(env.merger().loop(b) == idx); - if (isDenseDLT(dlt) || isUndefDLT(dlt)) { + // FIXME: Dense index reduction can reuse the universal index as well. + if (!isIdxReduc && (isDenseDLT(dlt) || isUndefDLT(dlt))) { needsUniv = true; } else { // sparse/singleton levels. @@ -1503,7 +1487,8 @@ static bool startLoopSeq(CodegenEnv &env, OpBuilder &builder, ExprId exp, unsigned lsize = env.set(lts).size(); for (unsigned i = 1; i < lsize; i++) { const LatPointId li = env.set(lts)[i]; - if (!env.merger().hasAnySparse(env.lat(li).simple)) + if (!env.merger().hasAnySparse(env.lat(li).simple) && + !env.merger().hasSparseIdxReduction(env.lat(li).simple)) return true; } } @@ -1557,75 +1542,82 @@ static bool translateBitsToTidLvlPairs( unsigned numloopCond = 0; bool hasNonUnique = false; - env.merger().foreachTensorLoopId(li, [&, ldx](TensorLoopId b, TensorId tid, - std::optional lvl, - DimLevelType dlt, bool) { - if (simple.test(b)) { - if (isUndefDLT(dlt)) { - // An undefined dlt in the lattices, we probably mean to - // iterate based on the level of output tensor. E.g., this - // could be a synthetic tensor (for invariants and sparse - // output tensor). - // out[i][j] = invariant; or a broadcast - // out[i][j] = in[i] (j is undef for input) - tid = outTid; - lvl = outLvl; - // Skips invalid lvl (e.g., when this is a zero ranked tensor). - if (!lvl) - return; - } - hasNonUnique = !isUniqueDLT(dlt) || hasNonUnique; - tids.push_back(tid); - lvls.push_back(*lvl); - numloopCond++; - } else if (isDenseDLT(dlt)) { - tids.push_back(tid); - lvls.push_back(*lvl); - } else { - assert(isUndefDLT(dlt)); - linalg::GenericOp op = env.op(); - if (tid >= op.getNumDpsInputs()) - // We only handle affine expression on input tensors (for now). - return; - OpOperand *operand = &op->getOpOperand(tid); - const auto stt = getSparseTensorType(operand->get()); - // Non-annotated dense tensors requires no special handling. - if (!stt.hasEncoding()) - return; - - ArrayRef affines = - op.getMatchingIndexingMap(operand).getResults(); - const Level lvlRank = stt.getLvlRank(); - assert(affines.size() == static_cast(lvlRank)); - for (Level l = 0; l < lvlRank; l++) { - // FIXME: `toOrigDim` is deprecated. - AffineExpr exp = affines[toOrigDim(stt.getEncoding(), l)]; - // Skip simple affine expression and non-dense levels (which - // have their own filter loop). - if (exp.isa() || !stt.isDenseLvl(l)) - continue; - // Constant affine expression are handled in genLoop - if (!exp.isa()) { - bool isAtLoop = false; - if (isInvariantAffine(env, exp, ldx, isAtLoop) && isAtLoop) { - // If the compound affine is invariant and we are right at the - // level. We need to generate the address according to the - // affine expression. 
This is also the best place we can do it - // to avoid putting it inside inner loops. - // NOTE: It assumes that the levels of the input tensor are - // initialized in order (and it is also currently guaranteed by - // computeIterationGraph), another more admissible approach - // might be accepting out-of-order access between consecutive - // dense levels. - affineTids.push_back(tid); - affineLvls.push_back(l); - exps.push_back(exp); + env.merger().foreachTensorLoopId( + li, [&, ldx](TensorLoopId b, TensorId tid, std::optional lvl, + DimLevelType dlt, bool isIdxReduc) { + if (simple.test(b)) { + if (isIdxReduc) { + tids.push_back(tid); + lvls.push_back(*lvl); + numloopCond++; + return; + } + if (isUndefDLT(dlt)) { + // An undefined dlt in the lattices, we probably mean to + // iterate based on the level of output tensor. E.g., this + // could be a synthetic tensor (for invariants and sparse + // output tensor). + // out[i][j] = invariant; or a broadcast + // out[i][j] = in[i] (j is undef for input) + tid = outTid; + lvl = outLvl; + // Skips invalid lvl (e.g., when this is a zero ranked tensor). + if (!lvl) + return; + } + hasNonUnique = !isUniqueDLT(dlt) || hasNonUnique; + tids.push_back(tid); + lvls.push_back(*lvl); + numloopCond++; + } else if (isDenseDLT(dlt)) { + tids.push_back(tid); + lvls.push_back(*lvl); + } else { + assert(isUndefDLT(dlt)); + linalg::GenericOp op = env.op(); + if (tid >= op.getNumDpsInputs()) + // We only handle affine expression on input tensors (for now). + return; + OpOperand *operand = &op->getOpOperand(tid); + const auto stt = getSparseTensorType(operand->get()); + // Non-annotated dense tensors requires no special handling. + if (!stt.hasEncoding()) + return; + + ArrayRef affines = + op.getMatchingIndexingMap(operand).getResults(); + const Level lvlRank = stt.getLvlRank(); + assert(affines.size() == static_cast(lvlRank)); + for (Level l = 0; l < lvlRank; l++) { + // FIXME: `toOrigDim` is deprecated. + AffineExpr exp = affines[toOrigDim(stt.getEncoding(), l)]; + // Skip simple affine expression and non-dense levels (which + // have their own filter loop). + if (exp.isa() || !stt.isDenseLvl(l)) + continue; + + // Constant affine expression are handled in genLoop + if (!exp.isa()) { + bool isAtLoop = false; + if (isInvariantAffine(env, exp, ldx, isAtLoop) && isAtLoop) { + // If the compound affine is invariant and we are right at the + // level. We need to generate the address according to the + // affine expression. This is also the best place we can do it + // to avoid putting it inside inner loops. + // NOTE: It assumes that the levels of the input tensor are + // initialized in order (and it is also currently guaranteed by + // computeIterationGraph), another more admissible approach + // might be accepting out-of-order access between consecutive + // dense levels. + affineTids.push_back(tid); + affineLvls.push_back(l); + exps.push_back(exp); + } + } } } - } - } - }); + }); if (isDenseDLT(env.dlt(outTid, ldx))) { // Note that we generate dense indices of the output tensor @@ -1642,8 +1634,9 @@ static bool translateBitsToTidLvlPairs( } /// Starts a single loop in current sequence. 
-static Operation *startLoop(CodegenEnv &env, OpBuilder &builder, LoopOrd at, - LatPointId li, bool needsUniv) { +static std::pair<Operation *, bool> startLoop(CodegenEnv &env, + OpBuilder &builder, unsigned at, + unsigned li, bool needsUniv) { // The set of tensors + lvls to generate loops on SmallVector<TensorId> tids, affineTids; SmallVector<Level> lvls, affineLvls; @@ -1651,11 +1644,12 @@ static Operation *startLoop(CodegenEnv &env, OpBuilder &builder, LoopOrd at, // becomes invariant and the address shall now be generated at the current // level. SmallVector<AffineExpr> affines; - bool isFor = translateBitsToTidLvlPairs( + bool isSingleCond = translateBitsToTidLvlPairs( env, li, env.topSortAt(at), tids, lvls, affineTids, affineLvls, affines); // Emit the for/while-loop control. - Operation *loop = genLoop(env, builder, at, needsUniv, tids, lvls, isFor); + Operation *loop = + genLoop(env, builder, at, needsUniv, tids, lvls, isSingleCond); Location loc = env.op().getLoc(); for (auto [tid, lvl, exp] : llvm::zip(affineTids, affineLvls, affines)) { env.emitter().genDenseAffineAddress(builder, loc, tid, lvl, exp); @@ -1671,7 +1665,7 @@ static Operation *startLoop(CodegenEnv &env, OpBuilder &builder, LoopOrd at, genConstantDenseAddressFromLevel(env, builder, tid, lvl + 1); } - return loop; + return std::make_pair(loop, isSingleCond); } /// Ends a single loop in current sequence. Returns new values for needsUniv. @@ -1734,20 +1728,19 @@ static void genStmt(CodegenEnv &env, RewriterBase &rewriter, ExprId exp, for (unsigned i = 0; i < lsize; i++) { // Start a loop. const LatPointId li = env.set(lts)[i]; - Operation *loop = startLoop(env, rewriter, at, li, needsUniv); + auto [loop, isSingleCond] = startLoop(env, rewriter, at, li, needsUniv); // Visit all lattices points with Li >= Lj to generate the // loop-body, possibly with if statements for coiteration. Value redInput = env.getReduc(); Value cntInput = env.getExpandCount(); Value insInput = env.getInsertionChain(); - bool isWhile = dyn_cast<scf::WhileOp>(loop) != nullptr; for (unsigned j = 0; j < lsize; j++) { const LatPointId lj = env.set(lts)[j]; const ExprId ej = env.lat(lj).exp; if (li == lj || env.merger().latGT(li, lj)) { // Recurse into body of each branch. - if (isWhile) { + if (!isSingleCond) { scf::IfOp ifOp = genIf(env, rewriter, idx, env.lat(lj).simple); genStmt(env, rewriter, ej, at + 1); endIf(env, rewriter, ifOp, loop, redInput, cntInput, insInput); @@ -1866,18 +1859,12 @@ struct GenericOpSparsifier : public OpRewritePattern<GenericOp> { if (!isAdmissible) return failure(); // inadmissible expression, reject - for (OpOperand &t : env.op()->getOpOperands()) { - Level rank = env.op().getMatchingIndexingMap(&t).getNumResults(); - for (Level lvl = 0; lvl < rank; lvl++) { - sortArrayBasedOnOrder( - env.merger().getDependentLoops(t.getOperandNumber(), lvl), - env.getTopSort()); - } - } - // Recursively generates code if admissible. env.startEmit(); genBuffers(env, rewriter); + // TODO: Constant affine expressions should be handled differently when using + // slice-based codegen; it does not matter now because we already reject the + // constant expression at an earlier stage.
genInitConstantDenseAddress(env, rewriter); genStmt(env, rewriter, env.getExprId(), 0); genResult(env, rewriter); diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 7f4400188cf14..40db5411132b4 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -362,7 +362,8 @@ BitVector Merger::simplifyCond(LatSetId s0, LatPointId p0) { } BitVector simple(latPoints[p0].bits); - bool reset = isSingleton && hasAnySparse(simple); + bool reset = + isSingleton && (hasAnySparse(simple) || hasSparseIdxReduction(simple)); const TensorLoopId be = simple.size(); TensorLoopId offset = 0; // relative to the end if (!reset) @@ -379,7 +380,9 @@ BitVector Merger::simplifyCond(LatSetId s0, LatPointId p0) { // keep the rightmost bit (which could possibly be a synthetic tensor). for (TensorLoopId b = be - 1 - offset, i = 0; i < be; b = b == 0 ? be - 1 : b - 1, i++) { - if (simple[b]) { + // FIXME: better name? also slice on dense level has locate property as + // well. Handle it correctly! + if (simple[b] && !isLvlWithNonTrivialIdxExp(b)) { const auto dlt = getDimLevelType(b); if (!isCompressedDLT(dlt) && !isSingletonDLT(dlt)) { if (reset) @@ -407,7 +410,7 @@ bool Merger::latGT(LatPointId i, LatPointId j) const { bool Merger::onlyDenseDiff(LatPointId i, LatPointId j) const { BitVector tmp(latPoints[j].bits); tmp ^= latPoints[i].bits; - return !hasAnySparse(tmp); + return !hasAnySparse(tmp) && !hasSparseIdxReduction(tmp); } bool Merger::expContainsTensor(ExprId e, TensorId t) const { @@ -555,6 +558,14 @@ bool Merger::hasAnySparse(const BitVector &bits) const { return false; } +bool Merger::hasSparseIdxReduction(const BitVector &bits) const { + // TODO: return false on dense levels. + for (unsigned b = 0, be = bits.size(); b < be; b++) + if (bits[b] && isLvlWithNonTrivialIdxExp(b)) + return true; + return false; +} + #ifndef NDEBUG //===----------------------------------------------------------------------===// From f05ac803ffe76c7f4299a4e1288cc6bb8b098410 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 20 Mar 2023 18:53:31 +0000 Subject: [PATCH 087/691] Re-apply "[JITLink][ELF] Don't skip debug info sections by default." This reapplies 57aeb305460406f7b822cfe6ab9fb4d730fc3b38, which was reverted in f721fcb6ed0a186b8f146282467dd8420a5a36d0 due to buildbot failures. The cause of the failure was missing support for R_AARCH64_ABS32, which was added in fb1b9945be7657a3a25b727eaf0eeb3f74525210. --- .../JITLink/ELFLinkGraphBuilder.h | 35 +-- ...ELF_debug_section_lifetime_is_NoAlloc.yaml | 195 ++++++++++++++ .../JITLink/X86/ELF_skip_debug_sections.s | 241 ------------------ 3 files changed, 213 insertions(+), 258 deletions(-) create mode 100644 llvm/test/ExecutionEngine/JITLink/X86/ELF_debug_section_lifetime_is_NoAlloc.yaml delete mode 100644 llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index cd046057217f1..26feb8ea3277b 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -62,6 +62,14 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase { StringRef FileName, LinkGraph::GetEdgeKindNameFunction GetEdgeKindName); + /// Debug sections are included in the graph by default. Use + /// setProcessDebugSections(false) to ignore them if debug info is not + /// needed. 
+ ELFLinkGraphBuilder &setProcessDebugSections(bool ProcessDebugSections) { + this->ProcessDebugSections = ProcessDebugSections; + return *this; + } + /// Attempt to construct and return the LinkGraph. Expected> buildGraph(); @@ -115,8 +123,7 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase { /// template Error forEachRelaRelocation(const typename ELFT::Shdr &RelSect, - RelocHandlerMethod &&Func, - bool ProcessDebugSections = false); + RelocHandlerMethod &&Func); /// Traverse all matching ELFT::Rel relocation records in the given section. /// The handler function Func should be callable with this signature: @@ -125,22 +132,19 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase { /// template Error forEachRelRelocation(const typename ELFT::Shdr &RelSect, - RelocHandlerMethod &&Func, - bool ProcessDebugSections = false); + RelocHandlerMethod &&Func); /// Traverse all matching rela relocation records in the given section. /// Convenience wrapper to allow passing a member function for the handler. /// template Error forEachRelaRelocation(const typename ELFT::Shdr &RelSect, - ClassT *Instance, RelocHandlerMethod &&Method, - bool ProcessDebugSections = false) { + ClassT *Instance, RelocHandlerMethod &&Method) { return forEachRelaRelocation( RelSect, [Instance, Method](const auto &Rel, const auto &Target, auto &GS) { return (Instance->*Method)(Rel, Target, GS); - }, - ProcessDebugSections); + }); } /// Traverse all matching rel relocation records in the given section. @@ -148,14 +152,12 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase { /// template Error forEachRelRelocation(const typename ELFT::Shdr &RelSect, - ClassT *Instance, RelocHandlerMethod &&Method, - bool ProcessDebugSections = false) { + ClassT *Instance, RelocHandlerMethod &&Method) { return forEachRelRelocation( RelSect, [Instance, Method](const auto &Rel, const auto &Target, auto &GS) { return (Instance->*Method)(Rel, Target, GS); - }, - ProcessDebugSections); + }); } const ELFFile &Obj; @@ -163,6 +165,7 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase { typename ELFFile::Elf_Shdr_Range Sections; const typename ELFFile::Elf_Shdr *SymTabSec = nullptr; StringRef SectionStringTab; + bool ProcessDebugSections = true; // Maps ELF section indexes to LinkGraph Blocks. // Only SHF_ALLOC sections will have graph blocks. @@ -318,7 +321,7 @@ template Error ELFLinkGraphBuilder::graphifySections() { // If the name indicates that it's a debug section then skip it: We don't // support those yet. - if (isDwarfSection(*Name)) { + if (!ProcessDebugSections && isDwarfSection(*Name)) { LLVM_DEBUG({ dbgs() << " " << SecIndex << ": \"" << *Name << "\" is a debug section: " @@ -522,8 +525,7 @@ template Error ELFLinkGraphBuilder::graphifySymbols() { template template Error ELFLinkGraphBuilder::forEachRelaRelocation( - const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func, - bool ProcessDebugSections) { + const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func) { // Only look into sections that store relocation entries. if (RelSect.sh_type != ELF::SHT_RELA) return Error::success(); @@ -569,8 +571,7 @@ Error ELFLinkGraphBuilder::forEachRelaRelocation( template template Error ELFLinkGraphBuilder::forEachRelRelocation( - const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func, - bool ProcessDebugSections) { + const typename ELFT::Shdr &RelSect, RelocHandlerFunction &&Func) { // Only look into sections that store relocation entries. 
if (RelSect.sh_type != ELF::SHT_REL) return Error::success(); diff --git a/llvm/test/ExecutionEngine/JITLink/X86/ELF_debug_section_lifetime_is_NoAlloc.yaml b/llvm/test/ExecutionEngine/JITLink/X86/ELF_debug_section_lifetime_is_NoAlloc.yaml new file mode 100644 index 0000000000000..0afcda467c326 --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/X86/ELF_debug_section_lifetime_is_NoAlloc.yaml @@ -0,0 +1,195 @@ +# REQUIRES: asserts +# RUN: yaml2obj -o %t.o %s +# RUN: llvm-jitlink -debug-only=jitlink -noexec %t.o 2>&1 | FileCheck %s +# +# Check that debug sections get NoAlloc lifetimes. +# +# CHECK: ".debug_str" is not a SHF_ALLOC section. Using NoAlloc lifetime. +# CHECK: ".debug_abbrev" is not a SHF_ALLOC section. Using NoAlloc lifetime. +# CHECK: ".debug_info" is not a SHF_ALLOC section. Using NoAlloc lifetime. +# CHECK: ".debug_line" is not a SHF_ALLOC section. Using NoAlloc lifetime. + + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_REL + Machine: EM_X86_64 + SectionHeaderStringTable: .strtab +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + AddressAlign: 0x10 + Content: B82A000000C3662E0F1F840000000000B82A000000C3 + - Name: .debug_abbrev + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 011101250E1305030E10171B0E110112060000022E00110112064018974219030E3A0B3B0B271949133F190000032E01110112064018974219030E3A0B3B0B271949133F1900000405000218030E3A0B3B0B49130000052400030E3E0B0B0B0000060F004913000000 + - Name: .debug_info + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 8C0000000400000000000801000000000C000000000000000000000000000000000000000000160000000200000000000000000600000001570000000001017700000003000000000000000006000000015700000000010577000000040155000000000105770000000401540000000001057E0000000005000000000504068300000006880000000500000000060100 + - Name: .comment + Type: SHT_PROGBITS + Flags: [ SHF_MERGE, SHF_STRINGS ] + AddressAlign: 0x1 + EntSize: 0x1 + Content: 00636C616E672076657273696F6E2031302E302E302D347562756E7475312000 + - Name: .note.GNU-stack + Type: SHT_PROGBITS + AddressAlign: 0x1 + - Name: .debug_line + Type: SHT_PROGBITS + AddressAlign: 0x1 + Content: 58000000040036000000010101FB0E0D0001010101000000010000012F746D700000454C465F736B69705F64656275675F73656374696F6E732E63000100000000090200000000000000000105030A130500F505030A130206000101 + - Name: .eh_frame + Type: SHT_X86_64_UNWIND + Flags: [ SHF_ALLOC ] + AddressAlign: 0x8 + Content: 1400000000000000017A5200017810011B0C070890010000100000001C0000000000000006000000000000001000000030000000000000000600000000000000 + - Name: .rela.debug_info + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .debug_info + Relocations: + - Offset: 0x6 + Symbol: .debug_abbrev + Type: R_X86_64_32 + - Offset: 0xC + Symbol: .debug_str + Type: R_X86_64_32 + - Offset: 0x12 + Symbol: .debug_str + Type: R_X86_64_32 + Addend: 31 + - Offset: 0x16 + Symbol: .debug_line + Type: R_X86_64_32 + - Offset: 0x1A + Symbol: .debug_str + Type: R_X86_64_32 + Addend: 57 + - Offset: 0x1E + Symbol: .text + Type: R_X86_64_64 + - Offset: 0x2B + Symbol: .text + Type: R_X86_64_64 + - Offset: 0x39 + Symbol: .debug_str + Type: R_X86_64_32 + Addend: 62 + - Offset: 0x44 + Symbol: .text + Type: R_X86_64_64 + Addend: 16 + - Offset: 0x52 + Symbol: .debug_str + Type: R_X86_64_32 + Addend: 70 + - Offset: 0x5F + Symbol: .debug_str + Type: R_X86_64_32 + Addend: 75 + - Offset: 0x6C + Symbol: .debug_str + Type: R_X86_64_32 + Addend: 80 + - Offset: 0x78 + Symbol: .debug_str + 
Type: R_X86_64_32 + Addend: 66 + - Offset: 0x89 + Symbol: .debug_str + Type: R_X86_64_32 + Addend: 85 + - Name: .rela.debug_line + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .debug_line + Relocations: + - Offset: 0x43 + Symbol: .text + Type: R_X86_64_64 + - Name: .rela.eh_frame + Type: SHT_RELA + Flags: [ SHF_INFO_LINK ] + Link: .symtab + AddressAlign: 0x8 + Info: .eh_frame + Relocations: + - Offset: 0x20 + Symbol: .text + Type: R_X86_64_PC32 + - Offset: 0x34 + Symbol: .text + Type: R_X86_64_PC32 + Addend: 16 + - Name: .llvm_addrsig + Type: SHT_LLVM_ADDRSIG + Flags: [ SHF_EXCLUDE ] + Link: .symtab + AddressAlign: 0x1 + Offset: 0x4C0 + Symbols: [ ] + - Type: SectionHeaderTable + Sections: + - Name: .strtab + - Name: .text + - Name: .debug_str + - Name: .debug_abbrev + - Name: .debug_info + - Name: .rela.debug_info + - Name: .comment + - Name: .note.GNU-stack + - Name: .debug_line + - Name: .rela.debug_line + - Name: .eh_frame + - Name: .rela.eh_frame + - Name: .llvm_addrsig + - Name: .symtab +Symbols: + - Name: ELF_skip_debug_sections.c + Type: STT_FILE + Index: SHN_ABS + - Name: .text + Type: STT_SECTION + Section: .text + - Name: .debug_str + Type: STT_SECTION + Section: .debug_str + - Name: .debug_abbrev + Type: STT_SECTION + Section: .debug_abbrev + - Name: .debug_line + Type: STT_SECTION + Section: .debug_line + - Name: foo + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Size: 0x6 + - Name: main + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x10 + Size: 0x6 +DWARF: + debug_str: + - 'clang version 10.0.0-4ubuntu1 ' + - ELF_skip_debug_sections.c + - '/tmp' + - foo + - int + - main + - argc + - argv + - char +... diff --git a/llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s b/llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s deleted file mode 100644 index acd3ae7ad8b1f..0000000000000 --- a/llvm/test/ExecutionEngine/JITLink/X86/ELF_skip_debug_sections.s +++ /dev/null @@ -1,241 +0,0 @@ -# REQUIRES: asserts -# RUN: llvm-mc -triple=x86_64-pc-linux-gnu -filetype=obj -o %t %s -# RUN: llvm-jitlink -debug-only=jitlink -noexec %t 2>&1 | FileCheck %s -# -# Check that debug sections are not emitted. -# -# CHECK: ".debug_info" is a debug section: No graph section will be created. 
- - .text - .file "ELF_skip_debug_sections.c" - .globl foo - .p2align 4, 0x90 - .type foo,@function -foo: -.Lfunc_begin0: - .file 1 "/tmp" "ELF_skip_debug_sections.c" - .loc 1 1 0 - .cfi_startproc - - .loc 1 2 3 prologue_end - movl $42, %eax - retq -.Ltmp0: -.Lfunc_end0: - .size foo, .Lfunc_end0-foo - .cfi_endproc - - .globl main - .p2align 4, 0x90 - .type main,@function -main: -.Lfunc_begin1: - .loc 1 5 0 - .cfi_startproc - - - - .loc 1 6 3 prologue_end - movl $42, %eax - retq -.Ltmp1: -.Lfunc_end1: - .size main, .Lfunc_end1-main - .cfi_endproc - - .section .debug_str,"MS",@progbits,1 -.Linfo_string0: - .asciz "clang version 10.0.0-4ubuntu1 " -.Linfo_string1: - .asciz "ELF_skip_debug_sections.c" -.Linfo_string2: - .asciz "/tmp" -.Linfo_string3: - .asciz "foo" -.Linfo_string4: - .asciz "int" -.Linfo_string5: - .asciz "main" -.Linfo_string6: - .asciz "argc" -.Linfo_string7: - .asciz "argv" -.Linfo_string8: - .asciz "char" - .section .debug_abbrev,"",@progbits - .byte 1 - .byte 17 - .byte 1 - .byte 37 - .byte 14 - .byte 19 - .byte 5 - .byte 3 - .byte 14 - .byte 16 - .byte 23 - .byte 27 - .byte 14 - .byte 17 - .byte 1 - .byte 18 - .byte 6 - .byte 0 - .byte 0 - .byte 2 - .byte 46 - .byte 0 - .byte 17 - .byte 1 - .byte 18 - .byte 6 - .byte 64 - .byte 24 - .ascii "\227B" - .byte 25 - .byte 3 - .byte 14 - .byte 58 - .byte 11 - .byte 59 - .byte 11 - .byte 39 - .byte 25 - .byte 73 - .byte 19 - .byte 63 - .byte 25 - .byte 0 - .byte 0 - .byte 3 - .byte 46 - .byte 1 - .byte 17 - .byte 1 - .byte 18 - .byte 6 - .byte 64 - .byte 24 - .ascii "\227B" - .byte 25 - .byte 3 - .byte 14 - .byte 58 - .byte 11 - .byte 59 - .byte 11 - .byte 39 - .byte 25 - .byte 73 - .byte 19 - .byte 63 - .byte 25 - .byte 0 - .byte 0 - .byte 4 - .byte 5 - .byte 0 - .byte 2 - .byte 24 - .byte 3 - .byte 14 - .byte 58 - .byte 11 - .byte 59 - .byte 11 - .byte 73 - .byte 19 - .byte 0 - .byte 0 - .byte 5 - .byte 36 - .byte 0 - .byte 3 - .byte 14 - .byte 62 - .byte 11 - .byte 11 - .byte 11 - .byte 0 - .byte 0 - .byte 6 - .byte 15 - .byte 0 - .byte 73 - .byte 19 - .byte 0 - .byte 0 - .byte 0 - .section .debug_info,"",@progbits -.Lcu_begin0: - .long .Ldebug_info_end0-.Ldebug_info_start0 -.Ldebug_info_start0: - .short 4 - .long .debug_abbrev - .byte 8 - .byte 1 - .long .Linfo_string0 - .short 12 - .long .Linfo_string1 - .long .Lline_table_start0 - .long .Linfo_string2 - .quad .Lfunc_begin0 - .long .Lfunc_end1-.Lfunc_begin0 - .byte 2 - .quad .Lfunc_begin0 - .long .Lfunc_end0-.Lfunc_begin0 - .byte 1 - .byte 87 - - .long .Linfo_string3 - .byte 1 - .byte 1 - - .long 119 - - .byte 3 - .quad .Lfunc_begin1 - .long .Lfunc_end1-.Lfunc_begin1 - .byte 1 - .byte 87 - - .long .Linfo_string5 - .byte 1 - .byte 5 - - .long 119 - - .byte 4 - .byte 1 - .byte 85 - .long .Linfo_string6 - .byte 1 - .byte 5 - .long 119 - .byte 4 - .byte 1 - .byte 84 - .long .Linfo_string7 - .byte 1 - .byte 5 - .long 126 - .byte 0 - .byte 5 - .long .Linfo_string4 - .byte 5 - .byte 4 - .byte 6 - .long 131 - .byte 6 - .long 136 - .byte 5 - .long .Linfo_string8 - .byte 6 - .byte 1 - .byte 0 -.Ldebug_info_end0: - .ident "clang version 10.0.0-4ubuntu1 " - .section ".note.GNU-stack","",@progbits - .addrsig - .section .debug_line,"",@progbits -.Lline_table_start0: From ff48a2925fa3726815fc7666326d54a5c053b47b Mon Sep 17 00:00:00 2001 From: yijia1212 Date: Mon, 20 Mar 2023 23:33:54 +0100 Subject: [PATCH 088/691] modify dir of dep in OpenMPCommon Fix dir error of deps in OpenMPCommon bazel build Differential Revision: https://reviews.llvm.org/D146461 --- 
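The same fix in context: inside the overlay build, LLVM targets must be referenced by overlay-relative labels rather than an absolute third_party path. A rough Starlark sketch of the corrected stanza (dependency labels are taken verbatim from the hunk below; the target name and the omitted attributes are assumptions for illustration):

cc_library(
    name = "OpenMPCommon",  # assumed name, per the commit subject
    deps = [
        ":IR",
        ":Support",
        "//llvm:Core",            # was "//third_party/llvm/llvm-project/llvm:Core"
        "//llvm:FrontendOpenMP",  # was "//third_party/llvm/llvm-project/llvm:FrontendOpenMP"
    ],
)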
utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 328df87ba2063..10eb6b82f8f25 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -6762,8 +6762,8 @@ cc_library( deps = [ ":IR", ":Support", - "//third_party/llvm/llvm-project/llvm:Core", - "//third_party/llvm/llvm-project/llvm:FrontendOpenMP", + "//llvm:Core", + "//llvm:FrontendOpenMP", ], ) From ee232506b870ce5282cc4da5ca493d41d361feb3 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Thu, 16 Mar 2023 14:47:17 -0700 Subject: [PATCH 089/691] [lldb] Move UnixSignals creation into Platform plugins The high level goal of this change is to remove lldbTarget's dependency on lldbPluginProcessUtility. The reason for this existing dependency is so that we can create the appropriate UnixSignals object based on an ArchSpec. Instead of using the ArchSpec, we can instead take advantage of the Platform associated with the current Target. This is accomplished by adding a new method to Platform, CreateUnixSignals, which will create the correct UnixSignals object for us. We then can use `Platform::GetUnixSignals` and rely on that to give us the correct signals as needed. Differential Revision: https://reviews.llvm.org/D146263 --- lldb/include/lldb/Target/Platform.h | 4 +- lldb/include/lldb/Target/UnixSignals.h | 1 - .../ObjC/AppleObjCRuntime/CMakeLists.txt | 1 + .../Plugins/Platform/FreeBSD/CMakeLists.txt | 1 + .../FreeBSD}/FreeBSDSignals.cpp | 0 .../FreeBSD}/FreeBSDSignals.h | 0 .../Platform/FreeBSD/PlatformFreeBSD.cpp | 5 +++ .../Platform/FreeBSD/PlatformFreeBSD.h | 2 + .../Plugins/Platform/Linux/CMakeLists.txt | 1 + .../Linux}/LinuxSignals.cpp | 0 .../Utility => Platform/Linux}/LinuxSignals.h | 0 .../Plugins/Platform/Linux/PlatformLinux.cpp | 5 +++ .../Plugins/Platform/Linux/PlatformLinux.h | 2 + .../Plugins/Platform/NetBSD/CMakeLists.txt | 1 + .../NetBSD}/NetBSDSignals.cpp | 0 .../NetBSD}/NetBSDSignals.h | 0 .../Platform/NetBSD/PlatformNetBSD.cpp | 5 +++ .../Plugins/Platform/NetBSD/PlatformNetBSD.h | 2 + .../Plugins/Platform/POSIX/PlatformPOSIX.cpp | 15 ++++++-- .../Plugins/Platform/POSIX/PlatformPOSIX.h | 4 +- .../Platform/QemuUser/PlatformQemuUser.h | 6 +++ .../Platform/Windows/PlatformWindows.h | 4 ++ .../Platform/gdb-server/CMakeLists.txt | 2 +- .../gdb-server}/GDBRemoteSignals.cpp | 0 .../gdb-server}/GDBRemoteSignals.h | 0 .../gdb-server/PlatformRemoteGDBServer.cpp | 10 ++--- .../gdb-server/PlatformRemoteGDBServer.h | 8 +++- .../Plugins/Process/Utility/CMakeLists.txt | 4 -- .../Process/elf-core/ProcessElfCore.cpp | 8 ++-- .../Process/gdb-remote/ProcessGDBRemote.cpp | 16 ++++---- .../Process/minidump/ProcessMinidump.cpp | 6 ++- lldb/source/Target/CMakeLists.txt | 1 - lldb/source/Target/Platform.cpp | 2 +- lldb/source/Target/UnixSignals.cpp | 37 +++++++++---------- .../Process/gdb-remote/CMakeLists.txt | 2 +- .../gdb-remote/GDBRemoteClientBaseTest.cpp | 2 +- .../Target/RemoteAwarePlatformTest.cpp | 3 ++ 37 files changed, 103 insertions(+), 57 deletions(-) rename lldb/source/Plugins/{Process/Utility => Platform/FreeBSD}/FreeBSDSignals.cpp (100%) rename lldb/source/Plugins/{Process/Utility => Platform/FreeBSD}/FreeBSDSignals.h (100%) rename lldb/source/Plugins/{Process/Utility => Platform/Linux}/LinuxSignals.cpp (100%) rename lldb/source/Plugins/{Process/Utility => Platform/Linux}/LinuxSignals.h (100%) 
rename lldb/source/Plugins/{Process/Utility => Platform/NetBSD}/NetBSDSignals.cpp (100%) rename lldb/source/Plugins/{Process/Utility => Platform/NetBSD}/NetBSDSignals.h (100%) rename lldb/source/Plugins/{Process/Utility => Platform/gdb-server}/GDBRemoteSignals.cpp (100%) rename lldb/source/Plugins/{Process/Utility => Platform/gdb-server}/GDBRemoteSignals.h (100%) diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h index 08e47cc132473..e184249a62980 100644 --- a/lldb/include/lldb/Target/Platform.h +++ b/lldb/include/lldb/Target/Platform.h @@ -619,10 +619,12 @@ class Platform : public PluginInterface { return 1; } - virtual const lldb::UnixSignalsSP &GetRemoteUnixSignals(); + virtual lldb::UnixSignalsSP GetRemoteUnixSignals(); lldb::UnixSignalsSP GetUnixSignals(); + virtual lldb::UnixSignalsSP CreateUnixSignals() = 0; + /// Locate a queue name given a thread's qaddr /// /// On a system using libdispatch ("Grand Central Dispatch") style queues, a diff --git a/lldb/include/lldb/Target/UnixSignals.h b/lldb/include/lldb/Target/UnixSignals.h index ac47a2769b927..1287e0007cc75 100644 --- a/lldb/include/lldb/Target/UnixSignals.h +++ b/lldb/include/lldb/Target/UnixSignals.h @@ -22,7 +22,6 @@ namespace lldb_private { class UnixSignals { public: - static lldb::UnixSignalsSP Create(const ArchSpec &arch); static lldb::UnixSignalsSP CreateForHost(); // Constructors and Destructors diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/CMakeLists.txt b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/CMakeLists.txt index 3789f56325980..2844ba6b2bda2 100644 --- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/CMakeLists.txt +++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/CMakeLists.txt @@ -19,6 +19,7 @@ add_lldb_library(lldbPluginAppleObjCRuntime PLUGIN lldbUtility lldbPluginExpressionParserClang lldbPluginCPPRuntime + lldbPluginProcessUtility lldbPluginTypeSystemClang CLANG_LIBS clangAST diff --git a/lldb/source/Plugins/Platform/FreeBSD/CMakeLists.txt b/lldb/source/Plugins/Platform/FreeBSD/CMakeLists.txt index f981534d92163..8095a0b584dc3 100644 --- a/lldb/source/Plugins/Platform/FreeBSD/CMakeLists.txt +++ b/lldb/source/Plugins/Platform/FreeBSD/CMakeLists.txt @@ -1,4 +1,5 @@ add_lldb_library(lldbPluginPlatformFreeBSD PLUGIN + FreeBSDSignals.cpp PlatformFreeBSD.cpp LINK_LIBS diff --git a/lldb/source/Plugins/Process/Utility/FreeBSDSignals.cpp b/lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.cpp similarity index 100% rename from lldb/source/Plugins/Process/Utility/FreeBSDSignals.cpp rename to lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.cpp diff --git a/lldb/source/Plugins/Process/Utility/FreeBSDSignals.h b/lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.h similarity index 100% rename from lldb/source/Plugins/Process/Utility/FreeBSDSignals.h rename to lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.h diff --git a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp index 3d29739c19adf..5ef029b917031 100644 --- a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp +++ b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "PlatformFreeBSD.h" +#include "FreeBSDSignals.h" #include "lldb/Host/Config.h" #include @@ -282,3 +283,7 @@ CompilerType PlatformFreeBSD::GetSiginfoType(const llvm::Triple &triple) { 
ast->CompleteTagDeclarationDefinition(siginfo_type); return siginfo_type; } + +lldb::UnixSignalsSP PlatformFreeBSD::CreateUnixSignals() { + return std::make_shared(); +} diff --git a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h index 1e92bb4a1e147..d65b15aec243a 100644 --- a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h +++ b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h @@ -59,6 +59,8 @@ class PlatformFreeBSD : public PlatformPOSIX { std::vector m_supported_architectures; + lldb::UnixSignalsSP CreateUnixSignals() override; + private: std::mutex m_mutex; std::shared_ptr m_type_system; diff --git a/lldb/source/Plugins/Platform/Linux/CMakeLists.txt b/lldb/source/Plugins/Platform/Linux/CMakeLists.txt index bad039d0ad509..b8bfa3bc78863 100644 --- a/lldb/source/Plugins/Platform/Linux/CMakeLists.txt +++ b/lldb/source/Plugins/Platform/Linux/CMakeLists.txt @@ -1,4 +1,5 @@ add_lldb_library(lldbPluginPlatformLinux PLUGIN + LinuxSignals.cpp PlatformLinux.cpp LINK_LIBS diff --git a/lldb/source/Plugins/Process/Utility/LinuxSignals.cpp b/lldb/source/Plugins/Platform/Linux/LinuxSignals.cpp similarity index 100% rename from lldb/source/Plugins/Process/Utility/LinuxSignals.cpp rename to lldb/source/Plugins/Platform/Linux/LinuxSignals.cpp diff --git a/lldb/source/Plugins/Process/Utility/LinuxSignals.h b/lldb/source/Plugins/Platform/Linux/LinuxSignals.h similarity index 100% rename from lldb/source/Plugins/Process/Utility/LinuxSignals.h rename to lldb/source/Plugins/Platform/Linux/LinuxSignals.h diff --git a/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp b/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp index bf226fabda5c9..d4e09c971eb2d 100644 --- a/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp +++ b/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "PlatformLinux.h" +#include "LinuxSignals.h" #include "lldb/Host/Config.h" #include @@ -480,3 +481,7 @@ CompilerType PlatformLinux::GetSiginfoType(const llvm::Triple &triple) { ast->CompleteTagDeclarationDefinition(siginfo_type); return siginfo_type; } + +lldb::UnixSignalsSP PlatformLinux::CreateUnixSignals() { + return std::make_shared(); +} diff --git a/lldb/source/Plugins/Platform/Linux/PlatformLinux.h b/lldb/source/Plugins/Platform/Linux/PlatformLinux.h index 89f0bd709ef60..e604d735ae017 100644 --- a/lldb/source/Plugins/Platform/Linux/PlatformLinux.h +++ b/lldb/source/Plugins/Platform/Linux/PlatformLinux.h @@ -64,6 +64,8 @@ class PlatformLinux : public PlatformPOSIX { std::vector m_supported_architectures; + lldb::UnixSignalsSP CreateUnixSignals() override; + private: std::mutex m_mutex; std::shared_ptr m_type_system; diff --git a/lldb/source/Plugins/Platform/NetBSD/CMakeLists.txt b/lldb/source/Plugins/Platform/NetBSD/CMakeLists.txt index 5e63f10980672..9c8083cbf2f2e 100644 --- a/lldb/source/Plugins/Platform/NetBSD/CMakeLists.txt +++ b/lldb/source/Plugins/Platform/NetBSD/CMakeLists.txt @@ -1,4 +1,5 @@ add_lldb_library(lldbPluginPlatformNetBSD PLUGIN + NetBSDSignals.cpp PlatformNetBSD.cpp LINK_LIBS diff --git a/lldb/source/Plugins/Process/Utility/NetBSDSignals.cpp b/lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.cpp similarity index 100% rename from lldb/source/Plugins/Process/Utility/NetBSDSignals.cpp rename to lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.cpp diff --git a/lldb/source/Plugins/Process/Utility/NetBSDSignals.h 
b/lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.h similarity index 100% rename from lldb/source/Plugins/Process/Utility/NetBSDSignals.h rename to lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.h diff --git a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp index 59bbc3f638af1..41a48249dbbe9 100644 --- a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp +++ b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "PlatformNetBSD.h" +#include "NetBSDSignals.h" #include "lldb/Host/Config.h" #include @@ -348,3 +349,7 @@ CompilerType PlatformNetBSD::GetSiginfoType(const llvm::Triple &triple) { ast->CompleteTagDeclarationDefinition(siginfo_type); return siginfo_type; } + +lldb::UnixSignalsSP PlatformNetBSD::CreateUnixSignals() { + return std::make_shared(); +} diff --git a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h index 3437d7e5eb511..de591b8f7a255 100644 --- a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h +++ b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h @@ -61,6 +61,8 @@ class PlatformNetBSD : public PlatformPOSIX { std::vector m_supported_architectures; + lldb::UnixSignalsSP CreateUnixSignals() override; + private: std::mutex m_mutex; std::shared_ptr m_type_system; diff --git a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp index 222dbfa719647..7033158c928e6 100644 --- a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp +++ b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.cpp @@ -27,6 +27,7 @@ #include "lldb/Target/ExecutionContext.h" #include "lldb/Target/Process.h" #include "lldb/Target/Thread.h" +#include "lldb/Target/UnixSignals.h" #include "lldb/Utility/DataBufferHeap.h" #include "lldb/Utility/FileSpec.h" #include "lldb/Utility/LLDBLog.h" @@ -294,9 +295,13 @@ std::string PlatformPOSIX::GetPlatformSpecificConnectionInformation() { return ""; } -const lldb::UnixSignalsSP &PlatformPOSIX::GetRemoteUnixSignals() { - if (IsRemote() && m_remote_platform_sp) - return m_remote_platform_sp->GetRemoteUnixSignals(); +lldb::UnixSignalsSP PlatformPOSIX::GetRemoteUnixSignals() { + if (IsRemote() && m_remote_platform_sp) { + if (auto unix_signals_sp = m_remote_platform_sp->GetRemoteUnixSignals()) + return unix_signals_sp; + } + if (auto unix_signals_sp = CreateUnixSignals()) + return unix_signals_sp; return Platform::GetRemoteUnixSignals(); } @@ -989,3 +994,7 @@ ConstString PlatformPOSIX::GetFullNameForDylib(ConstString basename) { stream.Printf("lib%s.so", basename.GetCString()); return ConstString(stream.GetString()); } + +lldb::UnixSignalsSP PlatformPOSIX::CreateUnixSignals() { + return std::make_shared(); +} diff --git a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.h b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.h index 511797ce6bb7c..f48dc6ef53d04 100644 --- a/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.h +++ b/lldb/source/Plugins/Platform/POSIX/PlatformPOSIX.h @@ -35,7 +35,7 @@ class PlatformPOSIX : public lldb_private::RemoteAwarePlatform { GetFile(const lldb_private::FileSpec &source, const lldb_private::FileSpec &destination) override; - const lldb::UnixSignalsSP &GetRemoteUnixSignals() override; + lldb::UnixSignalsSP GetRemoteUnixSignals() override; lldb::ProcessSP Attach(lldb_private::ProcessAttachInfo &attach_info, lldb_private::Debugger &debugger, @@ 
-69,6 +69,8 @@ class PlatformPOSIX : public lldb_private::RemoteAwarePlatform { lldb_private::ConstString GetFullNameForDylib(lldb_private::ConstString basename) override; + lldb::UnixSignalsSP CreateUnixSignals() override; + protected: std::unique_ptr m_option_group_platform_rsync; diff --git a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h index 596cf75b591f2..ca72062b21e88 100644 --- a/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h +++ b/lldb/source/Plugins/Platform/QemuUser/PlatformQemuUser.h @@ -69,6 +69,12 @@ class PlatformQemuUser : public Platform { arch, addr, length, prot, flags, fd, offset); } + lldb::UnixSignalsSP CreateUnixSignals() override { + // PlatformQemuUser shouldn't create its own UnixSignals. It should defer to + // other platforms. + return lldb::UnixSignalsSP(); + } + private: static lldb::PlatformSP CreateInstance(bool force, const ArchSpec *arch); static void DebuggerInitialize(Debugger &debugger); diff --git a/lldb/source/Plugins/Platform/Windows/PlatformWindows.h b/lldb/source/Plugins/Platform/Windows/PlatformWindows.h index 771133f341e90..8c36f8c4a13ba 100644 --- a/lldb/source/Plugins/Platform/Windows/PlatformWindows.h +++ b/lldb/source/Plugins/Platform/Windows/PlatformWindows.h @@ -82,6 +82,10 @@ class PlatformWindows : public RemoteAwarePlatform { std::vector m_supported_architectures; + lldb::UnixSignalsSP CreateUnixSignals() override { + return lldb::UnixSignalsSP(); + } + private: std::unique_ptr MakeLoadImageUtilityFunction(lldb_private::ExecutionContext &context, diff --git a/lldb/source/Plugins/Platform/gdb-server/CMakeLists.txt b/lldb/source/Plugins/Platform/gdb-server/CMakeLists.txt index 2fbe817acbdd2..f00aa19cfac9c 100644 --- a/lldb/source/Plugins/Platform/gdb-server/CMakeLists.txt +++ b/lldb/source/Plugins/Platform/gdb-server/CMakeLists.txt @@ -1,4 +1,5 @@ add_lldb_library(lldbPluginPlatformGDB PLUGIN + GDBRemoteSignals.cpp PlatformRemoteGDBServer.cpp LINK_LIBS @@ -6,6 +7,5 @@ add_lldb_library(lldbPluginPlatformGDB PLUGIN lldbCore lldbHost lldbTarget - lldbPluginProcessUtility lldbPluginProcessGDBRemote ) diff --git a/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp b/lldb/source/Plugins/Platform/gdb-server/GDBRemoteSignals.cpp similarity index 100% rename from lldb/source/Plugins/Process/Utility/GDBRemoteSignals.cpp rename to lldb/source/Plugins/Platform/gdb-server/GDBRemoteSignals.cpp diff --git a/lldb/source/Plugins/Process/Utility/GDBRemoteSignals.h b/lldb/source/Plugins/Platform/gdb-server/GDBRemoteSignals.h similarity index 100% rename from lldb/source/Plugins/Process/Utility/GDBRemoteSignals.h rename to lldb/source/Plugins/Platform/gdb-server/GDBRemoteSignals.h diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp index 0858a2a8d3c8b..b25a151b0fcca 100644 --- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp +++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "PlatformRemoteGDBServer.h" +#include "GDBRemoteSignals.h" #include "lldb/Host/Config.h" #include "lldb/Breakpoint/BreakpointLocation.h" @@ -31,7 +32,6 @@ #include "lldb/Utility/UriParser.h" #include "llvm/Support/FormatAdapters.h" -#include "Plugins/Process/Utility/GDBRemoteSignals.h" #include "Plugins/Process/gdb-remote/ProcessGDBRemote.h" #include @@ 
-680,17 +680,13 @@ void PlatformRemoteGDBServer::CalculateTrapHandlerSymbolNames() { m_trap_handlers.push_back(ConstString("_sigtramp")); } -const UnixSignalsSP &PlatformRemoteGDBServer::GetRemoteUnixSignals() { +UnixSignalsSP PlatformRemoteGDBServer::GetRemoteUnixSignals() { if (!IsConnected()) - return Platform::GetRemoteUnixSignals(); + return UnixSignalsSP(); if (m_remote_signals_sp) return m_remote_signals_sp; - // If packet not implemented or JSON failed to parse, we'll guess the signal - // set based on the remote architecture. - m_remote_signals_sp = UnixSignals::Create(GetRemoteSystemArchitecture()); - StringExtractorGDBRemote response; auto result = m_gdb_client_up->SendPacketAndWaitForResponse("jSignalsInfo", response); diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h index 638f7db5ef800..f83f607830f4a 100644 --- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h +++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.h @@ -13,7 +13,6 @@ #include #include -#include "Plugins/Process/Utility/GDBRemoteSignals.h" #include "Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h" #include "lldb/Target/Platform.h" @@ -146,7 +145,7 @@ class PlatformRemoteGDBServer : public Platform, private UserIDResolver { void CalculateTrapHandlerSymbolNames() override; - const lldb::UnixSignalsSP &GetRemoteUnixSignals() override; + lldb::UnixSignalsSP GetRemoteUnixSignals() override; size_t ConnectToWaitingProcesses(lldb_private::Debugger &debugger, lldb_private::Status &error) override; @@ -154,6 +153,11 @@ class PlatformRemoteGDBServer : public Platform, private UserIDResolver { virtual size_t GetPendingGdbServerList(std::vector &connection_urls); + lldb::UnixSignalsSP CreateUnixSignals() override { + // PlatformRemoteGDBServer should defer to other platforms. 
+ return lldb::UnixSignalsSP(); + } + protected: std::unique_ptr m_gdb_client_up; diff --git a/lldb/source/Plugins/Process/Utility/CMakeLists.txt b/lldb/source/Plugins/Process/Utility/CMakeLists.txt index edf4e851b653c..7feb8596d8bfe 100644 --- a/lldb/source/Plugins/Process/Utility/CMakeLists.txt +++ b/lldb/source/Plugins/Process/Utility/CMakeLists.txt @@ -1,18 +1,14 @@ add_lldb_library(lldbPluginProcessUtility AuxVector.cpp - FreeBSDSignals.cpp - GDBRemoteSignals.cpp HistoryThread.cpp HistoryUnwind.cpp InferiorCallPOSIX.cpp LinuxProcMaps.cpp - LinuxSignals.cpp MemoryTagManagerAArch64MTE.cpp NativeProcessSoftwareSingleStep.cpp NativeRegisterContextDBReg_arm64.cpp NativeRegisterContextDBReg_x86.cpp NativeRegisterContextRegisterInfo.cpp - NetBSDSignals.cpp RegisterContext_x86.cpp RegisterContextDarwin_arm.cpp RegisterContextDarwin_arm64.cpp diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp index 24d3c4bd0ba24..2771d1d20cf0c 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp @@ -18,6 +18,7 @@ #include "lldb/Target/ABI.h" #include "lldb/Target/DynamicLoader.h" #include "lldb/Target/MemoryRegionInfo.h" +#include "lldb/Target/Platform.h" #include "lldb/Target/Target.h" #include "lldb/Target/UnixSignals.h" #include "lldb/Utility/DataBufferHeap.h" @@ -223,9 +224,10 @@ Status ProcessElfCore::DoLoadCore() { ArchSpec target_arch = GetTarget().GetArchitecture(); ArchSpec core_arch(m_core_module_sp->GetArchitecture()); target_arch.MergeFrom(core_arch); - GetTarget().SetArchitecture(target_arch); - - SetUnixSignals(UnixSignals::Create(GetArchitecture())); + GetTarget().SetArchitecture(target_arch, /* set_platform = */ true); + + if (auto platform_sp = GetTarget().GetPlatform()) + SetUnixSignals(platform_sp->GetUnixSignals()); // Ensure we found at least one thread that was stopped on a signal. bool siginfo_signal_found = false; diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 7b083e1478db0..c2e60ac2b1797 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -74,7 +74,7 @@ #include "GDBRemoteRegisterContext.h" #include "GDBRemoteRegisterFallback.h" -#include "Plugins/Process/Utility/GDBRemoteSignals.h" +#include "Plugins/Platform/gdb-server/GDBRemoteSignals.h" #include "Plugins/Process/Utility/InferiorCallPOSIX.h" #include "Plugins/Process/Utility/StopInfoMachException.h" #include "ProcessGDBRemote.h" @@ -967,15 +967,13 @@ void ProcessGDBRemote::DidLaunchOrAttach(ArchSpec &process_arch) { MapSupportedStructuredDataPlugins(*supported_packets); // If connected to LLDB ("native-signals+"), use signal defs for - // the remote platform. If connected to GDB, just use the standard set. - if (!m_gdb_comm.UsesNativeSignals()) { + // the remote platform (assuming it's available). If connected to GDB, just + // use the standard set. 
+ auto platform_sp = GetTarget().GetPlatform(); + if (!platform_sp || !m_gdb_comm.UsesNativeSignals()) SetUnixSignals(std::make_shared()); - } else { - PlatformSP platform_sp = GetTarget().GetPlatform(); - if (platform_sp && platform_sp->IsConnected()) - SetUnixSignals(platform_sp->GetUnixSignals()); - else - SetUnixSignals(UnixSignals::Create(GetTarget().GetArchitecture())); + else { + SetUnixSignals(platform_sp->GetUnixSignals()); } } diff --git a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp index cf6166ecd77b9..f082d6f0e8afa 100644 --- a/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp +++ b/lldb/source/Plugins/Process/minidump/ProcessMinidump.cpp @@ -206,12 +206,14 @@ Status ProcessMinidump::DoLoadCore() { arch.GetArchitectureName()); return error; } - GetTarget().SetArchitecture(arch, true /*set_platform*/); + GetTarget().SetArchitecture(arch, /*set_platform = */ true); m_thread_list = m_minidump_parser->GetThreads(); m_active_exception = m_minidump_parser->GetExceptionStream(); - SetUnixSignals(UnixSignals::Create(GetArchitecture())); + auto platform_sp = GetTarget().GetPlatform(); + if (platform_sp) + SetUnixSignals(platform_sp->GetUnixSignals()); ReadModuleList(); if (ModuleSP module = GetTarget().GetExecutableModule()) diff --git a/lldb/source/Target/CMakeLists.txt b/lldb/source/Target/CMakeLists.txt index c75a10cf61c14..0cb3573916424 100644 --- a/lldb/source/Target/CMakeLists.txt +++ b/lldb/source/Target/CMakeLists.txt @@ -85,7 +85,6 @@ add_lldb_library(lldbTarget lldbInterpreter lldbSymbol lldbUtility - lldbPluginProcessUtility LINK_COMPONENTS Support diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index 1ddd7596280ee..e0c0e9b689b03 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -1672,7 +1672,7 @@ FileSpec Platform::GetModuleCacheRoot() { const char *Platform::GetCacheHostname() { return GetHostname(); } -const UnixSignalsSP &Platform::GetRemoteUnixSignals() { +UnixSignalsSP Platform::GetRemoteUnixSignals() { static const auto s_default_unix_signals_sp = std::make_shared(); return s_default_unix_signals_sp; } diff --git a/lldb/source/Target/UnixSignals.cpp b/lldb/source/Target/UnixSignals.cpp index d754537cc4cf4..7c643c05b2f9b 100644 --- a/lldb/source/Target/UnixSignals.cpp +++ b/lldb/source/Target/UnixSignals.cpp @@ -7,10 +7,8 @@ //===----------------------------------------------------------------------===// #include "lldb/Target/UnixSignals.h" -#include "Plugins/Process/Utility/FreeBSDSignals.h" -#include "Plugins/Process/Utility/LinuxSignals.h" -#include "Plugins/Process/Utility/NetBSDSignals.h" #include "lldb/Host/HostInfo.h" +#include "lldb/Target/Platform.h" #include "lldb/Utility/ArchSpec.h" #include #include @@ -30,24 +28,25 @@ UnixSignals::Signal::Signal(const char *name, bool default_suppress, m_description.assign(description); } -lldb::UnixSignalsSP UnixSignals::Create(const ArchSpec &arch) { - const auto &triple = arch.GetTriple(); - switch (triple.getOS()) { - case llvm::Triple::Linux: - return std::make_shared(); - case llvm::Triple::FreeBSD: - case llvm::Triple::OpenBSD: - return std::make_shared(); - case llvm::Triple::NetBSD: - return std::make_shared(); - default: - return std::make_shared(); +lldb::UnixSignalsSP UnixSignals::CreateForHost() { + static lldb::UnixSignalsSP s_unix_signals_sp; + if (s_unix_signals_sp) + return s_unix_signals_sp; + + auto host_platform_sp = Platform::GetHostPlatform(); + + // If we 
have no host platform, be resilient and use default UnixSignals. + if (!host_platform_sp) + s_unix_signals_sp = std::make_shared(); + else { + s_unix_signals_sp = host_platform_sp->CreateUnixSignals(); + // If the Host platform cannot create a UnixSignals object, fall back to the + // default UnixSignals. This may happen on platforms without a + // UnixSignals implementation (e.g. Windows). + if (!s_unix_signals_sp) + s_unix_signals_sp = std::make_shared(); } -} -lldb::UnixSignalsSP UnixSignals::CreateForHost() { - static lldb::UnixSignalsSP s_unix_signals_sp = - Create(HostInfo::GetArchitecture()); return s_unix_signals_sp; } diff --git a/lldb/unittests/Process/gdb-remote/CMakeLists.txt b/lldb/unittests/Process/gdb-remote/CMakeLists.txt index de14dc0169c13..233065609d841 100644 --- a/lldb/unittests/Process/gdb-remote/CMakeLists.txt +++ b/lldb/unittests/Process/gdb-remote/CMakeLists.txt @@ -11,7 +11,7 @@ add_lldb_unittest(ProcessGdbRemoteTests lldbCore lldbHost lldbPluginPlatformMacOSX - lldbPluginProcessUtility + lldbPluginPlatformLinux lldbPluginProcessGDBRemote LLVMTestingSupport diff --git a/lldb/unittests/Process/gdb-remote/GDBRemoteClientBaseTest.cpp b/lldb/unittests/Process/gdb-remote/GDBRemoteClientBaseTest.cpp index 99d1e12359e72..934f0dcb0596f 100644 --- a/lldb/unittests/Process/gdb-remote/GDBRemoteClientBaseTest.cpp +++ b/lldb/unittests/Process/gdb-remote/GDBRemoteClientBaseTest.cpp @@ -9,7 +9,7 @@ #include "GDBRemoteTestUtils.h" -#include "Plugins/Process/Utility/LinuxSignals.h" +#include "Plugins/Platform/Linux/LinuxSignals.h" #include "Plugins/Process/gdb-remote/GDBRemoteClientBase.h" #include "Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h" #include "lldb/Utility/GDBRemote.h" diff --git a/lldb/unittests/Target/RemoteAwarePlatformTest.cpp b/lldb/unittests/Target/RemoteAwarePlatformTest.cpp index c36bd35c819dd..cf80de6bec056 100644 --- a/lldb/unittests/Target/RemoteAwarePlatformTest.cpp +++ b/lldb/unittests/Target/RemoteAwarePlatformTest.cpp @@ -35,6 +35,8 @@ class RemoteAwarePlatformTester : public RemoteAwarePlatform { MOCK_METHOD2(ResolveRemoteExecutable, std::pair(const ModuleSpec &, const FileSpecList *)); + MOCK_METHOD0(CreateUnixSignals, lldb::UnixSignalsSP()); + Status ResolveRemoteExecutable( const ModuleSpec &module_spec, lldb::ModuleSP &exe_module_sp, const FileSpecList *module_search_paths_ptr) /*override*/ @@ -61,6 +63,7 @@ class TargetPlatformTester : public Platform { ProcessSP(ProcessAttachInfo &, Debugger &, Target *, Status &)); MOCK_METHOD0(CalculateTrapHandlerSymbolNames, void()); MOCK_METHOD0(GetUserIDResolver, UserIDResolver &()); + MOCK_METHOD0(CreateUnixSignals, lldb::UnixSignalsSP()); }; namespace { From 33789125ca384c27d408cd0c2ecf2c03bb23f610 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 20 Mar 2023 15:48:55 -0700 Subject: [PATCH 090/691] [NFC][tsan] Add include guards --- compiler-rt/test/sanitizer_common/print_address.h | 5 +++++ compiler-rt/test/tsan/java.h | 5 +++++ compiler-rt/test/tsan/test.h | 5 +++++ 3 files changed, 15 insertions(+) diff --git a/compiler-rt/test/sanitizer_common/print_address.h b/compiler-rt/test/sanitizer_common/print_address.h index 1128c928b0534..df31322246a08 100644 --- a/compiler-rt/test/sanitizer_common/print_address.h +++ b/compiler-rt/test/sanitizer_common/print_address.h @@ -1,6 +1,9 @@ #include #include +#ifndef __SANITIZER_COMMON_PRINT_ADDRESS_H__ +# define __SANITIZER_COMMON_PRINT_ADDRESS_H__ + void print_address(const char *str, int n, ...) 
{ fprintf(stderr, "%s", str); va_list ap; @@ -21,3 +24,5 @@ void print_address(const char *str, int n, ...) { } fprintf(stderr, "\n"); } + +#endif // __SANITIZER_COMMON_PRINT_ADDRESS_H__ \ No newline at end of file diff --git a/compiler-rt/test/tsan/java.h b/compiler-rt/test/tsan/java.h index 9df7fb6f9fcc2..b6a71292ced92 100644 --- a/compiler-rt/test/tsan/java.h +++ b/compiler-rt/test/tsan/java.h @@ -1,3 +1,6 @@ +#ifndef __TSAN_JAVA_H__ +#define __TSAN_JAVA_H__ + #include "test.h" extern "C" { @@ -27,3 +30,5 @@ void __tsan_func_exit(); } const jptr kExternalPCBit = 1ULL << 60; + +#endif // __TSAN_JAVA_H__ \ No newline at end of file diff --git a/compiler-rt/test/tsan/test.h b/compiler-rt/test/tsan/test.h index efd66cbf91a43..7406318243c57 100644 --- a/compiler-rt/test/tsan/test.h +++ b/compiler-rt/test/tsan/test.h @@ -1,3 +1,6 @@ +#ifndef __TSAN_TEST_H__ +#define __TSAN_TEST_H__ + #include #include #include @@ -133,3 +136,5 @@ void WTFAnnotateBenignRaceSized(const char *f, int l, const volatile void *mem, #else #define ASM_SYMBOL(symbol) #symbol #endif + +#endif // __TSAN_TEST_H__ From e520800eda3d9810be6c78d8056c7e384d0ace70 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 20 Mar 2023 15:56:38 -0700 Subject: [PATCH 091/691] [NFC] Include tsan_interface.h in tests --- compiler-rt/test/tsan/Linux/check_preinit.cpp | 1 + compiler-rt/test/tsan/cxa_guard_acquire.cpp | 1 + compiler-rt/test/tsan/default_options.cpp | 1 + compiler-rt/test/tsan/java_symbolization.cpp | 1 + compiler-rt/test/tsan/java_symbolization_legacy.cpp | 1 + 5 files changed, 5 insertions(+) diff --git a/compiler-rt/test/tsan/Linux/check_preinit.cpp b/compiler-rt/test/tsan/Linux/check_preinit.cpp index 8f5bf40337606..b5f63d3d4b9e3 100644 --- a/compiler-rt/test/tsan/Linux/check_preinit.cpp +++ b/compiler-rt/test/tsan/Linux/check_preinit.cpp @@ -12,6 +12,7 @@ // constructors of the binary which are called after constructors of shared // library. 
+#include #include #if BUILD_SO diff --git a/compiler-rt/test/tsan/cxa_guard_acquire.cpp b/compiler-rt/test/tsan/cxa_guard_acquire.cpp index d483cd37f328f..100a40b281410 100644 --- a/compiler-rt/test/tsan/cxa_guard_acquire.cpp +++ b/compiler-rt/test/tsan/cxa_guard_acquire.cpp @@ -1,5 +1,6 @@ // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s +#include #include namespace __tsan { diff --git a/compiler-rt/test/tsan/default_options.cpp b/compiler-rt/test/tsan/default_options.cpp index 3b447e734f23f..37af05739de15 100644 --- a/compiler-rt/test/tsan/default_options.cpp +++ b/compiler-rt/test/tsan/default_options.cpp @@ -1,5 +1,6 @@ // RUN: %clang_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s #include +#include #include #if (__APPLE__) diff --git a/compiler-rt/test/tsan/java_symbolization.cpp b/compiler-rt/test/tsan/java_symbolization.cpp index e6a3ee8ac1536..2600d05a0c84c 100644 --- a/compiler-rt/test/tsan/java_symbolization.cpp +++ b/compiler-rt/test/tsan/java_symbolization.cpp @@ -1,6 +1,7 @@ // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s #include "java.h" #include +#include #if (__APPLE__) __attribute__((weak)) // Required for dyld macOS 12.0+ diff --git a/compiler-rt/test/tsan/java_symbolization_legacy.cpp b/compiler-rt/test/tsan/java_symbolization_legacy.cpp index bdf986acfc1c4..502d59def0891 100644 --- a/compiler-rt/test/tsan/java_symbolization_legacy.cpp +++ b/compiler-rt/test/tsan/java_symbolization_legacy.cpp @@ -1,6 +1,7 @@ // RUN: %clangxx_tsan -O1 %s -o %t && %deflake %run %t | FileCheck %s #include "java.h" #include +#include #if (__APPLE__) __attribute__((weak)) // Required for dyld macOS 12.0+ From 9b655c2627e31ea62032e175b3a9f2cae8aea653 Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Mon, 20 Mar 2023 16:05:01 -0700 Subject: [PATCH 092/691] Revert "Fix a problem with "watchpoint triggers before" watchpoint handling." This reverts commit 8d024a79ea783ed3fbb5691aeaf186ad3f0a4ae9. I accidentally included some "in progress" work that wasn't supposed to go with this commit. 
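The substantive piece of the reverted change, restored by the reapply that follows, is the step-over-watchpoint plan setup in StopInfoWatchpoint. As a rough C++ sketch of that pattern (every name is taken from the hunks below; the enclosing method and error handling are elided):

// Queue a plan that steps past the watchpointed instruction, and make
// it a controlling plan so it cannot be discarded before it completes.
ThreadPlanSP step_over_wp_sp(new ThreadPlanStepOverWatchpoint(
    *(thread_sp.get()), me_as_siwp_sp, wp_sp));
step_over_wp_sp->SetIsControllingPlan(true); // stop when this plan is done
step_over_wp_sp->SetOkayToDiscard(false);
Status error = thread_sp->QueueThreadPlan(step_over_wp_sp, false);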
--- lldb/include/lldb/Target/Process.h | 19 +----- .../Process/Utility/StopInfoMachException.cpp | 13 ---- lldb/source/Target/StopInfo.cpp | 5 -- lldb/source/Target/Thread.cpp | 3 - .../TestStepOverWatchpoint.py | 68 +++++++++---------- .../watchpoints/step_over_watchpoint/main.c | 4 +- 6 files changed, 34 insertions(+), 78 deletions(-) diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index 6ce38f63cd249..3ffacb52299b9 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -311,14 +311,6 @@ class ProcessModID { return m_last_natural_stop_event; return lldb::EventSP(); } - - void SetSafeToCallFunctions(bool safe) { - m_safe = safe; - } - - bool GetSafeToCallFunctions() { - return m_safe; - } private: uint32_t m_stop_id = 0; @@ -329,7 +321,6 @@ class ProcessModID { uint32_t m_running_user_expression = false; uint32_t m_running_utility_function = 0; lldb::EventSP m_last_natural_stop_event; - std::atomic m_safe = true; }; inline bool operator==(const ProcessModID &lhs, const ProcessModID &rhs) { @@ -468,7 +459,7 @@ class Process : public std::enable_shared_from_this, void SetRestarted(bool new_value) { m_restarted = new_value; } void SetInterrupted(bool new_value) { m_interrupted = new_value; } - + void AddRestartedReason(const char *reason) { m_restarted_reasons.push_back(reason); } @@ -1259,14 +1250,6 @@ class Process : public std::enable_shared_from_this, DiagnosticManager &diagnostic_manager); static const char *ExecutionResultAsCString(lldb::ExpressionResults result); - - void SetSafeToCallFunctions(bool safe) { - GetModID().SetSafeToCallFunctions(safe); - } - - bool GetSafeToCallFunctions() { - return GetModID().GetSafeToCallFunctions(); - } void GetStatus(Stream &ostrm); diff --git a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp index 458d44f6feb33..aae15b2ef4624 100644 --- a/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp +++ b/lldb/source/Plugins/Process/Utility/StopInfoMachException.cpp @@ -795,19 +795,6 @@ StopInfoSP StopInfoMachException::CreateStopReasonWithMachException( case 9: // EXC_RPC_ALERT case 10: // EXC_CRASH break; - case 12: // EXC_GUARD - { - // Some EXC_GUARD exceptions are fatal, and the process will go away - // the next time you allow it to run. When we get one of those - // exceptions we have to make sure SafeToCallFunctions returns false to - // prevent us or other agents running the process. This has to be set - // on the process because even the threads that didn't get the exception - // can't run. - ProcessSP process_sp(thread.GetProcess()); - if (process_sp) - process_sp->SetSafeToCallFunctions(false); - - } } return StopInfoSP(new StopInfoMachException(thread, exc_type, exc_data_count, diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index ebc355c90d0ab..9fdb29f9e4273 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -831,11 +831,6 @@ class StopInfoWatchpoint : public StopInfo { = std::static_pointer_cast(shared_from_this()); ThreadPlanSP step_over_wp_sp(new ThreadPlanStepOverWatchpoint( *(thread_sp.get()), me_as_siwp_sp, wp_sp)); - // When this plan is done we want to stop, so set this as a Controlling - // plan. 
- step_over_wp_sp->SetIsControllingPlan(true); - step_over_wp_sp->SetOkayToDiscard(false); - Status error; error = thread_sp->QueueThreadPlan(step_over_wp_sp, false); // If we couldn't push the thread plan, just stop here: diff --git a/lldb/source/Target/Thread.cpp b/lldb/source/Target/Thread.cpp index df8bff5102b83..d620f746339e7 100644 --- a/lldb/source/Target/Thread.cpp +++ b/lldb/source/Target/Thread.cpp @@ -1664,9 +1664,6 @@ addr_t Thread::GetThreadLocalData(const ModuleSP module, bool Thread::SafeToCallFunctions() { Process *process = GetProcess().get(); if (process) { - if (!process->SafeToCallFunctions()) - return false; - DynamicLoader *loader = GetProcess()->GetDynamicLoader(); if (loader && loader->IsFullyInitialized() == false) return false; diff --git a/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py b/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py index 52fc899b13e61..7d54156aebb5b 100644 --- a/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py +++ b/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py @@ -11,11 +11,36 @@ class TestStepOverWatchpoint(TestBase): NO_DEBUG_INFO_TESTCASE = True - def get_to_start(self, bkpt_text): + @expectedFailureAll( + oslist=["freebsd", "linux"], + archs=[ + 'aarch64', + 'arm'], + bugnumber="llvm.org/pr26031") + # Read-write watchpoints not supported on SystemZ + @expectedFailureAll(archs=['s390x']) + @expectedFailureAll( + oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], + archs=['aarch64', 'arm'], + bugnumber="") + @add_test_categories(["basic_process"]) + def test(self): """Test stepping over watchpoints.""" self.build() - target, process, thread, bkpt = lldbutil.run_to_source_breakpoint(self, bkpt_text, - lldb.SBFileSpec("main.c")) + target = self.createTestTarget() + + lldbutil.run_break_set_by_symbol(self, 'main') + + process = target.LaunchSimple(None, None, + self.get_process_working_directory()) + self.assertTrue(process.IsValid(), PROCESS_IS_VALID) + self.assertState(process.GetState(), lldb.eStateStopped, + PROCESS_STOPPED) + + thread = lldbutil.get_stopped_thread(process, + lldb.eStopReasonBreakpoint) + self.assertTrue(thread.IsValid(), "Failed to get thread.") + frame = thread.GetFrameAtIndex(0) self.assertTrue(frame.IsValid(), "Failed to get frame.") @@ -30,45 +55,14 @@ def get_to_start(self, bkpt_text): self.assertSuccess(error, "Error while setting watchpoint") self.assertTrue(read_watchpoint, "Failed to set read watchpoint.") - # Disable the breakpoint we hit so we don't muddy the waters with - # stepping off from the breakpoint: - bkpt.SetEnabled(False) - - return (target, process, thread, read_watchpoint) - - @expectedFailureAll( - oslist=["freebsd", "linux"], - archs=[ - 'aarch64', - 'arm'], - bugnumber="llvm.org/pr26031") - # Read-write watchpoints not supported on SystemZ - @expectedFailureAll(archs=['s390x']) - @add_test_categories(["basic_process"]) - def test_step_over(self): - target, process, thread, wp = self.get_to_start("Set a breakpoint here") - thread.StepOver() self.assertStopReason(thread.GetStopReason(), lldb.eStopReasonWatchpoint, STOPPED_DUE_TO_WATCHPOINT) self.assertEquals(thread.GetStopDescription(20), 'watchpoint 1') - @expectedFailureAll( - oslist=["freebsd", "linux"], - archs=[ - 'aarch64', - 'arm'], - bugnumber="llvm.org/pr26031") - # Read-write watchpoints not supported on SystemZ - @expectedFailureAll(archs=['s390x']) - @expectedFailureAll( - oslist=["ios", 
"watchos", "tvos", "bridgeos", "macosx"], - archs=['aarch64', 'arm'], - bugnumber="") - @add_test_categories(["basic_process"]) - def test_step_instruction(self): - target, process, thread, wp = self.get_to_start("Set breakpoint after call") - + process.Continue() + self.assertState(process.GetState(), lldb.eStateStopped, + PROCESS_STOPPED) self.assertEquals(thread.GetStopDescription(20), 'step over') self.step_inst_for_watchpoint(1) diff --git a/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c b/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c index e48d43cb7a974..2d87d9a2f73fe 100644 --- a/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c +++ b/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c @@ -11,8 +11,8 @@ void watch_write() { } int main() { - watch_read(); // Set a breakpoint here - g_temp = g_watch_me_read; // Set breakpoint after call + watch_read(); + g_temp = g_watch_me_read; watch_write(); g_watch_me_write = g_temp; return 0; From 2c7abc83f605b2d3b1062f28422eeff81278ecd5 Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Mon, 20 Mar 2023 16:11:00 -0700 Subject: [PATCH 093/691] Reapply 8d024a79ea783ed3fbb5691aeaf186ad3f0a4ae9 w/o the extra bits. --- lldb/source/Target/StopInfo.cpp | 5 ++ .../TestStepOverWatchpoint.py | 68 ++++++++++--------- .../watchpoints/step_over_watchpoint/main.c | 4 +- 3 files changed, 44 insertions(+), 33 deletions(-) diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index 9fdb29f9e4273..ebc355c90d0ab 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -831,6 +831,11 @@ class StopInfoWatchpoint : public StopInfo { = std::static_pointer_cast(shared_from_this()); ThreadPlanSP step_over_wp_sp(new ThreadPlanStepOverWatchpoint( *(thread_sp.get()), me_as_siwp_sp, wp_sp)); + // When this plan is done we want to stop, so set this as a Controlling + // plan. 
+ step_over_wp_sp->SetIsControllingPlan(true); + step_over_wp_sp->SetOkayToDiscard(false); + Status error; error = thread_sp->QueueThreadPlan(step_over_wp_sp, false); // If we couldn't push the thread plan, just stop here: diff --git a/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py b/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py index 7d54156aebb5b..52fc899b13e61 100644 --- a/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py +++ b/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py @@ -11,36 +11,11 @@ class TestStepOverWatchpoint(TestBase): NO_DEBUG_INFO_TESTCASE = True - @expectedFailureAll( - oslist=["freebsd", "linux"], - archs=[ - 'aarch64', - 'arm'], - bugnumber="llvm.org/pr26031") - # Read-write watchpoints not supported on SystemZ - @expectedFailureAll(archs=['s390x']) - @expectedFailureAll( - oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], - archs=['aarch64', 'arm'], - bugnumber="") - @add_test_categories(["basic_process"]) - def test(self): + def get_to_start(self, bkpt_text): """Test stepping over watchpoints.""" self.build() - target = self.createTestTarget() - - lldbutil.run_break_set_by_symbol(self, 'main') - - process = target.LaunchSimple(None, None, - self.get_process_working_directory()) - self.assertTrue(process.IsValid(), PROCESS_IS_VALID) - self.assertState(process.GetState(), lldb.eStateStopped, - PROCESS_STOPPED) - - thread = lldbutil.get_stopped_thread(process, - lldb.eStopReasonBreakpoint) - self.assertTrue(thread.IsValid(), "Failed to get thread.") - + target, process, thread, bkpt = lldbutil.run_to_source_breakpoint(self, bkpt_text, + lldb.SBFileSpec("main.c")) frame = thread.GetFrameAtIndex(0) self.assertTrue(frame.IsValid(), "Failed to get frame.") @@ -55,14 +30,45 @@ def test(self): self.assertSuccess(error, "Error while setting watchpoint") self.assertTrue(read_watchpoint, "Failed to set read watchpoint.") + # Disable the breakpoint we hit so we don't muddy the waters with + # stepping off from the breakpoint: + bkpt.SetEnabled(False) + + return (target, process, thread, read_watchpoint) + + @expectedFailureAll( + oslist=["freebsd", "linux"], + archs=[ + 'aarch64', + 'arm'], + bugnumber="llvm.org/pr26031") + # Read-write watchpoints not supported on SystemZ + @expectedFailureAll(archs=['s390x']) + @add_test_categories(["basic_process"]) + def test_step_over(self): + target, process, thread, wp = self.get_to_start("Set a breakpoint here") + thread.StepOver() self.assertStopReason(thread.GetStopReason(), lldb.eStopReasonWatchpoint, STOPPED_DUE_TO_WATCHPOINT) self.assertEquals(thread.GetStopDescription(20), 'watchpoint 1') - process.Continue() - self.assertState(process.GetState(), lldb.eStateStopped, - PROCESS_STOPPED) + @expectedFailureAll( + oslist=["freebsd", "linux"], + archs=[ + 'aarch64', + 'arm'], + bugnumber="llvm.org/pr26031") + # Read-write watchpoints not supported on SystemZ + @expectedFailureAll(archs=['s390x']) + @expectedFailureAll( + oslist=["ios", "watchos", "tvos", "bridgeos", "macosx"], + archs=['aarch64', 'arm'], + bugnumber="") + @add_test_categories(["basic_process"]) + def test_step_instruction(self): + target, process, thread, wp = self.get_to_start("Set breakpoint after call") + self.assertEquals(thread.GetStopDescription(20), 'step over') self.step_inst_for_watchpoint(1) diff --git a/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c 
b/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c index 2d87d9a2f73fe..e48d43cb7a974 100644 --- a/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c +++ b/lldb/test/API/commands/watchpoints/step_over_watchpoint/main.c @@ -11,8 +11,8 @@ void watch_write() { } int main() { - watch_read(); - g_temp = g_watch_me_read; + watch_read(); // Set a breakpoint here + g_temp = g_watch_me_read; // Set breakpoint after call watch_write(); g_watch_me_write = g_temp; return 0; From 1f58ae80661b7c9738ca5cff08ff8246ddecf987 Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Mon, 20 Mar 2023 16:11:17 -0700 Subject: [PATCH 094/691] [mlir][sparse] Making `TensorExp::Kind` a nested enum-class This improves namespacing, and follows the pattern used for "Kind" enums elsewhere in MLIR. Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D146086 --- .../mlir/Dialect/SparseTensor/Utils/Merger.h | 186 ++-- .../Transforms/Sparsification.cpp | 34 +- .../lib/Dialect/SparseTensor/Utils/Merger.cpp | 999 +++++++++--------- .../Dialect/SparseTensor/MergerTest.cpp | 185 ++-- 4 files changed, 707 insertions(+), 697 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h index 4a83237fb1634..6e39404bb28aa 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -23,87 +23,6 @@ namespace mlir { namespace sparse_tensor { -/// Tensor expression kind. -/// -/// The `kLoopVar` leaf kind is for representing `linalg::IndexOp`. -/// That is, its argument is a `LoopId` identifying the loop-variable -/// in question, and its value will be the current iteration's value -/// of that loop-variable. See the `LoopId` documentation for more details. -// -// TODO: make this an `enum class` nested in the `TensorExp` class; -// to improve namespacing, and match the pattern used by other "Kind" -// enums in MLIR. -// -// TODO: Modify this definition so that the numeric values already encode -// the `ExpArity` (while extending the notion of "arity" to include not -// just the number of `ExprId` children the node has, but also whether the -// node has a `Value` and/or `Operation*`). Doing this will avoid needing -// to enumerate all the kinds in `getExpArity` and in the `TensorExp` ctor, -// and should help clean up a few other places as well. -enum Kind { - // Leaf. - kTensor = 0, - kInvariant, - kLoopVar, - // Unary operations. - kAbsF, - kAbsC, - kAbsI, - kCeilF, - kFloorF, - kSqrtF, - kSqrtC, - kExpm1F, - kExpm1C, - kLog1pF, - kLog1pC, - kSinF, - kSinC, - kTanhF, - kTanhC, - kNegF, - kNegC, - kNegI, - kTruncF, - kExtF, - kCastFS, // signed - kCastFU, // unsigned - kCastSF, // signed - kCastUF, // unsigned - kCastS, // signed - kCastU, // unsigned - kCastIdx, - kTruncI, - kCIm, // complex.im - kCRe, // complex.re - kBitCast, - kBinaryBranch, // semiring unary branch created from a binary op - kUnary, // semiring unary op - kSelect, // custom selection criteria - // Binary operations. 
- kMulF, - kMulC, - kMulI, - kDivF, - kDivC, // complex - kDivS, // signed - kDivU, // unsigned - kAddF, - kAddC, - kAddI, - kSubF, - kSubC, - kSubI, - kAndI, - kOrI, - kXorI, - kShrS, // signed - kShrU, // unsigned - kShlI, - kBinary, // semiring binary op - kReduce, // semiring reduction op -}; - // TODO: These type aliases currently only serve to make the code more // self-documenting, however because they are not type-checked they can // do nothing to prevent mixups. We should really change them from mere @@ -169,6 +88,8 @@ struct Children { /// Tensor expression. Represents a MLIR expression in tensor index notation. struct TensorExp { + enum class Kind; + // The `x` parameter has different types depending on the value of the // `k` parameter. The correspondences are: // * `kTensor` -> `TensorId` @@ -207,6 +128,83 @@ struct TensorExp { Operation *op; }; +/// Tensor expression kind. +/// +/// The `kLoopVar` leaf kind is for representing `linalg::IndexOp`. +/// That is, its argument is a `LoopId` identifying the loop-variable +/// in question, and its value will be the current iteration's value +/// of that loop-variable. See the `LoopId` documentation for more details. +// +// TODO: Modify this definition so that the numeric values already encode +// the `ExpArity` (while extending the notion of "arity" to include not +// just the number of `ExprId` children the node has, but also whether the +// node has a `Value` and/or `Operation*`). Doing this will avoid needing +// to enumerate all the kinds in `getExpArity` and in the `TensorExp` ctor, +// and should help clean up a few other places as well. +enum class TensorExp::Kind { + // Leaf. + kTensor = 0, + kInvariant, + kLoopVar, + // Unary operations. + kAbsF, + kAbsC, + kAbsI, + kCeilF, + kFloorF, + kSqrtF, + kSqrtC, + kExpm1F, + kExpm1C, + kLog1pF, + kLog1pC, + kSinF, + kSinC, + kTanhF, + kTanhC, + kNegF, + kNegC, + kNegI, + kTruncF, + kExtF, + kCastFS, // signed + kCastFU, // unsigned + kCastSF, // signed + kCastUF, // unsigned + kCastS, // signed + kCastU, // unsigned + kCastIdx, + kTruncI, + kCIm, // complex.im + kCRe, // complex.re + kBitCast, + kBinaryBranch, // semiring unary branch created from a binary op + kUnary, // semiring unary op + kSelect, // custom selection criteria + // Binary operations. + kMulF, + kMulC, + kMulI, + kDivF, + kDivC, // complex + kDivS, // signed + kDivU, // unsigned + kAddF, + kAddC, + kAddI, + kSubF, + kSubC, + kSubI, + kAndI, + kOrI, + kXorI, + kShrS, // signed + kShrU, // unsigned + kShlI, + kBinary, // semiring binary op + kReduce, // semiring reduction op +}; + /// Lattice point. Each lattice point consists of a formal conjunction /// of `TensorLoopId`s, together with the identifier of the corresponding /// tensor expression. The formal conjunction is represented as a set of @@ -271,12 +269,12 @@ class Merger { /// Constructs a new tensor expression, and returns its identifier. /// The type of the `e0` argument varies according to the value of the /// `k` argument, as described by the `TensorExp` ctor. 
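For readers unfamiliar with the pattern this commit adopts, a minimal standalone sketch may help (hypothetical simplified declarations, not the actual MLIR code): nesting the enum inside TensorExp keeps the enumerators out of the sparse_tensor namespace, and the enum-class form removes the implicit conversion to int, which is why call sites throughout this diff change from a bare kMulF to the qualified TensorExp::Kind::kMulF.

// Before: unscoped enum at namespace scope. Enumerators leak into the
// enclosing namespace and convert implicitly to int.
enum Kind { kTensor = 0, kMulF };
int leaky = kMulF; // compiles silently

// After: scoped enum nested inside the struct. Access must be qualified,
// and integer conversions must be explicit.
struct TensorExp {
  enum class Kind { kTensor = 0, kMulF };
};
TensorExp::Kind k = TensorExp::Kind::kMulF;
int val = static_cast<int>(k); // `int val = k;` would no longer compile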
- ExprId addExp(Kind k, unsigned e0, ExprId e1 = kInvalidId, Value v = Value(), - Operation *op = nullptr); - ExprId addExp(Kind k, ExprId e, Value v, Operation *op = nullptr) { + ExprId addExp(TensorExp::Kind k, unsigned e0, ExprId e1 = kInvalidId, + Value v = Value(), Operation *op = nullptr); + ExprId addExp(TensorExp::Kind k, ExprId e, Value v, Operation *op = nullptr) { return addExp(k, e, kInvalidId, v, op); } - ExprId addExp(Kind k, Value v, Operation *op = nullptr) { + ExprId addExp(TensorExp::Kind k, Value v, Operation *op = nullptr) { return addExp(k, kInvalidId, kInvalidId, v, op); } @@ -290,30 +288,31 @@ class Merger { /// of `LoopId` (effectively constructing a larger "intersection" of those /// loops) with a newly constructed tensor (sub)expression of given kind. /// Returns the identifier of the new lattice point. - LatPointId conjLat(Kind kind, LatPointId p0, LatPointId p1, + LatPointId conjLat(TensorExp::Kind kind, LatPointId p0, LatPointId p1, Operation *op = nullptr); /// Conjunctive merge of two lattice sets: `(s0 /\_op s1)`. /// Returns the identifier of the new set. - LatSetId conjSet(Kind kind, LatSetId s0, LatSetId s1, + LatSetId conjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, Operation *op = nullptr); /// Disjunctive merge of two lattice sets: `(s0 /\_op s1, s0, s1)`. /// Returns the identifier of the new set. - LatSetId disjSet(Kind kind, LatSetId s0, LatSetId s1, + LatSetId disjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, Operation *op = nullptr); /// Disjunctive merge of two lattice sets with custom handling of the /// overlap, left, and right regions. Any region may be left missing /// in the output. Returns the identifier of the new set. - LatSetId combiSet(Kind kind, LatSetId s0, LatSetId s1, Operation *orig, - bool includeLeft, Kind ltrans, Operation *opleft, - bool includeRight, Kind rtrans, Operation *opright); + LatSetId combiSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, + Operation *orig, bool includeLeft, TensorExp::Kind ltrans, + Operation *opleft, bool includeRight, + TensorExp::Kind rtrans, Operation *opright); /// Maps the unary operator over the lattice set of the operand, i.e. each /// lattice point on an expression E is simply copied over, but with OP E /// as new expression. Returns the identifier of the new set. - LatSetId mapSet(Kind kind, LatSetId s, Value v = Value(), + LatSetId mapSet(TensorExp::Kind kind, LatSetId s, Value v = Value(), Operation *op = nullptr); /// Optimizes the iteration lattice points in the given set. This @@ -377,7 +376,8 @@ class Merger { /// Returns true if the expression is `(kTensor t)`. bool expIsTensor(ExprId e, TensorId t) const { - return tensorExps[e].kind == kTensor && tensorExps[e].tensor == t; + return tensorExps[e].kind == TensorExp::Kind::kTensor && + tensorExps[e].tensor == t; } /// Returns true if the expression contains the tensor as an operand. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index f189b14c60c7e..d8aeb44811534 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -1045,8 +1045,9 @@ static void genTensorStore(CodegenEnv &env, OpBuilder &builder, ExprId exp, if (!rhs) { // Only unary and binary are allowed to return uninitialized rhs // to indicate missing output. 
- assert(env.exp(exp).kind == kUnary || env.exp(exp).kind == kBinary); - } else if (env.exp(exp).kind == kSelect) { + assert(env.exp(exp).kind == TensorExp::Kind::kUnary || + env.exp(exp).kind == TensorExp::Kind::kBinary); + } else if (env.exp(exp).kind == TensorExp::Kind::kSelect) { // Select operation insertion. Value chain = env.getInsertionChain(); scf::IfOp ifOp = @@ -1114,28 +1115,29 @@ static Value genExp(CodegenEnv &env, RewriterBase &rewriter, ExprId e, return Value(); const TensorExp &exp = env.exp(e); const auto kind = exp.kind; - if (kind == Kind::kTensor) + if (kind == TensorExp::Kind::kTensor) return genTensorLoad(env, rewriter, e); - if (kind == Kind::kInvariant) + if (kind == TensorExp::Kind::kInvariant) return genInvariantValue(env, e); - if (kind == Kind::kLoopVar) + if (kind == TensorExp::Kind::kLoopVar) return env.getLoopVar(exp.loop); - if (kind == Kind::kReduce) + if (kind == TensorExp::Kind::kReduce) env.startCustomReduc(e); // enter custom Value v0 = genExp(env, rewriter, exp.children.e0, ldx); Value v1 = genExp(env, rewriter, exp.children.e1, ldx); Value ee = env.merger().buildExp(rewriter, loc, e, v0, v1); - if (ee && (kind == Kind::kUnary || kind == Kind::kBinary || - kind == Kind::kBinaryBranch || kind == Kind::kReduce || - kind == Kind::kSelect)) + if (ee && + (kind == TensorExp::Kind::kUnary || kind == TensorExp::Kind::kBinary || + kind == TensorExp::Kind::kBinaryBranch || + kind == TensorExp::Kind::kReduce || kind == TensorExp::Kind::kSelect)) ee = relinkBranch(env, rewriter, ee.getParentBlock(), ee, ldx); - if (kind == Kind::kReduce) + if (kind == TensorExp::Kind::kReduce) env.endCustomReduc(); // exit custom - if (kind == kSelect) { + if (kind == TensorExp::Kind::kSelect) { assert(!exp.val); env.exp(e).val = v0; // Preserve value for later use. } @@ -1148,7 +1150,7 @@ static void genInvariants(CodegenEnv &env, OpBuilder &builder, ExprId exp, LoopId ldx, bool atStart) { if (exp == kInvalidId) return; - if (env.exp(exp).kind == Kind::kTensor) { + if (env.exp(exp).kind == TensorExp::Kind::kTensor) { // Inspect tensor indices. bool isAtLoop = ldx == kInvalidId; linalg::GenericOp op = env.op(); @@ -1192,18 +1194,18 @@ static void genInvariants(CodegenEnv &env, OpBuilder &builder, ExprId exp, // Start or end loop invariant hoisting of a tensor load. env.exp(exp).val = atStart ? genTensorLoad(env, builder, exp) : Value(); } - } else if (env.exp(exp).kind != Kind::kInvariant && - env.exp(exp).kind != Kind::kLoopVar) { + } else if (env.exp(exp).kind != TensorExp::Kind::kInvariant && + env.exp(exp).kind != TensorExp::Kind::kLoopVar) { // Traverse into the binary operations. Note that we only hoist // tensor loads, since subsequent MLIR/LLVM passes know how to // deal with all other kinds of derived loop invariants. 
- if (env.exp(exp).kind == Kind::kReduce) + if (env.exp(exp).kind == TensorExp::Kind::kReduce) env.startCustomReduc(exp); // enter custom const ExprId e0 = env.exp(exp).children.e0; const ExprId e1 = env.exp(exp).children.e1; genInvariants(env, builder, e0, ldx, atStart); genInvariants(env, builder, e1, ldx, atStart); - if (env.exp(exp).kind == Kind::kReduce) + if (env.exp(exp).kind == TensorExp::Kind::kReduce) env.endCustomReduc(); // exit custom } } diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 40db5411132b4..4a8c3cbfbe584 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -25,70 +25,70 @@ enum class ExpArity { kBinary, }; -static ExpArity getExpArity(Kind k) { +static ExpArity getExpArity(TensorExp::Kind k) { switch (k) { // Leaf. - case kTensor: - case kInvariant: - case kLoopVar: + case TensorExp::Kind::kTensor: + case TensorExp::Kind::kInvariant: + case TensorExp::Kind::kLoopVar: return ExpArity::kNullary; - case kAbsF: - case kAbsC: - case kAbsI: - case kCeilF: - case kFloorF: - case kSqrtF: - case kSqrtC: - case kExpm1F: - case kExpm1C: - case kLog1pF: - case kLog1pC: - case kSinF: - case kSinC: - case kTanhF: - case kTanhC: - case kTruncF: - case kExtF: - case kCastFS: - case kCastFU: - case kCastSF: - case kCastUF: - case kCastS: - case kCastU: - case kCastIdx: - case kTruncI: - case kCIm: - case kCRe: - case kBitCast: - case kBinaryBranch: - case kUnary: - case kSelect: - case kNegF: - case kNegC: - case kNegI: + case TensorExp::Kind::kAbsF: + case TensorExp::Kind::kAbsC: + case TensorExp::Kind::kAbsI: + case TensorExp::Kind::kCeilF: + case TensorExp::Kind::kFloorF: + case TensorExp::Kind::kSqrtF: + case TensorExp::Kind::kSqrtC: + case TensorExp::Kind::kExpm1F: + case TensorExp::Kind::kExpm1C: + case TensorExp::Kind::kLog1pF: + case TensorExp::Kind::kLog1pC: + case TensorExp::Kind::kSinF: + case TensorExp::Kind::kSinC: + case TensorExp::Kind::kTanhF: + case TensorExp::Kind::kTanhC: + case TensorExp::Kind::kTruncF: + case TensorExp::Kind::kExtF: + case TensorExp::Kind::kCastFS: + case TensorExp::Kind::kCastFU: + case TensorExp::Kind::kCastSF: + case TensorExp::Kind::kCastUF: + case TensorExp::Kind::kCastS: + case TensorExp::Kind::kCastU: + case TensorExp::Kind::kCastIdx: + case TensorExp::Kind::kTruncI: + case TensorExp::Kind::kCIm: + case TensorExp::Kind::kCRe: + case TensorExp::Kind::kBitCast: + case TensorExp::Kind::kBinaryBranch: + case TensorExp::Kind::kUnary: + case TensorExp::Kind::kSelect: + case TensorExp::Kind::kNegF: + case TensorExp::Kind::kNegC: + case TensorExp::Kind::kNegI: return ExpArity::kUnary; // Binary operations. 
- case kDivF: - case kDivC: - case kDivS: - case kDivU: - case kShrS: - case kShrU: - case kShlI: - case kMulF: - case kMulC: - case kMulI: - case kAndI: - case kAddF: - case kAddC: - case kAddI: - case kOrI: - case kXorI: - case kBinary: - case kReduce: - case kSubF: - case kSubC: - case kSubI: + case TensorExp::Kind::kDivF: + case TensorExp::Kind::kDivC: + case TensorExp::Kind::kDivS: + case TensorExp::Kind::kDivU: + case TensorExp::Kind::kShrS: + case TensorExp::Kind::kShrU: + case TensorExp::Kind::kShlI: + case TensorExp::Kind::kMulF: + case TensorExp::Kind::kMulC: + case TensorExp::Kind::kMulI: + case TensorExp::Kind::kAndI: + case TensorExp::Kind::kAddF: + case TensorExp::Kind::kAddC: + case TensorExp::Kind::kAddI: + case TensorExp::Kind::kOrI: + case TensorExp::Kind::kXorI: + case TensorExp::Kind::kBinary: + case TensorExp::Kind::kReduce: + case TensorExp::Kind::kSubF: + case TensorExp::Kind::kSubC: + case TensorExp::Kind::kSubI: return ExpArity::kBinary; } llvm_unreachable("unexpected kind"); @@ -102,64 +102,64 @@ TensorExp::TensorExp(Kind k, unsigned x, ExprId y, Value v, Operation *o) : kind(k), val(v), op(o) { switch (kind) { // Leaf. - case kTensor: + case TensorExp::Kind::kTensor: assert(x != kInvalidId && y == kInvalidId && !v && !o); tensor = x; break; - case kInvariant: + case TensorExp::Kind::kInvariant: assert(x == kInvalidId && y == kInvalidId && v && !o); break; - case kLoopVar: + case TensorExp::Kind::kLoopVar: assert(x != kInvalidId && y == kInvalidId && !v && !o); loop = x; break; // Unary operations. - case kAbsF: - case kAbsC: - case kAbsI: - case kCeilF: - case kFloorF: - case kSqrtF: - case kSqrtC: - case kExpm1F: - case kExpm1C: - case kLog1pF: - case kLog1pC: - case kSinF: - case kSinC: - case kTanhF: - case kTanhC: - case kNegF: - case kNegC: - case kNegI: - case kCIm: - case kCRe: + case TensorExp::Kind::kAbsF: + case TensorExp::Kind::kAbsC: + case TensorExp::Kind::kAbsI: + case TensorExp::Kind::kCeilF: + case TensorExp::Kind::kFloorF: + case TensorExp::Kind::kSqrtF: + case TensorExp::Kind::kSqrtC: + case TensorExp::Kind::kExpm1F: + case TensorExp::Kind::kExpm1C: + case TensorExp::Kind::kLog1pF: + case TensorExp::Kind::kLog1pC: + case TensorExp::Kind::kSinF: + case TensorExp::Kind::kSinC: + case TensorExp::Kind::kTanhF: + case TensorExp::Kind::kTanhC: + case TensorExp::Kind::kNegF: + case TensorExp::Kind::kNegC: + case TensorExp::Kind::kNegI: + case TensorExp::Kind::kCIm: + case TensorExp::Kind::kCRe: assert(x != kInvalidId && y == kInvalidId && !v && !o); children.e0 = x; children.e1 = y; break; - case kTruncF: - case kExtF: - case kCastFS: - case kCastFU: - case kCastSF: - case kCastUF: - case kCastS: - case kCastU: - case kCastIdx: - case kTruncI: - case kBitCast: + case TensorExp::Kind::kTruncF: + case TensorExp::Kind::kExtF: + case TensorExp::Kind::kCastFS: + case TensorExp::Kind::kCastFU: + case TensorExp::Kind::kCastSF: + case TensorExp::Kind::kCastUF: + case TensorExp::Kind::kCastS: + case TensorExp::Kind::kCastU: + case TensorExp::Kind::kCastIdx: + case TensorExp::Kind::kTruncI: + case TensorExp::Kind::kBitCast: assert(x != kInvalidId && y == kInvalidId && v && !o); children.e0 = x; children.e1 = y; break; - case kBinaryBranch: - case kSelect: + case TensorExp::Kind::kBinaryBranch: + case TensorExp::Kind::kSelect: assert(x != kInvalidId && y == kInvalidId && !v && o); children.e0 = x; children.e1 = y; break; - case kUnary: + case TensorExp::Kind::kUnary: // No assertion on y can be made, as the branching paths involve both // a unary (`mapSet`) and 
binary (`disjSet`) pathway. assert(x != kInvalidId && !v && o); @@ -167,31 +167,31 @@ TensorExp::TensorExp(Kind k, unsigned x, ExprId y, Value v, Operation *o) children.e1 = y; break; // Binary operations. - case kMulF: - case kMulC: - case kMulI: - case kDivF: - case kDivC: - case kDivS: - case kDivU: - case kAddF: - case kAddC: - case kAddI: - case kSubF: - case kSubC: - case kSubI: - case kAndI: - case kOrI: - case kXorI: - case kShrS: - case kShrU: - case kShlI: + case TensorExp::Kind::kMulF: + case TensorExp::Kind::kMulC: + case TensorExp::Kind::kMulI: + case TensorExp::Kind::kDivF: + case TensorExp::Kind::kDivC: + case TensorExp::Kind::kDivS: + case TensorExp::Kind::kDivU: + case TensorExp::Kind::kAddF: + case TensorExp::Kind::kAddC: + case TensorExp::Kind::kAddI: + case TensorExp::Kind::kSubF: + case TensorExp::Kind::kSubC: + case TensorExp::Kind::kSubI: + case TensorExp::Kind::kAndI: + case TensorExp::Kind::kOrI: + case TensorExp::Kind::kXorI: + case TensorExp::Kind::kShrS: + case TensorExp::Kind::kShrU: + case TensorExp::Kind::kShlI: assert(x != kInvalidId && y != kInvalidId && !v && !o); children.e0 = x; children.e1 = y; break; - case kBinary: - case kReduce: + case TensorExp::Kind::kBinary: + case TensorExp::Kind::kReduce: assert(x != kInvalidId && y != kInvalidId && !v && o); children.e0 = x; children.e1 = y; @@ -231,9 +231,11 @@ Merger::Merger(unsigned numInputOutputTensors, unsigned numNativeLoops, // Lattice methods. //===----------------------------------------------------------------------===// -ExprId Merger::addExp(Kind k, unsigned x, ExprId y, Value v, Operation *op) { +ExprId Merger::addExp(TensorExp::Kind k, unsigned x, ExprId y, Value v, + Operation *op) { const ExprId e = tensorExps.size(); - assert((k != kTensor || x < numTensors) && (k != kLoopVar || x < numLoops)); + assert((k != TensorExp::Kind::kTensor || x < numTensors) && + (k != TensorExp::Kind::kLoopVar || x < numLoops)); tensorExps.emplace_back(k, x, y, v, op); return e; } @@ -251,7 +253,7 @@ LatSetId Merger::addSet() { return s; } -LatPointId Merger::conjLat(Kind kind, LatPointId p0, LatPointId p1, +LatPointId Merger::conjLat(TensorExp::Kind kind, LatPointId p0, LatPointId p1, Operation *op) { const LatPointId p = latPoints.size(); BitVector bits(latPoints[p0].bits); @@ -262,7 +264,8 @@ LatPointId Merger::conjLat(Kind kind, LatPointId p0, LatPointId p1, return p; } -LatSetId Merger::conjSet(Kind kind, LatSetId s0, LatSetId s1, Operation *op) { +LatSetId Merger::conjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, + Operation *op) { const LatSetId s = addSet(); for (const LatPointId p0 : latSets[s0]) for (const LatPointId p1 : latSets[s1]) @@ -270,28 +273,31 @@ LatSetId Merger::conjSet(Kind kind, LatSetId s0, LatSetId s1, Operation *op) { return s; } -LatSetId Merger::disjSet(Kind kind, LatSetId s0, LatSetId s1, Operation *op) { +LatSetId Merger::disjSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, + Operation *op) { const LatSetId s = conjSet(kind, s0, s1, op); // Followed by all in s0. for (const LatPointId p : latSets[s0]) latSets[s].push_back(p); // Map binary 0-y to unary -y. 
// TODO: move this if-else logic into buildLattices - if (kind == kSubF) - s1 = mapSet(kNegF, s1); - else if (kind == kSubC) - s1 = mapSet(kNegC, s1); - else if (kind == kSubI) - s1 = mapSet(kNegI, s1); + if (kind == TensorExp::Kind::kSubF) + s1 = mapSet(TensorExp::Kind::kNegF, s1); + else if (kind == TensorExp::Kind::kSubC) + s1 = mapSet(TensorExp::Kind::kNegC, s1); + else if (kind == TensorExp::Kind::kSubI) + s1 = mapSet(TensorExp::Kind::kNegI, s1); // Followed by all in s1. for (const LatPointId p : latSets[s1]) latSets[s].push_back(p); return s; } -LatSetId Merger::combiSet(Kind kind, LatSetId s0, LatSetId s1, Operation *orig, - bool includeLeft, Kind ltrans, Operation *opleft, - bool includeRight, Kind rtrans, Operation *opright) { +LatSetId Merger::combiSet(TensorExp::Kind kind, LatSetId s0, LatSetId s1, + Operation *orig, bool includeLeft, + TensorExp::Kind ltrans, Operation *opleft, + bool includeRight, TensorExp::Kind rtrans, + Operation *opright) { const LatSetId s = conjSet(kind, s0, s1, orig); // Left Region. if (includeLeft) { @@ -310,8 +316,9 @@ LatSetId Merger::combiSet(Kind kind, LatSetId s0, LatSetId s1, Operation *orig, return s; } -LatSetId Merger::mapSet(Kind kind, LatSetId s0, Value v, Operation *op) { - assert(kAbsF <= kind && kind <= kSelect); +LatSetId Merger::mapSet(TensorExp::Kind kind, LatSetId s0, Value v, + Operation *op) { + assert(TensorExp::Kind::kAbsF <= kind && kind <= TensorExp::Kind::kSelect); const LatSetId s = addSet(); for (const LatPointId p : latSets[s0]) { const ExprId e = addExp(kind, latPoints[p].exp, v, op); @@ -414,7 +421,7 @@ bool Merger::onlyDenseDiff(LatPointId i, LatPointId j) const { } bool Merger::expContainsTensor(ExprId e, TensorId t) const { - if (tensorExps[e].kind == kTensor) + if (tensorExps[e].kind == TensorExp::Kind::kTensor) return tensorExps[e].tensor == t; switch (getExpArity(tensorExps[e].kind)) { @@ -439,13 +446,13 @@ bool Merger::expContainsTensor(ExprId e, TensorId t) const { bool Merger::hasNegateOnOut(ExprId e) const { switch (tensorExps[e].kind) { - case kNegF: - case kNegC: - case kNegI: + case TensorExp::Kind::kNegF: + case TensorExp::Kind::kNegC: + case TensorExp::Kind::kNegI: return expContainsTensor(tensorExps[e].children.e0, outTensor); - case kSubF: - case kSubC: - case kSubI: + case TensorExp::Kind::kSubF: + case TensorExp::Kind::kSubC: + case TensorExp::Kind::kSubI: return expContainsTensor(tensorExps[e].children.e1, outTensor) || hasNegateOnOut(tensorExps[e].children.e0); default: { @@ -467,82 +474,82 @@ bool Merger::isSingleCondition(TensorId t, ExprId e) const { assert(t < numTensors && e < tensorExps.size()); switch (tensorExps[e].kind) { // Leaf. - case kTensor: + case TensorExp::Kind::kTensor: return tensorExps[e].tensor == t; - case kInvariant: - case kLoopVar: + case TensorExp::Kind::kInvariant: + case TensorExp::Kind::kLoopVar: return false; // Unary operations. 
- case kAbsF: - case kAbsC: - case kAbsI: - case kCeilF: - case kFloorF: - case kSqrtF: - case kSqrtC: - case kExpm1F: - case kExpm1C: - case kLog1pF: - case kLog1pC: - case kSinF: - case kSinC: - case kTanhF: - case kTanhC: - case kNegF: - case kNegC: - case kNegI: - case kTruncF: - case kExtF: - case kCastFS: - case kCastFU: - case kCastSF: - case kCastUF: - case kCastS: - case kCastU: - case kCastIdx: - case kTruncI: - case kCIm: - case kCRe: - case kBitCast: + case TensorExp::Kind::kAbsF: + case TensorExp::Kind::kAbsC: + case TensorExp::Kind::kAbsI: + case TensorExp::Kind::kCeilF: + case TensorExp::Kind::kFloorF: + case TensorExp::Kind::kSqrtF: + case TensorExp::Kind::kSqrtC: + case TensorExp::Kind::kExpm1F: + case TensorExp::Kind::kExpm1C: + case TensorExp::Kind::kLog1pF: + case TensorExp::Kind::kLog1pC: + case TensorExp::Kind::kSinF: + case TensorExp::Kind::kSinC: + case TensorExp::Kind::kTanhF: + case TensorExp::Kind::kTanhC: + case TensorExp::Kind::kNegF: + case TensorExp::Kind::kNegC: + case TensorExp::Kind::kNegI: + case TensorExp::Kind::kTruncF: + case TensorExp::Kind::kExtF: + case TensorExp::Kind::kCastFS: + case TensorExp::Kind::kCastFU: + case TensorExp::Kind::kCastSF: + case TensorExp::Kind::kCastUF: + case TensorExp::Kind::kCastS: + case TensorExp::Kind::kCastU: + case TensorExp::Kind::kCastIdx: + case TensorExp::Kind::kTruncI: + case TensorExp::Kind::kCIm: + case TensorExp::Kind::kCRe: + case TensorExp::Kind::kBitCast: return isSingleCondition(t, tensorExps[e].children.e0); - case kBinaryBranch: - case kUnary: - case kSelect: + case TensorExp::Kind::kBinaryBranch: + case TensorExp::Kind::kUnary: + case TensorExp::Kind::kSelect: return false; // Binary operations. - case kDivF: // note: x / c only - case kDivC: - case kDivS: - case kDivU: + case TensorExp::Kind::kDivF: // note: x / c only + case TensorExp::Kind::kDivC: + case TensorExp::Kind::kDivS: + case TensorExp::Kind::kDivU: assert(!maybeZero(tensorExps[e].children.e1)); return isSingleCondition(t, tensorExps[e].children.e0); - case kShrS: // note: x >> inv only - case kShrU: - case kShlI: + case TensorExp::Kind::kShrS: // note: x >> inv only + case TensorExp::Kind::kShrU: + case TensorExp::Kind::kShlI: assert(isInvariant(tensorExps[e].children.e1)); return isSingleCondition(t, tensorExps[e].children.e0); - case kMulF: - case kMulC: - case kMulI: - case kAndI: + case TensorExp::Kind::kMulF: + case TensorExp::Kind::kMulC: + case TensorExp::Kind::kMulI: + case TensorExp::Kind::kAndI: if (isSingleCondition(t, tensorExps[e].children.e0)) return isSingleCondition(t, tensorExps[e].children.e1) || isInvariant(tensorExps[e].children.e1); if (isSingleCondition(t, tensorExps[e].children.e1)) return isInvariant(tensorExps[e].children.e0); return false; - case kAddF: - case kAddC: - case kAddI: + case TensorExp::Kind::kAddF: + case TensorExp::Kind::kAddC: + case TensorExp::Kind::kAddI: return isSingleCondition(t, tensorExps[e].children.e0) && isSingleCondition(t, tensorExps[e].children.e1); - case kSubF: - case kSubC: - case kSubI: - case kOrI: - case kXorI: - case kBinary: - case kReduce: + case TensorExp::Kind::kSubF: + case TensorExp::Kind::kSubC: + case TensorExp::Kind::kSubI: + case TensorExp::Kind::kOrI: + case TensorExp::Kind::kXorI: + case TensorExp::Kind::kBinary: + case TensorExp::Kind::kReduce: return false; } llvm_unreachable("unexpected kind"); @@ -572,98 +579,98 @@ bool Merger::hasSparseIdxReduction(const BitVector &bits) const { // Print methods (for debugging). 
//===----------------------------------------------------------------------===// -static const char *kindToOpSymbol(Kind kind) { +static const char *kindToOpSymbol(TensorExp::Kind kind) { switch (kind) { // Leaf. - case kTensor: + case TensorExp::Kind::kTensor: return "tensor"; - case kInvariant: + case TensorExp::Kind::kInvariant: return "invariant"; - case kLoopVar: + case TensorExp::Kind::kLoopVar: return "index"; // Unary operations. - case kAbsF: - case kAbsC: - case kAbsI: + case TensorExp::Kind::kAbsF: + case TensorExp::Kind::kAbsC: + case TensorExp::Kind::kAbsI: return "abs"; - case kCeilF: + case TensorExp::Kind::kCeilF: return "ceil"; - case kFloorF: + case TensorExp::Kind::kFloorF: return "floor"; - case kSqrtF: - case kSqrtC: + case TensorExp::Kind::kSqrtF: + case TensorExp::Kind::kSqrtC: return "sqrt"; - case kExpm1F: - case kExpm1C: + case TensorExp::Kind::kExpm1F: + case TensorExp::Kind::kExpm1C: return "expm1"; - case kLog1pF: - case kLog1pC: + case TensorExp::Kind::kLog1pF: + case TensorExp::Kind::kLog1pC: return "log1p"; - case kSinF: - case kSinC: + case TensorExp::Kind::kSinF: + case TensorExp::Kind::kSinC: return "sin"; - case kTanhF: - case kTanhC: + case TensorExp::Kind::kTanhF: + case TensorExp::Kind::kTanhC: return "tanh"; - case kNegF: - case kNegC: - case kNegI: + case TensorExp::Kind::kNegF: + case TensorExp::Kind::kNegC: + case TensorExp::Kind::kNegI: return "-"; - case kTruncF: - case kExtF: - case kCastFS: - case kCastFU: - case kCastSF: - case kCastUF: - case kCastS: - case kCastU: - case kCastIdx: - case kTruncI: - case kCIm: + case TensorExp::Kind::kTruncF: + case TensorExp::Kind::kExtF: + case TensorExp::Kind::kCastFS: + case TensorExp::Kind::kCastFU: + case TensorExp::Kind::kCastSF: + case TensorExp::Kind::kCastUF: + case TensorExp::Kind::kCastS: + case TensorExp::Kind::kCastU: + case TensorExp::Kind::kCastIdx: + case TensorExp::Kind::kTruncI: + case TensorExp::Kind::kCIm: return "complex.im"; - case kCRe: + case TensorExp::Kind::kCRe: return "complex.re"; - case kBitCast: + case TensorExp::Kind::kBitCast: return "cast"; - case kBinaryBranch: + case TensorExp::Kind::kBinaryBranch: return "binary_branch"; - case kUnary: + case TensorExp::Kind::kUnary: return "unary"; - case kSelect: + case TensorExp::Kind::kSelect: return "select"; // Binary operations. 
- case kMulF: - case kMulC: - case kMulI: + case TensorExp::Kind::kMulF: + case TensorExp::Kind::kMulC: + case TensorExp::Kind::kMulI: return "*"; - case kDivF: - case kDivC: - case kDivS: - case kDivU: + case TensorExp::Kind::kDivF: + case TensorExp::Kind::kDivC: + case TensorExp::Kind::kDivS: + case TensorExp::Kind::kDivU: return "/"; - case kAddF: - case kAddC: - case kAddI: + case TensorExp::Kind::kAddF: + case TensorExp::Kind::kAddC: + case TensorExp::Kind::kAddI: return "+"; - case kSubF: - case kSubC: - case kSubI: + case TensorExp::Kind::kSubF: + case TensorExp::Kind::kSubC: + case TensorExp::Kind::kSubI: return "-"; - case kAndI: + case TensorExp::Kind::kAndI: return "&"; - case kOrI: + case TensorExp::Kind::kOrI: return "|"; - case kXorI: + case TensorExp::Kind::kXorI: return "^"; - case kShrS: + case TensorExp::Kind::kShrS: return "a>>"; - case kShrU: + case TensorExp::Kind::kShrU: return ">>"; - case kShlI: + case TensorExp::Kind::kShlI: return "<<"; - case kBinary: + case TensorExp::Kind::kBinary: return "binary"; - case kReduce: + case TensorExp::Kind::kReduce: return "reduce"; } llvm_unreachable("unexpected kind for symbol"); @@ -672,79 +679,79 @@ static const char *kindToOpSymbol(Kind kind) { void Merger::dumpExp(ExprId e) const { switch (tensorExps[e].kind) { // Leaf. - case kTensor: + case TensorExp::Kind::kTensor: if (tensorExps[e].tensor == syntheticTensor) llvm::dbgs() << "synthetic_"; else if (tensorExps[e].tensor == outTensor) llvm::dbgs() << "output_"; llvm::dbgs() << "tensor_" << tensorExps[e].tensor; break; - case kInvariant: + case TensorExp::Kind::kInvariant: llvm::dbgs() << "invariant"; break; - case kLoopVar: + case TensorExp::Kind::kLoopVar: llvm::dbgs() << "loopvar_" << tensorExps[e].loop; break; // Unary operations. - case kAbsF: - case kAbsC: - case kAbsI: - case kCeilF: - case kFloorF: - case kSqrtF: - case kSqrtC: - case kExpm1F: - case kExpm1C: - case kLog1pF: - case kLog1pC: - case kSinF: - case kSinC: - case kTanhF: - case kTanhC: - case kNegF: - case kNegC: - case kNegI: - case kTruncF: - case kExtF: - case kCastFS: - case kCastFU: - case kCastSF: - case kCastUF: - case kCastS: - case kCastU: - case kCastIdx: - case kTruncI: - case kCIm: - case kCRe: - case kBitCast: - case kBinaryBranch: - case kUnary: - case kSelect: + case TensorExp::Kind::kAbsF: + case TensorExp::Kind::kAbsC: + case TensorExp::Kind::kAbsI: + case TensorExp::Kind::kCeilF: + case TensorExp::Kind::kFloorF: + case TensorExp::Kind::kSqrtF: + case TensorExp::Kind::kSqrtC: + case TensorExp::Kind::kExpm1F: + case TensorExp::Kind::kExpm1C: + case TensorExp::Kind::kLog1pF: + case TensorExp::Kind::kLog1pC: + case TensorExp::Kind::kSinF: + case TensorExp::Kind::kSinC: + case TensorExp::Kind::kTanhF: + case TensorExp::Kind::kTanhC: + case TensorExp::Kind::kNegF: + case TensorExp::Kind::kNegC: + case TensorExp::Kind::kNegI: + case TensorExp::Kind::kTruncF: + case TensorExp::Kind::kExtF: + case TensorExp::Kind::kCastFS: + case TensorExp::Kind::kCastFU: + case TensorExp::Kind::kCastSF: + case TensorExp::Kind::kCastUF: + case TensorExp::Kind::kCastS: + case TensorExp::Kind::kCastU: + case TensorExp::Kind::kCastIdx: + case TensorExp::Kind::kTruncI: + case TensorExp::Kind::kCIm: + case TensorExp::Kind::kCRe: + case TensorExp::Kind::kBitCast: + case TensorExp::Kind::kBinaryBranch: + case TensorExp::Kind::kUnary: + case TensorExp::Kind::kSelect: llvm::dbgs() << kindToOpSymbol(tensorExps[e].kind) << " "; dumpExp(tensorExps[e].children.e0); break; // Binary operations. 
- case kMulF: - case kMulC: - case kMulI: - case kDivF: - case kDivC: - case kDivS: - case kDivU: - case kAddF: - case kAddC: - case kAddI: - case kSubF: - case kSubC: - case kSubI: - case kAndI: - case kOrI: - case kXorI: - case kShrS: - case kShrU: - case kShlI: - case kBinary: - case kReduce: + case TensorExp::Kind::kMulF: + case TensorExp::Kind::kMulC: + case TensorExp::Kind::kMulI: + case TensorExp::Kind::kDivF: + case TensorExp::Kind::kDivC: + case TensorExp::Kind::kDivS: + case TensorExp::Kind::kDivU: + case TensorExp::Kind::kAddF: + case TensorExp::Kind::kAddC: + case TensorExp::Kind::kAddI: + case TensorExp::Kind::kSubF: + case TensorExp::Kind::kSubC: + case TensorExp::Kind::kSubI: + case TensorExp::Kind::kAndI: + case TensorExp::Kind::kOrI: + case TensorExp::Kind::kXorI: + case TensorExp::Kind::kShrS: + case TensorExp::Kind::kShrU: + case TensorExp::Kind::kShlI: + case TensorExp::Kind::kBinary: + case TensorExp::Kind::kReduce: llvm::dbgs() << "("; dumpExp(tensorExps[e].children.e0); llvm::dbgs() << " " << kindToOpSymbol(tensorExps[e].kind) << " "; @@ -793,12 +800,12 @@ void Merger::dumpBits(const BitVector &bits) const { //===----------------------------------------------------------------------===// LatSetId Merger::buildLattices(ExprId e, LoopId i) { - const Kind kind = tensorExps[e].kind; + const TensorExp::Kind kind = tensorExps[e].kind; switch (kind) { // Leaf. - case kTensor: - case kInvariant: - case kLoopVar: { + case TensorExp::Kind::kTensor: + case TensorExp::Kind::kInvariant: + case TensorExp::Kind::kLoopVar: { // Either the loop-var is really used in the tensor expression, or it is // set to the undefined loop-var in that level. An invariant expression, // a proper index value, and a truly dynamic sparse output tensor are set @@ -806,7 +813,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) { // iteration space is not skipped as a result of their contents. const LatSetId s = addSet(); TensorId t = syntheticTensor; - if (kind == kTensor) { + if (kind == TensorExp::Kind::kTensor) { t = tensorExps[e].tensor; if (hasSparseOut && t == outTensor) t = syntheticTensor; @@ -815,37 +822,37 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) { return s; } // Unary operations. 
- case kAbsF: - case kAbsC: - case kAbsI: - case kCeilF: - case kFloorF: - case kSqrtF: - case kSqrtC: - case kExpm1F: - case kExpm1C: - case kLog1pF: - case kLog1pC: - case kSinF: - case kSinC: - case kTanhF: - case kTanhC: - case kNegF: - case kNegC: - case kNegI: - case kTruncF: - case kExtF: - case kCastFS: - case kCastFU: - case kCastSF: - case kCastUF: - case kCastS: - case kCastU: - case kCastIdx: - case kTruncI: - case kCIm: - case kCRe: - case kBitCast: + case TensorExp::Kind::kAbsF: + case TensorExp::Kind::kAbsC: + case TensorExp::Kind::kAbsI: + case TensorExp::Kind::kCeilF: + case TensorExp::Kind::kFloorF: + case TensorExp::Kind::kSqrtF: + case TensorExp::Kind::kSqrtC: + case TensorExp::Kind::kExpm1F: + case TensorExp::Kind::kExpm1C: + case TensorExp::Kind::kLog1pF: + case TensorExp::Kind::kLog1pC: + case TensorExp::Kind::kSinF: + case TensorExp::Kind::kSinC: + case TensorExp::Kind::kTanhF: + case TensorExp::Kind::kTanhC: + case TensorExp::Kind::kNegF: + case TensorExp::Kind::kNegC: + case TensorExp::Kind::kNegI: + case TensorExp::Kind::kTruncF: + case TensorExp::Kind::kExtF: + case TensorExp::Kind::kCastFS: + case TensorExp::Kind::kCastFU: + case TensorExp::Kind::kCastSF: + case TensorExp::Kind::kCastUF: + case TensorExp::Kind::kCastS: + case TensorExp::Kind::kCastU: + case TensorExp::Kind::kCastIdx: + case TensorExp::Kind::kTruncI: + case TensorExp::Kind::kCIm: + case TensorExp::Kind::kCRe: + case TensorExp::Kind::kBitCast: // A zero preserving operation (viz. f(0) = 0, [Bik96,Ch5]) maps the // lattice set of the operand through the operator into a new set. // @@ -854,13 +861,13 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) { // | 0 |-y | return mapSet(kind, buildLattices(tensorExps[e].children.e0, i), tensorExps[e].val); - case kBinaryBranch: - case kSelect: + case TensorExp::Kind::kBinaryBranch: + case TensorExp::Kind::kSelect: // The left or right half of a binary operation which has already // been split into separate operations for each region. return mapSet(kind, buildLattices(tensorExps[e].children.e0, i), Value(), tensorExps[e].op); - case kUnary: + case TensorExp::Kind::kUnary: // A custom unary operation. // // op y| !y | y | @@ -879,14 +886,14 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) { Block &absentBlock = absentRegion.front(); YieldOp absentYield = cast(absentBlock.getTerminator()); Value absentVal = absentYield.getResult(); - const ExprId rhs = addExp(kInvariant, absentVal); + const ExprId rhs = addExp(TensorExp::Kind::kInvariant, absentVal); return disjSet(kind, child0, buildLattices(rhs, i), unop); } // Binary operations. - case kMulF: - case kMulC: - case kMulI: - case kAndI: + case TensorExp::Kind::kMulF: + case TensorExp::Kind::kMulC: + case TensorExp::Kind::kMulI: + case TensorExp::Kind::kAndI: // A multiplicative operation only needs to be performed // for the conjunction of sparse iteration spaces. // @@ -898,10 +905,10 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) { // Note even here, 0*NaN=NaN and 0*Inf=NaN, but that is ignored. return conjSet(kind, buildLattices(tensorExps[e].children.e0, i), buildLattices(tensorExps[e].children.e1, i)); - case kDivF: - case kDivC: - case kDivS: - case kDivU: + case TensorExp::Kind::kDivF: + case TensorExp::Kind::kDivC: + case TensorExp::Kind::kDivS: + case TensorExp::Kind::kDivU: // A division is tricky, since 0/0, 0/c, c/0 all have // specific outcomes for floating-point and integers. // Thus, we need to traverse the full iteration space. 
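As background for the lattice rules being renamed here, a minimal standalone sketch (hypothetical simplified types, not the real Merger API) of the two merge strategies the truth tables above encode: a conjunction pairs every point of the two sets and unions their loop bits, so a product like x*y iterates only the overlap of the sparse spaces, while a disjunction is that conjunction followed by all points of each operand, so a sum like x+y also visits the x-only and y-only regions. This mirrors the conjSet and disjSet bodies shown earlier in this diff.

#include <cstdint>
#include <vector>

// Hypothetical stand-in for a lattice point: one bit per loop
// participating in the conjunction.
struct LatPoint {
  uint64_t bits;
};

// Conjunctive merge (cf. conjSet): every pairing of points, with loop
// bits unioned, keeps only iterations constrained by both operands.
std::vector<LatPoint> conj(const std::vector<LatPoint> &s0,
                           const std::vector<LatPoint> &s1) {
  std::vector<LatPoint> s;
  for (LatPoint p0 : s0)
    for (LatPoint p1 : s1)
      s.push_back({p0.bits | p1.bits});
  return s;
}

// Disjunctive merge (cf. disjSet): the conjunction, then all points of
// s0 and of s1, so regions where only one operand is nonzero are still
// visited.
std::vector<LatPoint> disj(const std::vector<LatPoint> &s0,
                           const std::vector<LatPoint> &s1) {
  std::vector<LatPoint> s = conj(s0, s1);
  s.insert(s.end(), s0.begin(), s0.end());
  s.insert(s.end(), s1.begin(), s1.end());
  return s;
}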
@@ -918,14 +925,14 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) { assert(!maybeZero(tensorExps[e].children.e1)); return conjSet(kind, buildLattices(tensorExps[e].children.e0, i), buildLattices(tensorExps[e].children.e1, i)); - case kAddF: - case kAddC: - case kAddI: - case kSubF: - case kSubC: - case kSubI: - case kOrI: - case kXorI: + case TensorExp::Kind::kAddF: + case TensorExp::Kind::kAddC: + case TensorExp::Kind::kAddI: + case TensorExp::Kind::kSubF: + case TensorExp::Kind::kSubC: + case TensorExp::Kind::kSubI: + case TensorExp::Kind::kOrI: + case TensorExp::Kind::kXorI: // An additive operation needs to be performed // for the disjunction of sparse iteration spaces. // @@ -935,16 +942,16 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) { // x | x |x+y| x | x |x-y| return disjSet(kind, buildLattices(tensorExps[e].children.e0, i), buildLattices(tensorExps[e].children.e1, i)); - case kShrS: - case kShrU: - case kShlI: + case TensorExp::Kind::kShrS: + case TensorExp::Kind::kShrU: + case TensorExp::Kind::kShlI: // A shift operation by an invariant amount (viz. tensor expressions // can only occur at the left-hand-side of the operator) can be handled // with the conjuction rule. assert(isInvariant(tensorExps[e].children.e1)); return conjSet(kind, buildLattices(tensorExps[e].children.e0, i), buildLattices(tensorExps[e].children.e1, i)); - case kBinary: + case TensorExp::Kind::kBinary: // A custom binary operation. // // x op y| !y | y | @@ -971,11 +978,11 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) { } bool includeLeft = binop.getLeftIdentity() || !leftRegion.empty(); bool includeRight = binop.getRightIdentity() || !rightRegion.empty(); - return combiSet(kBinary, child0, child1, binop, includeLeft, - kBinaryBranch, leftYield, includeRight, kBinaryBranch, - rightYield); + return combiSet(TensorExp::Kind::kBinary, child0, child1, binop, + includeLeft, TensorExp::Kind::kBinaryBranch, leftYield, + includeRight, TensorExp::Kind::kBinaryBranch, rightYield); } - case kReduce: + case TensorExp::Kind::kReduce: // A custom reduce operation. return conjSet(kind, buildLattices(tensorExps[e].children.e0, i), buildLattices(tensorExps[e].children.e1, i), @@ -993,7 +1000,7 @@ std::optional Merger::buildTensorExpFromLinalg(linalg::GenericOp op) { /// Only returns false if we are certain this is a nonzero. bool Merger::maybeZero(ExprId e) const { - if (tensorExps[e].kind == kInvariant) { + if (tensorExps[e].kind == TensorExp::Kind::kInvariant) { if (auto c = tensorExps[e].val.getDefiningOp()) { ArrayAttr arrayAttr = c.getValue(); return arrayAttr[0].cast().getValue().isZero() && @@ -1008,7 +1015,7 @@ bool Merger::maybeZero(ExprId e) const { } bool Merger::isInvariant(ExprId e) const { - return tensorExps[e].kind == kInvariant; + return tensorExps[e].kind == TensorExp::Kind::kInvariant; } Type Merger::inferType(ExprId e, Value src) const { @@ -1060,21 +1067,21 @@ std::optional Merger::buildTensorExp(linalg::GenericOp op, Value v) { if (arg.getOwner()->getParentOp() == op) { OpOperand &t = op->getOpOperand(argN); if (!op.isScalar(&t)) - return addExp(kTensor, argN); + return addExp(TensorExp::Kind::kTensor, argN); v = t.get(); // get scalar value } // Any other argument (marked as scalar argument for the generic op // or belonging to an enveloping op) is considered invariant. - return addExp(kInvariant, v); + return addExp(TensorExp::Kind::kInvariant, v); } // Something defined outside is invariant. 
Operation *def = v.getDefiningOp(); if (def->getBlock() != &op.getRegion().front()) - return addExp(kInvariant, v); + return addExp(TensorExp::Kind::kInvariant, v); // Construct index operations. if (def->getNumOperands() == 0) { if (auto indexOp = dyn_cast(def)) - return addExp(kLoopVar, indexOp.getDim()); + return addExp(TensorExp::Kind::kLoopVar, indexOp.getDim()); } // Construct unary operations if subexpression can be built. if (def->getNumOperands() == 1) { @@ -1082,73 +1089,73 @@ std::optional Merger::buildTensorExp(linalg::GenericOp op, Value v) { if (x.has_value()) { const ExprId e = *x; if (isa(def)) - return addExp(kAbsF, e); + return addExp(TensorExp::Kind::kAbsF, e); if (isa(def)) - return addExp(kAbsC, e); + return addExp(TensorExp::Kind::kAbsC, e); if (isa(def)) - return addExp(kAbsI, e); + return addExp(TensorExp::Kind::kAbsI, e); if (isa(def)) - return addExp(kCeilF, e); + return addExp(TensorExp::Kind::kCeilF, e); if (isa(def)) - return addExp(kFloorF, e); + return addExp(TensorExp::Kind::kFloorF, e); if (isa(def)) - return addExp(kSqrtF, e); + return addExp(TensorExp::Kind::kSqrtF, e); if (isa(def)) - return addExp(kSqrtC, e); + return addExp(TensorExp::Kind::kSqrtC, e); if (isa(def)) - return addExp(kExpm1F, e); + return addExp(TensorExp::Kind::kExpm1F, e); if (isa(def)) - return addExp(kExpm1C, e); + return addExp(TensorExp::Kind::kExpm1C, e); if (isa(def)) - return addExp(kLog1pF, e); + return addExp(TensorExp::Kind::kLog1pF, e); if (isa(def)) - return addExp(kLog1pC, e); + return addExp(TensorExp::Kind::kLog1pC, e); if (isa(def)) - return addExp(kSinF, e); + return addExp(TensorExp::Kind::kSinF, e); if (isa(def)) - return addExp(kSinC, e); + return addExp(TensorExp::Kind::kSinC, e); if (isa(def)) - return addExp(kTanhF, e); + return addExp(TensorExp::Kind::kTanhF, e); if (isa(def)) - return addExp(kTanhC, e); + return addExp(TensorExp::Kind::kTanhC, e); if (isa(def)) - return addExp(kNegF, e); // no negi in std + return addExp(TensorExp::Kind::kNegF, e); // no negi in std if (isa(def)) - return addExp(kNegC, e); + return addExp(TensorExp::Kind::kNegC, e); if (isa(def)) - return addExp(kTruncF, e, v); + return addExp(TensorExp::Kind::kTruncF, e, v); if (isa(def)) - return addExp(kExtF, e, v); + return addExp(TensorExp::Kind::kExtF, e, v); if (isa(def)) - return addExp(kCastFS, e, v); + return addExp(TensorExp::Kind::kCastFS, e, v); if (isa(def)) - return addExp(kCastFU, e, v); + return addExp(TensorExp::Kind::kCastFU, e, v); if (isa(def)) - return addExp(kCastSF, e, v); + return addExp(TensorExp::Kind::kCastSF, e, v); if (isa(def)) - return addExp(kCastUF, e, v); + return addExp(TensorExp::Kind::kCastUF, e, v); if (isa(def)) - return addExp(kCastS, e, v); + return addExp(TensorExp::Kind::kCastS, e, v); if (isa(def)) - return addExp(kCastU, e, v); + return addExp(TensorExp::Kind::kCastU, e, v); if (isa(def)) - return addExp(kCastIdx, e, v); + return addExp(TensorExp::Kind::kCastIdx, e, v); if (isa(def)) - return addExp(kTruncI, e, v); + return addExp(TensorExp::Kind::kTruncI, e, v); if (isa(def)) - return addExp(kCIm, e); + return addExp(TensorExp::Kind::kCIm, e); if (isa(def)) - return addExp(kCRe, e); + return addExp(TensorExp::Kind::kCRe, e); if (isa(def)) - return addExp(kBitCast, e, v); + return addExp(TensorExp::Kind::kBitCast, e, v); if (auto unop = dyn_cast(def)) { if (isAdmissibleBranch(unop, unop.getPresentRegion()) && isAdmissibleBranch(unop, unop.getAbsentRegion())) - return addExp(kUnary, e, Value(), def); + return addExp(TensorExp::Kind::kUnary, e, 
Value(), def); } if (auto selop = dyn_cast(def)) { if (isAdmissibleBranch(selop, selop.getRegion())) - return addExp(kSelect, e, Value(), def); + return addExp(TensorExp::Kind::kSelect, e, Value(), def); } } } @@ -1162,50 +1169,50 @@ std::optional Merger::buildTensorExp(linalg::GenericOp op, Value v) { const ExprId e0 = *x; const ExprId e1 = *y; if (isa(def)) - return addExp(kMulF, e0, e1); + return addExp(TensorExp::Kind::kMulF, e0, e1); if (isa(def)) - return addExp(kMulC, e0, e1); + return addExp(TensorExp::Kind::kMulC, e0, e1); if (isa(def)) - return addExp(kMulI, e0, e1); + return addExp(TensorExp::Kind::kMulI, e0, e1); if (isa(def) && !maybeZero(e1)) - return addExp(kDivF, e0, e1); + return addExp(TensorExp::Kind::kDivF, e0, e1); if (isa(def) && !maybeZero(e1)) - return addExp(kDivC, e0, e1); + return addExp(TensorExp::Kind::kDivC, e0, e1); if (isa(def) && !maybeZero(e1)) - return addExp(kDivS, e0, e1); + return addExp(TensorExp::Kind::kDivS, e0, e1); if (isa(def) && !maybeZero(e1)) - return addExp(kDivU, e0, e1); + return addExp(TensorExp::Kind::kDivU, e0, e1); if (isa(def)) - return addExp(kAddF, e0, e1); + return addExp(TensorExp::Kind::kAddF, e0, e1); if (isa(def)) - return addExp(kAddC, e0, e1); + return addExp(TensorExp::Kind::kAddC, e0, e1); if (isa(def)) - return addExp(kAddI, e0, e1); + return addExp(TensorExp::Kind::kAddI, e0, e1); if (isa(def)) - return addExp(kSubF, e0, e1); + return addExp(TensorExp::Kind::kSubF, e0, e1); if (isa(def)) - return addExp(kSubC, e0, e1); + return addExp(TensorExp::Kind::kSubC, e0, e1); if (isa(def)) - return addExp(kSubI, e0, e1); + return addExp(TensorExp::Kind::kSubI, e0, e1); if (isa(def)) - return addExp(kAndI, e0, e1); + return addExp(TensorExp::Kind::kAndI, e0, e1); if (isa(def)) - return addExp(kOrI, e0, e1); + return addExp(TensorExp::Kind::kOrI, e0, e1); if (isa(def)) - return addExp(kXorI, e0, e1); + return addExp(TensorExp::Kind::kXorI, e0, e1); if (isa(def) && isInvariant(e1)) - return addExp(kShrS, e0, e1); + return addExp(TensorExp::Kind::kShrS, e0, e1); if (isa(def) && isInvariant(e1)) - return addExp(kShrU, e0, e1); + return addExp(TensorExp::Kind::kShrU, e0, e1); if (isa(def) && isInvariant(e1)) - return addExp(kShlI, e0, e1); + return addExp(TensorExp::Kind::kShlI, e0, e1); if (auto binop = dyn_cast(def)) { if (isAdmissibleBranch(binop, binop.getOverlapRegion()) && (binop.getLeftIdentity() || isAdmissibleBranch(binop, binop.getLeftRegion())) && (binop.getRightIdentity() || isAdmissibleBranch(binop, binop.getRightRegion()))) - return addExp(kBinary, e0, e1, Value(), def); + return addExp(TensorExp::Kind::kBinary, e0, e1, Value(), def); } } } @@ -1219,7 +1226,7 @@ std::optional Merger::buildTensorExp(linalg::GenericOp op, Value v) { const ExprId e1 = *y; if (auto redop = dyn_cast(def)) { if (isAdmissibleBranch(redop, redop.getRegion())) - return addExp(kReduce, e0, e1, Value(), def); + return addExp(TensorExp::Kind::kReduce, e0, e1, Value(), def); } } } @@ -1276,136 +1283,136 @@ Value Merger::buildExp(RewriterBase &rewriter, Location loc, ExprId e, Value v0, Value v1) const { switch (tensorExps[e].kind) { // Leaf. - case kTensor: - case kInvariant: - case kLoopVar: + case TensorExp::Kind::kTensor: + case TensorExp::Kind::kInvariant: + case TensorExp::Kind::kLoopVar: llvm_unreachable("unexpected non-op"); // Unary operations. 
- case kAbsF: + case TensorExp::Kind::kAbsF: return rewriter.create(loc, v0); - case kAbsC: { + case TensorExp::Kind::kAbsC: { auto type = v0.getType().cast(); auto eltType = type.getElementType().cast(); return rewriter.create(loc, eltType, v0); } - case kAbsI: + case TensorExp::Kind::kAbsI: return rewriter.create(loc, v0); - case kCeilF: + case TensorExp::Kind::kCeilF: return rewriter.create(loc, v0); - case kFloorF: + case TensorExp::Kind::kFloorF: return rewriter.create(loc, v0); - case kSqrtF: + case TensorExp::Kind::kSqrtF: return rewriter.create(loc, v0); - case kSqrtC: + case TensorExp::Kind::kSqrtC: return rewriter.create(loc, v0); - case kExpm1F: + case TensorExp::Kind::kExpm1F: return rewriter.create(loc, v0); - case kExpm1C: + case TensorExp::Kind::kExpm1C: return rewriter.create(loc, v0); - case kLog1pF: + case TensorExp::Kind::kLog1pF: return rewriter.create(loc, v0); - case kLog1pC: + case TensorExp::Kind::kLog1pC: return rewriter.create(loc, v0); - case kSinF: + case TensorExp::Kind::kSinF: return rewriter.create(loc, v0); - case kSinC: + case TensorExp::Kind::kSinC: return rewriter.create(loc, v0); - case kTanhF: + case TensorExp::Kind::kTanhF: return rewriter.create(loc, v0); - case kTanhC: + case TensorExp::Kind::kTanhC: return rewriter.create(loc, v0); - case kNegF: + case TensorExp::Kind::kNegF: return rewriter.create(loc, v0); - case kNegC: + case TensorExp::Kind::kNegC: return rewriter.create(loc, v0); - case kNegI: // no negi in std + case TensorExp::Kind::kNegI: // no negi in std return rewriter.create( loc, rewriter.create(loc, v0.getType(), rewriter.getZeroAttr(v0.getType())), v0); - case kTruncF: + case TensorExp::Kind::kTruncF: return rewriter.create(loc, inferType(e, v0), v0); - case kExtF: + case TensorExp::Kind::kExtF: return rewriter.create(loc, inferType(e, v0), v0); - case kCastFS: + case TensorExp::Kind::kCastFS: return rewriter.create(loc, inferType(e, v0), v0); - case kCastFU: + case TensorExp::Kind::kCastFU: return rewriter.create(loc, inferType(e, v0), v0); - case kCastSF: + case TensorExp::Kind::kCastSF: return rewriter.create(loc, inferType(e, v0), v0); - case kCastUF: + case TensorExp::Kind::kCastUF: return rewriter.create(loc, inferType(e, v0), v0); - case kCastS: + case TensorExp::Kind::kCastS: return rewriter.create(loc, inferType(e, v0), v0); - case kCastU: + case TensorExp::Kind::kCastU: return rewriter.create(loc, inferType(e, v0), v0); - case kCastIdx: + case TensorExp::Kind::kCastIdx: return rewriter.create(loc, inferType(e, v0), v0); - case kTruncI: + case TensorExp::Kind::kTruncI: return rewriter.create(loc, inferType(e, v0), v0); - case kCIm: { + case TensorExp::Kind::kCIm: { auto type = v0.getType().cast(); auto eltType = type.getElementType().cast(); return rewriter.create(loc, eltType, v0); } - case kCRe: { + case TensorExp::Kind::kCRe: { auto type = v0.getType().cast(); auto eltType = type.getElementType().cast(); return rewriter.create(loc, eltType, v0); } - case kBitCast: + case TensorExp::Kind::kBitCast: return rewriter.create(loc, inferType(e, v0), v0); // Binary operations. 
- case kMulF: + case TensorExp::Kind::kMulF: return rewriter.create(loc, v0, v1); - case kMulC: + case TensorExp::Kind::kMulC: return rewriter.create(loc, v0, v1); - case kMulI: + case TensorExp::Kind::kMulI: return rewriter.create(loc, v0, v1); - case kDivF: + case TensorExp::Kind::kDivF: return rewriter.create(loc, v0, v1); - case kDivC: + case TensorExp::Kind::kDivC: return rewriter.create(loc, v0, v1); - case kDivS: + case TensorExp::Kind::kDivS: return rewriter.create(loc, v0, v1); - case kDivU: + case TensorExp::Kind::kDivU: return rewriter.create(loc, v0, v1); - case kAddF: + case TensorExp::Kind::kAddF: return rewriter.create(loc, v0, v1); - case kAddC: + case TensorExp::Kind::kAddC: return rewriter.create(loc, v0, v1); - case kAddI: + case TensorExp::Kind::kAddI: return rewriter.create(loc, v0, v1); - case kSubF: + case TensorExp::Kind::kSubF: return rewriter.create(loc, v0, v1); - case kSubC: + case TensorExp::Kind::kSubC: return rewriter.create(loc, v0, v1); - case kSubI: + case TensorExp::Kind::kSubI: return rewriter.create(loc, v0, v1); - case kAndI: + case TensorExp::Kind::kAndI: return rewriter.create(loc, v0, v1); - case kOrI: + case TensorExp::Kind::kOrI: return rewriter.create(loc, v0, v1); - case kXorI: + case TensorExp::Kind::kXorI: return rewriter.create(loc, v0, v1); - case kShrS: + case TensorExp::Kind::kShrS: return rewriter.create(loc, v0, v1); - case kShrU: + case TensorExp::Kind::kShrU: return rewriter.create(loc, v0, v1); - case kShlI: + case TensorExp::Kind::kShlI: return rewriter.create(loc, v0, v1); - case kBinaryBranch: // semi-ring ops with custom logic. + case TensorExp::Kind::kBinaryBranch: // semi-ring ops with custom logic. return insertYieldOp(rewriter, loc, *tensorExps[e].op->getBlock()->getParent(), {v0}); - case kUnary: + case TensorExp::Kind::kUnary: return buildUnaryPresent(rewriter, loc, tensorExps[e].op, v0); - case kSelect: + case TensorExp::Kind::kSelect: return insertYieldOp(rewriter, loc, cast(tensorExps[e].op).getRegion(), {v0}); - case kBinary: + case TensorExp::Kind::kBinary: return buildBinaryOverlap(rewriter, loc, tensorExps[e].op, v0, v1); - case kReduce: { + case TensorExp::Kind::kReduce: { ReduceOp redOp = cast(tensorExps[e].op); return insertYieldOp(rewriter, loc, redOp.getRegion(), {v0, v1}); } diff --git a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp index 10d350f7c6b97..270b5836907e3 100644 --- a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp +++ b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp @@ -23,18 +23,18 @@ namespace { /// #define FOREVERY_BINOP(DO) \ - DO(mulf, Kind::kMulF) \ - DO(mulc, Kind::kMulC) \ - DO(muli, Kind::kMulI) \ - DO(addf, Kind::kAddF) \ - DO(addc, Kind::kAddC) \ - DO(addi, Kind::kAddI) \ - DO(subf, Kind::kSubF) \ - DO(subc, Kind::kSubC) \ - DO(subi, Kind::kSubI) \ - DO(andi, Kind::kAndI) \ - DO(xori, Kind::kXorI) \ - DO(ori, Kind::kOrI) + DO(mulf, TensorExp::Kind::kMulF) \ + DO(mulc, TensorExp::Kind::kMulC) \ + DO(muli, TensorExp::Kind::kMulI) \ + DO(addf, TensorExp::Kind::kAddF) \ + DO(addc, TensorExp::Kind::kAddC) \ + DO(addi, TensorExp::Kind::kAddI) \ + DO(subf, TensorExp::Kind::kSubF) \ + DO(subc, TensorExp::Kind::kSubC) \ + DO(subi, TensorExp::Kind::kSubI) \ + DO(andi, TensorExp::Kind::kAndI) \ + DO(xori, TensorExp::Kind::kXorI) \ + DO(ori, TensorExp::Kind::kOrI) // TODO: Disjunctive binary operations that need special handling are not // included, e.g., Division are not tested (for now) as it need a constant @@ -82,7 +82,7 @@ 
namespace { /// Simple recursive data structure used to match expressions in Mergers. struct Pattern { - Kind kind; + TensorExp::Kind kind; /// Expressions representing tensors simply have a tensor number. unsigned tensorNum; @@ -94,11 +94,12 @@ struct Pattern { /// Constructors. /// Rather than using these, please use the readable helper constructor /// functions below to make tests more readable. - Pattern(unsigned tensorNum) : kind(Kind::kTensor), tensorNum(tensorNum) {} - Pattern(Kind kind, const std::shared_ptr &e0, + Pattern(unsigned tensorNum) + : kind(TensorExp::Kind::kTensor), tensorNum(tensorNum) {} + Pattern(TensorExp::Kind kind, const std::shared_ptr &e0, const std::shared_ptr &e1) : kind(kind), e0(e0), e1(e1) { - assert(kind >= Kind::kMulF); + assert(kind >= TensorExp::Kind::kMulF); assert(e0 && e1); } }; @@ -134,7 +135,7 @@ class MergerTestBase : public ::testing::Test { /// unsigned tensor(unsigned tensor) { - return merger.addExp(Kind::kTensor, tensor); + return merger.addExp(TensorExp::Kind::kTensor, tensor); } #define IMPL_BINOP_EXPR(OP, KIND) \ @@ -222,69 +223,69 @@ class MergerTestBase : public ::testing::Test { return false; switch (tensorExp.kind) { // Leaf. - case kTensor: + case TensorExp::Kind::kTensor: return tensorExp.tensor == pattern->tensorNum; - case kInvariant: - case kLoopVar: + case TensorExp::Kind::kInvariant: + case TensorExp::Kind::kLoopVar: llvm_unreachable("invariant not handled yet"); // Unary operations. - case kAbsF: - case kAbsC: - case kAbsI: - case kCeilF: - case kFloorF: - case kSqrtF: - case kSqrtC: - case kExpm1F: - case kExpm1C: - case kLog1pF: - case kLog1pC: - case kSinF: - case kSinC: - case kTanhF: - case kTanhC: - case kNegF: - case kNegC: - case kNegI: - case kTruncF: - case kExtF: - case kCastFS: - case kCastFU: - case kCastSF: - case kCastUF: - case kCastS: - case kCastU: - case kCastIdx: - case kTruncI: - case kCIm: - case kCRe: - case kBitCast: - case kSelect: - case kBinaryBranch: - case kUnary: + case TensorExp::Kind::kAbsF: + case TensorExp::Kind::kAbsC: + case TensorExp::Kind::kAbsI: + case TensorExp::Kind::kCeilF: + case TensorExp::Kind::kFloorF: + case TensorExp::Kind::kSqrtF: + case TensorExp::Kind::kSqrtC: + case TensorExp::Kind::kExpm1F: + case TensorExp::Kind::kExpm1C: + case TensorExp::Kind::kLog1pF: + case TensorExp::Kind::kLog1pC: + case TensorExp::Kind::kSinF: + case TensorExp::Kind::kSinC: + case TensorExp::Kind::kTanhF: + case TensorExp::Kind::kTanhC: + case TensorExp::Kind::kNegF: + case TensorExp::Kind::kNegC: + case TensorExp::Kind::kNegI: + case TensorExp::Kind::kTruncF: + case TensorExp::Kind::kExtF: + case TensorExp::Kind::kCastFS: + case TensorExp::Kind::kCastFU: + case TensorExp::Kind::kCastSF: + case TensorExp::Kind::kCastUF: + case TensorExp::Kind::kCastS: + case TensorExp::Kind::kCastU: + case TensorExp::Kind::kCastIdx: + case TensorExp::Kind::kTruncI: + case TensorExp::Kind::kCIm: + case TensorExp::Kind::kCRe: + case TensorExp::Kind::kBitCast: + case TensorExp::Kind::kSelect: + case TensorExp::Kind::kBinaryBranch: + case TensorExp::Kind::kUnary: return compareExpression(tensorExp.children.e0, pattern->e0); // Binary operations. 
- case kMulF: - case kMulC: - case kMulI: - case kDivF: - case kDivC: - case kDivS: - case kDivU: - case kAddF: - case kAddC: - case kAddI: - case kSubF: - case kSubC: - case kSubI: - case kAndI: - case kOrI: - case kXorI: - case kShrS: - case kShrU: - case kShlI: - case kBinary: - case kReduce: + case TensorExp::Kind::kMulF: + case TensorExp::Kind::kMulC: + case TensorExp::Kind::kMulI: + case TensorExp::Kind::kDivF: + case TensorExp::Kind::kDivC: + case TensorExp::Kind::kDivS: + case TensorExp::Kind::kDivU: + case TensorExp::Kind::kAddF: + case TensorExp::Kind::kAddC: + case TensorExp::Kind::kAddI: + case TensorExp::Kind::kSubF: + case TensorExp::Kind::kSubC: + case TensorExp::Kind::kSubI: + case TensorExp::Kind::kAndI: + case TensorExp::Kind::kOrI: + case TensorExp::Kind::kXorI: + case TensorExp::Kind::kShrS: + case TensorExp::Kind::kShrU: + case TensorExp::Kind::kShlI: + case TensorExp::Kind::kBinary: + case TensorExp::Kind::kReduce: return compareExpression(tensorExp.children.e0, pattern->e0) && compareExpression(tensorExp.children.e1, pattern->e1); } @@ -312,15 +313,15 @@ class MergerTest3T1L : public MergerTestBase { EXPECT_TRUE(merger.getOutTensorID() == t2); // Tensor 0: sparse input vector. - merger.addExp(Kind::kTensor, t0, -1u); + merger.addExp(TensorExp::Kind::kTensor, t0, -1u); merger.setLevelAndType(t0, l0, 0, DimLevelType::Compressed); // Tensor 1: sparse input vector. - merger.addExp(Kind::kTensor, t1, -1u); + merger.addExp(TensorExp::Kind::kTensor, t1, -1u); merger.setLevelAndType(t1, l0, 0, DimLevelType::Compressed); // Tensor 2: dense output vector. - merger.addExp(Kind::kTensor, t2, -1u); + merger.addExp(TensorExp::Kind::kTensor, t2, -1u); merger.setLevelAndType(t2, l0, 0, DimLevelType::Dense); } }; @@ -337,19 +338,19 @@ class MergerTest4T1L : public MergerTestBase { EXPECT_TRUE(merger.getOutTensorID() == t3); // Tensor 0: sparse input vector. - merger.addExp(Kind::kTensor, t0, -1u); + merger.addExp(TensorExp::Kind::kTensor, t0, -1u); merger.setLevelAndType(t0, l0, 0, DimLevelType::Compressed); // Tensor 1: sparse input vector. - merger.addExp(Kind::kTensor, t1, -1u); + merger.addExp(TensorExp::Kind::kTensor, t1, -1u); merger.setLevelAndType(t1, l0, 0, DimLevelType::Compressed); // Tensor 2: sparse input vector - merger.addExp(Kind::kTensor, t2, -1u); + merger.addExp(TensorExp::Kind::kTensor, t2, -1u); merger.setLevelAndType(t2, l0, 0, DimLevelType::Compressed); // Tensor 3: dense output vector - merger.addExp(Kind::kTensor, t3, -1u); + merger.addExp(TensorExp::Kind::kTensor, t3, -1u); merger.setLevelAndType(t3, l0, 0, DimLevelType::Dense); } }; @@ -370,15 +371,15 @@ class MergerTest3T1LD : public MergerTestBase { EXPECT_TRUE(merger.getOutTensorID() == t2); // Tensor 0: sparse input vector. - merger.addExp(Kind::kTensor, t0, -1u); + merger.addExp(TensorExp::Kind::kTensor, t0, -1u); merger.setLevelAndType(t0, l0, 0, DimLevelType::Compressed); // Tensor 1: dense input vector. - merger.addExp(Kind::kTensor, t1, -1u); + merger.addExp(TensorExp::Kind::kTensor, t1, -1u); merger.setLevelAndType(t1, l0, 0, DimLevelType::Dense); // Tensor 2: dense output vector. - merger.addExp(Kind::kTensor, t2, -1u); + merger.addExp(TensorExp::Kind::kTensor, t2, -1u); merger.setLevelAndType(t2, l0, 0, DimLevelType::Dense); } }; @@ -399,19 +400,19 @@ class MergerTest4T1LU : public MergerTestBase { EXPECT_TRUE(merger.getOutTensorID() == t3); // Tensor 0: undef input vector. 
- merger.addExp(Kind::kTensor, t0, -1u); + merger.addExp(TensorExp::Kind::kTensor, t0, -1u); merger.setLevelAndType(t0, l0, 0, DimLevelType::Undef); // Tensor 1: dense input vector. - merger.addExp(Kind::kTensor, t1, -1u); + merger.addExp(TensorExp::Kind::kTensor, t1, -1u); merger.setLevelAndType(t1, l0, 0, DimLevelType::Dense); // Tensor 2: undef input vector. - merger.addExp(Kind::kTensor, t2, -1u); + merger.addExp(TensorExp::Kind::kTensor, t2, -1u); merger.setLevelAndType(t2, l0, 0, DimLevelType::Undef); // Tensor 3: dense output vector. - merger.addExp(Kind::kTensor, t3, -1u); + merger.addExp(TensorExp::Kind::kTensor, t3, -1u); merger.setLevelAndType(t3, l0, 0, DimLevelType::Dense); } }; @@ -435,15 +436,15 @@ class MergerTest3T1LSo : public MergerTestBase { merger.setHasSparseOut(true); // Tensor 0: undef input vector. - merger.addExp(Kind::kTensor, t0, -1u); + merger.addExp(TensorExp::Kind::kTensor, t0, -1u); merger.setLevelAndType(t0, l0, 0, DimLevelType::Undef); // Tensor 1: undef input vector. - merger.addExp(Kind::kTensor, t1, -1u); + merger.addExp(TensorExp::Kind::kTensor, t1, -1u); merger.setLevelAndType(t1, l0, 0, DimLevelType::Undef); // Tensor 2: sparse output vector. - merger.addExp(Kind::kTensor, t2, -1u); + merger.addExp(TensorExp::Kind::kTensor, t2, -1u); merger.setLevelAndType(t2, l0, 0, DimLevelType::Compressed); } }; From 3b9732132d8b73d66a1c84edf0ce2a24bab52b5a Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 20 Mar 2023 16:20:03 -0700 Subject: [PATCH 095/691] [NFC][tsan] Clang-format includes --- compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp | 10 +++++----- compiler-rt/lib/tsan/rtl/tsan_suppressions.cpp | 5 +++-- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp index c2cff60e2da2e..0311df553fdd0 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_rtl_report.cpp @@ -10,20 +10,20 @@ // //===----------------------------------------------------------------------===// +#include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_placement_new.h" #include "sanitizer_common/sanitizer_stackdepot.h" -#include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_stacktrace.h" +#include "tsan_fd.h" +#include "tsan_flags.h" +#include "tsan_mman.h" #include "tsan_platform.h" +#include "tsan_report.h" #include "tsan_rtl.h" #include "tsan_suppressions.h" #include "tsan_symbolize.h" -#include "tsan_report.h" #include "tsan_sync.h" -#include "tsan_mman.h" -#include "tsan_flags.h" -#include "tsan_fd.h" namespace __tsan { diff --git a/compiler-rt/lib/tsan/rtl/tsan_suppressions.cpp b/compiler-rt/lib/tsan/rtl/tsan_suppressions.cpp index a1c1bf81bf670..9cdfa32a93430 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_suppressions.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_suppressions.cpp @@ -10,15 +10,16 @@ // //===----------------------------------------------------------------------===// +#include "tsan_suppressions.h" + #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_placement_new.h" #include "sanitizer_common/sanitizer_suppressions.h" -#include "tsan_suppressions.h" -#include "tsan_rtl.h" #include "tsan_flags.h" #include "tsan_mman.h" #include "tsan_platform.h" +#include "tsan_rtl.h" #if !SANITIZER_GO // Suppressions for true/false positives in standard libraries. 
From 402dd79a293dc23f0ccf521d79386880e4969584 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Fri, 17 Mar 2023 20:48:21 -0700 Subject: [PATCH 096/691] sanitizer_common: Use plain thread_local for __sancov_lowest_stack definition. We can't use initial-exec for the definition of __sancov_lowest_stack because it gets linked into shared libraries such as the UBSan and HWASan runtimes on Android. I think we can expect plain thread_local to work unconditionally in sanitizer_common as several other sanitizers are already using it, so drop the platform-dependent macro and just use thread_local in the definition. Differential Revision: https://reviews.llvm.org/D146351 --- .../sanitizer_common/sanitizer_coverage_libcdep_new.cpp | 6 +++++- .../lib/sanitizer_common/sanitizer_internal_defs.h | 9 --------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp index 956b48e0b434b..f2dd6b227ccd3 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp @@ -282,7 +282,11 @@ SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_pcs_init, const uptr* beg, // Weak definition for code instrumented with -fsanitize-coverage=stack-depth // and later linked with code containing a strong definition. // E.g., -fsanitize=fuzzer-no-link +// FIXME: Update Apple deployment target so that thread_local is always +// supported, and remove the #if. +#if !SANITIZER_APPLE SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE -SANITIZER_TLS_INITIAL_EXEC_ATTRIBUTE uptr __sancov_lowest_stack; +thread_local uptr __sancov_lowest_stack; +#endif #endif // !SANITIZER_FUCHSIA diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h index 6b800820ab80b..98186c429e9c0 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_internal_defs.h @@ -37,15 +37,6 @@ # define SANITIZER_WEAK_ATTRIBUTE __attribute__((weak)) #endif -// TLS is handled differently on different platforms -#if SANITIZER_LINUX || SANITIZER_NETBSD || \ - SANITIZER_FREEBSD -# define SANITIZER_TLS_INITIAL_EXEC_ATTRIBUTE \ - __attribute__((tls_model("initial-exec"))) thread_local -#else -# define SANITIZER_TLS_INITIAL_EXEC_ATTRIBUTE -#endif - //--------------------------- WEAK FUNCTIONS ---------------------------------// // When working with weak functions, to simplify the code and make it more // portable, when possible define a default implementation using this macro: From 805ec19d7d9915989be8a8a626176b5e29e19eee Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Sun, 2 Oct 2022 12:34:14 -0500 Subject: [PATCH 097/691] [IRSim] Check largest sections first when analyzing similarity When we check for similarity, right now there is no order to how it is checked, except for the suffix tree ordering. We can reduce how much structural analysis we perform by checking the regions in decreasing size. In doing so, we know that if two large sections match, each of their contained regions also match. This allows us to skip the structural checking for each smaller section. It does require that we use the large regions as a "bridge" to create the canonical mapping between the two regions. This reduces compile time significantly for some benchmarks.
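To illustrate the containment idea, here is a minimal standalone sketch. The Cand struct and containedInSameGroup helper are hypothetical simplifications invented for this example, not the actual IRSimilarityCandidate API; they only model a candidate as an instruction range plus an assigned structural group.

#include <algorithm>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for IRSimilarityCandidate: an inclusive [Start, End]
// instruction range plus the structural group it was assigned to, or -1 if
// it has not been grouped yet.
struct Cand {
  unsigned Start, End;
  int Group = -1;
};

// If A and B are each fully contained in already-grouped candidates that
// share the same structural group, the large pair already matched, so A and
// B must be structurally similar too and the expensive pairwise structural
// check can be skipped.
static bool containedInSameGroup(const Cand &A, const Cand &B,
                                 const std::vector<Cand> &Grouped) {
  for (const Cand &GA : Grouped) {
    if (GA.Start > A.Start || GA.End < A.End)
      continue; // GA does not contain A.
    for (const Cand &GB : Grouped) {
      if (GB.Start > B.Start || GB.End < B.End)
        continue; // GB does not contain B.
      if (GA.Group == GB.Group)
        return true;
    }
  }
  return false;
}

int main() {
  // Process the largest regions first so they can act as the "bridge".
  std::vector<Cand> All = {{0, 9}, {10, 19}, {2, 5}, {12, 15}};
  std::stable_sort(All.begin(), All.end(), [](const Cand &L, const Cand &R) {
    return (L.End - L.Start) > (R.End - R.Start);
  });
  // Suppose the two large regions were already matched into group 0.
  std::vector<Cand> Grouped = {{0, 9, 0}, {10, 19, 0}};
  // The two small regions sit inside that matched pair: no re-check needed.
  std::printf("skip structural check: %s\n",
              containedInSameGroup(All[2], All[3], Grouped) ? "yes" : "no");
}

The real pass additionally derives the canonical value-number mapping for the small pair by walking through the large pair's mapping, rather than merely skipping the check.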
It will not perform as well for programs with many small items. Reviewer: paquette Differential Revision: https://reviews.llvm.org/D139338 --- .../llvm/Analysis/IRSimilarityIdentifier.h | 43 ++++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 214 +++++++++++++++++- .../Transforms/IROutliner/illegal-assumes.ll | 40 ++-- 3 files changed, 273 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index 9f9e7c59b42ba..d40d51e2d3376 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -850,6 +850,49 @@ class IRSimilarityCandidate { IRSimilarityCandidate &SourceCand, DenseMap> &ToSourceMapping, DenseMap> &FromSourceMapping); + + /// Create a mapping for the value numbering of the calling + /// IRSimilarityCandidate, to a different separate set of numbers, based on + /// the canonical ordering in \p SourceCand. These are defined based on the + /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of + /// these relationships should have the same information, just in opposite + /// directions. Uses the \p OneToOne mapping from target candidate to \p + /// SourceCand GVNs to determine the mapping first for values with multiple + /// mappings. This mapping is created by the ordering of operands in the + /// instruction they are first seen in the candidates. + /// + /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a + /// canonical numbering from. + /// \param [in,out] OneToOne - A mapping of value numbers from candidate + /// \p A to candidate \B using the structure of the original instructions. + /// \param ToSourceMapping - The mapping of value numbers from this candidate + /// to \p SourceCand. + /// \param FromSourceMapping - The mapping of value numbers from \p SoureCand + /// to this candidate. + void createCanonicalRelationFrom( + IRSimilarityCandidate &SourceCand, + DenseMap &OneToOne, + DenseMap> &ToSourceMapping, + DenseMap> &FromSourceMapping); + + /// Create a mapping for the value numbering of the calling + /// IRSimilarityCandidate, to a different separate set of numbers, based on + /// the canonical ordering in \p SourceCand. These are defined based on the + /// canonical mapping defined between \p SoureCandLarge and + /// \p TargetCandLarge. These IRSimilarityCandidates are already structurally + /// similar, and fully encapsulate the IRSimilarityCandidates in question. + /// These are used as a "bridge" from the \p SourceCand to the target. + /// + /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a + /// canonical numbering from. + /// \param SoureCandLarge - The IRSimilarityCandidate fully containing + /// \p SourceCand. + /// \param TargetCandLarge - The IRSimilarityCandidate fully containing + /// this Candidate. + void createCanonicalRelationFrom( + IRSimilarityCandidate &SourceCand, + IRSimilarityCandidate &SourceCandLarge, + IRSimilarityCandidate &TargetCandLarge); /// \param [in,out] BBSet - The set to track the basic blocks. 
void getBasicBlocks(DenseSet &BBSet) const { diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index c8007be4142cf..454ce5a6925bf 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -1101,6 +1101,76 @@ void IRSimilarityCandidate::createCanonicalRelationFrom( } } +void IRSimilarityCandidate::createCanonicalRelationFrom( + IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge, + IRSimilarityCandidate &TargetCandLarge) { + assert(!SourceCand.CanonNumToNumber.empty() && + "Canonical Relationship is non-empty"); + assert(!SourceCand.NumberToCanonNum.empty() && + "Canonical Relationship is non-empty"); + + assert(!SourceCandLarge.CanonNumToNumber.empty() && + "Canonical Relationship is non-empty"); + assert(!SourceCandLarge.NumberToCanonNum.empty() && + "Canonical Relationship is non-empty"); + + assert(!TargetCandLarge.CanonNumToNumber.empty() && + "Canonical Relationship is non-empty"); + assert(!TargetCandLarge.NumberToCanonNum.empty() && + "Canonical Relationship is non-empty"); + + assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty"); + assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty"); + + // We're going to use the larger candidates as a "bridge" to create the + // canonical number for the target candidate since we have idetified two + // candidates as subsequences of larger sequences, and therefore must be + // structurally similar. + for (std::pair &ValueNumPair : ValueToNumber) { + Value *CurrVal = ValueNumPair.first; + unsigned TargetCandGVN = ValueNumPair.second; + + // Find the numbering in the large candidate that surrounds the + // current candidate. + std::optional OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal); + assert(OLargeTargetGVN.has_value() && "GVN not found for Value"); + + // Get the canonical numbering in the large target candidate. + std::optional OTargetCandCanon = + TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value()); + assert(OTargetCandCanon.has_value() && + "Canononical Number not found for GVN"); + + // Get the GVN in the large source candidate from the canonical numbering. + std::optional OLargeSourceGVN = + SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value()); + assert(OLargeSourceGVN.has_value() && + "GVN Number not found for Canonical Number"); + + // Get the Value from the GVN in the large source candidate. + std::optional OLargeSourceV = + SourceCandLarge.fromGVN(OLargeSourceGVN.value()); + assert(OLargeSourceV.has_value() && "Value not found for GVN"); + + // Get the GVN number for the Value in the source candidate. + std::optional OSourceGVN = + SourceCand.getGVN(OLargeSourceV.value()); + assert(OSourceGVN.has_value() && "GVN Number not found for Value"); + + // Get the canonical numbering from the GVN/ + std::optional OSourceCanon = + SourceCand.getCanonicalNum(OSourceGVN.value()); + assert(OSourceCanon.has_value() && "Canon Number not found for GVN"); + + // Insert the canonical numbering and GVN pair into their respective + // mappings. 
+ CanonNumToNumber.insert( + std::make_pair(OSourceCanon.value(), TargetCandGVN)); + NumberToCanonNum.insert( + std::make_pair(TargetCandGVN, OSourceCanon.value())); + } +} + void IRSimilarityCandidate::createCanonicalMappingFor( IRSimilarityCandidate &CurrCand) { assert(CurrCand.CanonNumToNumber.size() == 0 && @@ -1118,6 +1188,81 @@ void IRSimilarityCandidate::createCanonicalMappingFor( } } +/// Look for larger IRSimilarityCandidates From the previously matched +/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is +/// an overlap, return a pair of structurally similar, larger +/// IRSimilarityCandidates. +/// +/// \param [in] CandA - The first candidate we are trying to determine the +/// structure of. +/// \param [in] CandB - The second candidate we are trying to determine the +/// structure of. +/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in +/// a circuit to the IRSimilarityCandidates that include this instruction. +/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a +/// number representing the structural group assigned to it. +static std::optional< + std::pair> +CheckLargerCands( + IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, + DenseMap> &IndexToIncludedCand, + DenseMap &CandToGroup) { + DenseMap IncludedGroupAndCandA; + DenseMap IncludedGroupAndCandB; + DenseSet IncludedGroupsA; + DenseSet IncludedGroupsB; + + // Find the overall similarity group numbers that fully contain the candidate, + // and record the larger candidate for each group. + auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx()); + std::optional> + Result; + + unsigned CandAStart = CandA.getStartIdx(); + unsigned CandAEnd = CandA.getEndIdx(); + unsigned CandBStart = CandB.getStartIdx(); + unsigned CandBEnd = CandB.getEndIdx(); + if (IdxToCandidateIt == IndexToIncludedCand.end()) + return Result; + for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { + if (MatchedCand->getStartIdx() > CandAStart || + (MatchedCand->getEndIdx() < CandAEnd)) + continue; + unsigned GroupNum = CandToGroup.find(MatchedCand)->second; + IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand)); + IncludedGroupsA.insert(GroupNum); + } + + // Find the overall similarity group numbers that fully contain the next + // candidate, and record the larger candidate for each group. + IdxToCandidateIt = IndexToIncludedCand.find(CandBStart); + if (IdxToCandidateIt == IndexToIncludedCand.end()) + return Result; + for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { + if (MatchedCand->getStartIdx() > CandBStart || + MatchedCand->getEndIdx() < CandBEnd) + continue; + unsigned GroupNum = CandToGroup.find(MatchedCand)->second; + IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand)); + IncludedGroupsB.insert(GroupNum); + } + + // Find the intersection between the two groups, these are the groups where + // the larger candidates exist. + set_intersect(IncludedGroupsA, IncludedGroupsB); + + // If there is no intersection between the sets, then we cannot determine + // whether or not there is a match. + if (IncludedGroupsA.empty()) + return Result; + + // Create a pair that contains the larger candidates. 
+ auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin()); + auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin()); + Result = std::make_pair(ItA->second, ItB->second); + return Result; +} + /// From the list of IRSimilarityCandidates, perform a comparison between each /// IRSimilarityCandidate to determine if there are overlapping /// IRInstructionData, or if they do not have the same structure. @@ -1127,9 +1272,16 @@ void IRSimilarityCandidate::createCanonicalMappingFor( /// \param [out] StructuralGroups - the mapping of unsigned integers to vector /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the /// vector are structurally similar to one another. +/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in +/// a circuit to the IRSimilarityCandidates that include this instruction. +/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a +/// number representing the structural group assigned to it. static void findCandidateStructures( std::vector &CandsForRepSubstring, - DenseMap &StructuralGroups) { + DenseMap &StructuralGroups, + DenseMap> &IndexToIncludedCand, + DenseMap &CandToOverallGroup + ) { std::vector::iterator CandIt, CandEndIt, InnerCandIt, InnerCandEndIt; @@ -1192,6 +1344,24 @@ static void findCandidateStructures( if (CandToGroupItInner != CandToGroup.end()) continue; + // Check if we have found structural similarity between two candidates + // that fully contains the first and second candidates. + std::optional> + LargerPair = CheckLargerCands( + *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup); + + // If a pair was found, it means that we can assume that these smaller + // substrings are also structurally similar. Use the larger candidates to + // determine the canonical mapping between the two sections. + if (LargerPair.has_value()) { + SameStructure = true; + InnerCandIt->createCanonicalRelationFrom( + *CandIt, *LargerPair.value().first, *LargerPair.value().second); + CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); + CurrentGroupPair->second.push_back(*InnerCandIt); + continue; + } + // Otherwise we determine if they have the same structure and add it to // vector if they match. ValueNumberMappingA.clear(); @@ -1218,24 +1388,58 @@ void IRSimilarityIdentifier::findCandidates( std::vector NewCandidateGroups; DenseMap StructuralGroups; + DenseMap> IndexToIncludedCand; + DenseMap CandToGroup; // Iterate over the subsequences found by the Suffix Tree to create // IRSimilarityCandidates for each repeated subsequence and determine which // instances are structurally similar to one another. - for (SuffixTree::RepeatedSubstring &RS : ST) { + + // Sort the suffix tree from longest substring to shortest. 
+ std::vector RSes; + for (SuffixTree::RepeatedSubstring &RS : ST) + RSes.push_back(RS); + + llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS, + const SuffixTree::RepeatedSubstring &RHS) { + return LHS.Length > RHS.Length; + }); + for (SuffixTree::RepeatedSubstring &RS : RSes) { createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, CandsForRepSubstring); if (CandsForRepSubstring.size() < 2) continue; - findCandidateStructures(CandsForRepSubstring, StructuralGroups); - for (std::pair &Group : StructuralGroups) + findCandidateStructures(CandsForRepSubstring, StructuralGroups, + IndexToIncludedCand, CandToGroup); + for (std::pair &Group : StructuralGroups) { // We only add the group if it contains more than one // IRSimilarityCandidate. If there is only one, that means there is no // other repeated subsequence with the same structure. - if (Group.second.size() > 1) + if (Group.second.size() > 1) { SimilarityCandidates->push_back(Group.second); + // Iterate over each candidate in the group, and add an entry for each + // instruction included with a mapping to a set of + // IRSimilarityCandidates that include that instruction. + for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) { + for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx(); + Idx <= Edx; ++Idx) { + DenseMap>::iterator + IdIt; + IdIt = IndexToIncludedCand.find(Idx); + bool Inserted = false; + if (IdIt == IndexToIncludedCand.end()) + std::tie(IdIt, Inserted) = IndexToIncludedCand.insert( + std::make_pair(Idx, DenseSet())); + IdIt->second.insert(&IRCand); + } + // Add mapping of candidate to the overall similarity group number. + CandToGroup.insert( + std::make_pair(&IRCand, SimilarityCandidates->size() - 1)); + } + } + } CandsForRepSubstring.clear(); StructuralGroups.clear(); diff --git a/llvm/test/Transforms/IROutliner/illegal-assumes.ll b/llvm/test/Transforms/IROutliner/illegal-assumes.ll index b91f8ec92200e..d863fe7a29903 100644 --- a/llvm/test/Transforms/IROutliner/illegal-assumes.ll +++ b/llvm/test/Transforms/IROutliner/illegal-assumes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=verify,iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -p iroutliner,verify -ir-outlining-no-cost < %s | FileCheck %s ; This test ensures that we do not include llvm.assumes. There are exceptions ; in the CodeExtractor's algorithm for llvm.assumes, so we ignore it for now. 
@@ -13,13 +13,13 @@ define void @outline_assumes() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_3(i1 true, ptr [[D]], ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_4(i1 true, ptr [[D]], ptr [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) ; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]] -; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) ; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -49,12 +49,12 @@ define void @outline_assumes2() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_3(i1 false, ptr [[D]], ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_4(i1 false, ptr [[D]], ptr [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) ; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -77,16 +77,17 @@ entry: define void @outline_assumes3() { ; CHECK-LABEL: @outline_assumes3( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: store i1 true, ptr [[D]], align 4 -; CHECK-NEXT: [[DL:%.*]] = load i1, ptr [[D]], align 1 -; CHECK-NEXT: [[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] -; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) -; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i1 true, ptr [[D]], ptr [[A]], ptr [[B]], ptr [[C]], ptr [[DL_LOC]]) +; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_3(ptr [[A]]) ; CHECK-NEXT: ret void ; entry: @@ -109,16 +110,17 @@ entry: define void @outline_assumes4() { ; CHECK-LABEL: @outline_assumes4( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: store i1 false, ptr [[D]], align 4 -; CHECK-NEXT: [[DL:%.*]] = load i1, ptr [[D]], align 1 -; CHECK-NEXT: [[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] -; CHECK-NEXT: 
call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) -; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i1 false, ptr [[D]], ptr [[A]], ptr [[B]], ptr [[C]], ptr [[DL_LOC]]) +; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_3(ptr [[A]]) ; CHECK-NEXT: ret void ; entry: From c600b99e5ee6e63a062342fe0b9c00e4b4d8ece4 Mon Sep 17 00:00:00 2001 From: Dave MacLachlan Date: Mon, 20 Mar 2023 16:30:18 -0700 Subject: [PATCH 098/691] Expose `__tsan_on_thread_idle` as part of tsan shared interface `__tsan_on_thread_idle` is part of the tsan interface that was not being exported in shared libraries. This fixes the export so that we can link against dynamic versions of tsan and still call `__tsan_on_thread_idle`. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D146166 --- compiler-rt/lib/tsan/rtl/tsan_interface.h | 2 ++ compiler-rt/lib/tsan/rtl/tsan_mman.cpp | 1 + 2 files changed, 3 insertions(+) diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.h b/compiler-rt/lib/tsan/rtl/tsan_interface.h index b32fb657adc3e..d53c1e3935df7 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interface.h +++ b/compiler-rt/lib/tsan/rtl/tsan_interface.h @@ -88,6 +88,8 @@ SANITIZER_INTERFACE_ATTRIBUTE void __tsan_func_exit(); SANITIZER_INTERFACE_ATTRIBUTE void __tsan_ignore_thread_begin(); SANITIZER_INTERFACE_ATTRIBUTE void __tsan_ignore_thread_end(); +SANITIZER_INTERFACE_ATTRIBUTE void __tsan_on_thread_idle(); + SANITIZER_INTERFACE_ATTRIBUTE void *__tsan_external_register_tag(const char *object_type); SANITIZER_INTERFACE_ATTRIBUTE diff --git a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp index 0937e521193f4..99fa492265615 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_mman.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_mman.cpp @@ -15,6 +15,7 @@ #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_errno.h" #include "sanitizer_common/sanitizer_placement_new.h" +#include "tsan_interface.h" #include "tsan_mman.h" #include "tsan_rtl.h" #include "tsan_report.h" From 46b5087227246c9f89569af3a96126d85c87bbc4 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Mon, 13 Mar 2023 15:30:36 -0700 Subject: [PATCH 099/691] [libc] add basic wide char functions This patch adds the wchar header, as well as the functions to convert to and from wide chars. The header also sets up the definitions for wint_t and wchar_t.
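The btowc/wctob pair being added mirrors the standard <wchar.h> conversions. As a rough illustration of the intended behavior, the following standalone program exercises the hosted C++ equivalents std::btowc and std::wctob from <cwchar> (not the new llvm-libc entrypoints themselves): in the default "C" locale, single-byte characters map to themselves and anything unmappable yields WEOF or EOF. Note this initial llvm-libc version only maps ASCII (0-127), which matches the default-locale behavior shown here.

#include <cstdio>
#include <cwchar>

int main() {
  // A plain ASCII byte converts to the corresponding wide character.
  std::wint_t wc = std::btowc('A');
  std::printf("btowc('A') == L'A': %d\n", wc == L'A');
  // EOF never maps to a wide character.
  std::printf("btowc(EOF) == WEOF: %d\n", std::btowc(EOF) == WEOF);
  // The reverse direction: L'A' maps back to the byte 'A'.
  std::printf("wctob(L'A') == 'A': %d\n", std::wctob(L'A') == 'A');
  // A wide character with no single-byte representation yields EOF.
  std::printf("wctob(WEOF) == EOF: %d\n", std::wctob(WEOF) == EOF);
  return 0;
}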
Reviewed By: lntue Differential Revision: https://reviews.llvm.org/D145995 --- libc/config/linux/api.td | 8 ++++ libc/config/linux/x86_64/entrypoints.txt | 3 ++ libc/config/linux/x86_64/headers.txt | 1 + libc/include/CMakeLists.txt | 9 ++++ libc/include/llvm-libc-macros/CMakeLists.txt | 6 +++ libc/include/llvm-libc-macros/wchar-macros.h | 16 +++++++ libc/include/llvm-libc-types/CMakeLists.txt | 2 + libc/include/llvm-libc-types/size_t.h | 3 +- libc/include/llvm-libc-types/wchar_t.h | 19 +++++++++ libc/include/llvm-libc-types/wint_t.h | 19 +++++++++ libc/include/wchar.h.def | 17 ++++++++ libc/spec/spec.td | 4 ++ libc/spec/stdc.td | 24 +++++++++++ libc/src/CMakeLists.txt | 1 + libc/src/__support/CMakeLists.txt | 6 +++ libc/src/__support/wctype_utils.h | 45 ++++++++++++++++++++ libc/src/wchar/CMakeLists.txt | 10 +++++ libc/src/wchar/btowc.cpp | 26 +++++++++++ libc/src/wchar/btowc.h | 20 +++++++++ libc/src/wchar/wctob.cpp | 26 +++++++++++ libc/src/wchar/wctob.h | 20 +++++++++ libc/test/src/CMakeLists.txt | 1 + libc/test/src/wchar/CMakeLists.txt | 21 +++++++++ libc/test/src/wchar/btowc_test.cpp | 24 +++++++++++ libc/test/src/wchar/wctob_test.cpp | 24 +++++++++++ 25 files changed, 354 insertions(+), 1 deletion(-) create mode 100644 libc/include/llvm-libc-macros/wchar-macros.h create mode 100644 libc/include/llvm-libc-types/wchar_t.h create mode 100644 libc/include/llvm-libc-types/wint_t.h create mode 100644 libc/include/wchar.h.def create mode 100644 libc/src/__support/wctype_utils.h create mode 100644 libc/src/wchar/CMakeLists.txt create mode 100644 libc/src/wchar/btowc.cpp create mode 100644 libc/src/wchar/btowc.h create mode 100644 libc/src/wchar/wctob.cpp create mode 100644 libc/src/wchar/wctob.h create mode 100644 libc/test/src/wchar/CMakeLists.txt create mode 100644 libc/test/src/wchar/btowc_test.cpp create mode 100644 libc/test/src/wchar/wctob_test.cpp diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 253546fe83e0a..10b4abfee7a43 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -178,6 +178,14 @@ def UniStdAPI : PublicAPI<"unistd.h"> { "ssize_t", "uid_t", "__getoptargv_t"]; } +def WCharAPI : PublicAPI<"wchar.h"> { + let Types = [ + "wchar_t", + "wint_t", + "size_t", + ]; +} + def SysRandomAPI : PublicAPI<"sys/random.h"> { let Types = ["size_t", "ssize_t"]; } diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 5899c1d00c865..39b7e8c84effa 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -193,6 +193,9 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.unlink libc.src.unistd.unlinkat libc.src.unistd.write + + # wchar.h entrypoints + libc.src.wchar.wctob ) set(TARGET_LIBM_ENTRYPOINTS diff --git a/libc/config/linux/x86_64/headers.txt b/libc/config/linux/x86_64/headers.txt index e285105b1224a..aaa75a9dd08cb 100644 --- a/libc/config/linux/x86_64/headers.txt +++ b/libc/config/linux/x86_64/headers.txt @@ -20,6 +20,7 @@ set(TARGET_PUBLIC_HEADERS libc.include.threads libc.include.time libc.include.unistd + libc.include.wchar libc.include.arpa_inet diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index d058cb51db0a1..75ef824d2e3f0 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -468,6 +468,15 @@ add_gen_header( .llvm-libc-types.tcflag_t ) +add_gen_header( + wchar + DEF_FILE wchar.h.def + GEN_HDR wchar.h + DEPENDS + .llvm_libc_common_h + .llvm-libc-macros.wchar_macros +) + if(NOT LLVM_LIBC_FULL_BUILD) # We 
don't install headers in non-fullbuild mode. return() diff --git a/libc/include/llvm-libc-macros/CMakeLists.txt b/libc/include/llvm-libc-macros/CMakeLists.txt index 74a5977641f48..58bbf5e2e4317 100644 --- a/libc/include/llvm-libc-macros/CMakeLists.txt +++ b/libc/include/llvm-libc-macros/CMakeLists.txt @@ -166,3 +166,9 @@ add_header( DEPENDS .linux.unistd_macros ) + +add_header( + wchar_macros + HDR + wchar-macros.h +) diff --git a/libc/include/llvm-libc-macros/wchar-macros.h b/libc/include/llvm-libc-macros/wchar-macros.h new file mode 100644 index 0000000000000..adca41eb01227 --- /dev/null +++ b/libc/include/llvm-libc-macros/wchar-macros.h @@ -0,0 +1,16 @@ +//===-- Macros defined in wchar.h header file -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __LLVM_LIBC_MACROS_WCHAR_MACROS_H +#define __LLVM_LIBC_MACROS_WCHAR_MACROS_H + +#ifndef WEOF +#define WEOF 0xffffffffu +#endif + +#endif // __LLVM_LIBC_MACROS_WCHAR_MACROS_H diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt index 7be24415b41b3..1df2e7c962c1b 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -84,3 +84,5 @@ add_header(speed_t HDR speed_t.h) add_header(tcflag_t HDR tcflag_t.h) add_header(struct_termios HDR struct_termios.h DEPENDS .cc_t .speed_t .tcflag_t) add_header(__getoptargv_t HDR __getoptargv_t.h) +add_header(wchar_t HDR wchar_t.h) +add_header(wint_t HDR wint_t.h) diff --git a/libc/include/llvm-libc-types/size_t.h b/libc/include/llvm-libc-types/size_t.h index ce055d0bc3ba7..8eaf194e05727 100644 --- a/libc/include/llvm-libc-types/size_t.h +++ b/libc/include/llvm-libc-types/size_t.h @@ -11,8 +11,9 @@ // Since __need_size_t is defined, we get the definition of size_t from the // standalone C header stddef.h. Also, because __need_size_t is defined, -// including stddef.h will pull only the type size_t and nothing else.a +// including stddef.h will pull only the type size_t and nothing else. #define __need_size_t #include +#undef __need_size_t #endif // __LLVM_LIBC_TYPES_SIZE_T_H__ diff --git a/libc/include/llvm-libc-types/wchar_t.h b/libc/include/llvm-libc-types/wchar_t.h new file mode 100644 index 0000000000000..9efb5cd8e6652 --- /dev/null +++ b/libc/include/llvm-libc-types/wchar_t.h @@ -0,0 +1,19 @@ +//===-- Definition of wchar_t types ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __LLVM_LIBC_TYPES_WCHAR_T_H__ +#define __LLVM_LIBC_TYPES_WCHAR_T_H__ + +// Since __need_wchar_t is defined, we get the definition of wchar_t from the +// standalone C header stddef.h. Also, because __need_wchar_t is defined, +// including stddef.h will pull only the type wchar_t and nothing else. 
+#define __need_wchar_t +#include +#undef __need_wchar_t + +#endif // __LLVM_LIBC_TYPES_WCHAR_T_H__ diff --git a/libc/include/llvm-libc-types/wint_t.h b/libc/include/llvm-libc-types/wint_t.h new file mode 100644 index 0000000000000..cf6ccd7e1ae76 --- /dev/null +++ b/libc/include/llvm-libc-types/wint_t.h @@ -0,0 +1,19 @@ +//===-- Definition of wint_t types ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef __LLVM_LIBC_TYPES_WINT_T_H__ +#define __LLVM_LIBC_TYPES_WINT_T_H__ + +// Since __need_wint_t is defined, we get the definition of wint_t from the +// standalone C header stddef.h. Also, because __need_wint_t is defined, +// including stddef.h will pull only the type wint_t and nothing else. +#define __need_wint_t +#include +#undef __need_wint_t + +#endif // __LLVM_LIBC_TYPES_WINT_T_H__ diff --git a/libc/include/wchar.h.def b/libc/include/wchar.h.def new file mode 100644 index 0000000000000..ac72f80aa0833 --- /dev/null +++ b/libc/include/wchar.h.def @@ -0,0 +1,17 @@ +//===-- C standard library header wchar.h ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_WCHAR_H +#define LLVM_LIBC_WCHAR_H + +#include <__llvm-libc-common.h> +#include + +%%public_api() + +#endif // LLVM_LIBC_WCHAR_H diff --git a/libc/spec/spec.td b/libc/spec/spec.td index 87bafb087d3fb..7a691f255e0b7 100644 --- a/libc/spec/spec.td +++ b/libc/spec/spec.td @@ -59,6 +59,10 @@ def SizeTType : NamedType<"size_t">; def SizeTPtr : PtrType; def RestrictedSizeTPtr : RestrictedPtrType; +def WCharType : NamedType<"wchar_t">; +def WIntType : NamedType<"wint_t">; +def MBStateType : NamedType<"mbstate_t">; + def LongDoublePtr : PtrType; def IntMaxTType : NamedType<"intmax_t">; diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 84277f099a13c..fd732b74fae1e 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -1069,6 +1069,29 @@ def StdC : StandardSpec<"stdc"> { ] >; + HeaderSpec WChar = HeaderSpec< + "wchar.h", + [ // Macros + Macro<"WEOF">, + ], + [ //Types + SizeTType, + WIntType, + WCharType, + MBStateType, + StructTmType, + ], + [], // Enumerations + [ + FunctionSpec< + "wctob", + RetValSpec, + [ArgSpec] + >, + ] + >; + + let Headers = [ Assert, CType, @@ -1083,5 +1106,6 @@ def StdC : StandardSpec<"stdc"> { Signal, Threads, Time, + WChar, ]; } diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt index 59e7d06871787..9f2f45165e408 100644 --- a/libc/src/CMakeLists.txt +++ b/libc/src/CMakeLists.txt @@ -8,6 +8,7 @@ add_subdirectory(math) add_subdirectory(string) add_subdirectory(stdlib) add_subdirectory(stdio) +add_subdirectory(wchar) if(${LIBC_TARGET_OS} STREQUAL "linux") add_subdirectory(dirent) diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index e4eb354aefcd3..30bbf3ae96d26 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -51,6 +51,12 @@ add_header_library( ctype_utils.h ) +add_header_library( + wctype_utils + HDRS + wctype_utils.h +) + 
add_header_library( str_to_num_result HDRS diff --git a/libc/src/__support/wctype_utils.h b/libc/src/__support/wctype_utils.h new file mode 100644 index 0000000000000..7e11b2d8b1725 --- /dev/null +++ b/libc/src/__support/wctype_utils.h @@ -0,0 +1,45 @@ +//===-- Collection of utils for implementing wide char functions --*-C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SUPPORT_WCTYPE_UTILS_H +#define LLVM_LIBC_SRC_SUPPORT_WCTYPE_UTILS_H + +#include "src/__support/CPP/optional.h" +#include "src/__support/macros/attributes.h" // LIBC_INLINE + +#define __need_wint_t +#define __need_wchar_t +#include <stddef.h> // needed for wint_t and wchar_t + +namespace __llvm_libc { +namespace internal { + +// ------------------------------------------------------ +// Rationale: Since these classification functions are +// called in other functions, we will avoid the overhead +// of a function call by inlining them. +// ------------------------------------------------------ + +LIBC_INLINE cpp::optional<int> wctob(wint_t c) { + // This needs to be translated to EOF at the callsite. This is to avoid + // including stdio.h in this file. + if (c > 127 || c < 0) + return cpp::nullopt; + return static_cast<int>(c); +} + +LIBC_INLINE cpp::optional<wchar_t> btowc(int c) { + if (c > 127 || c < 0) + return cpp::nullopt; + return static_cast<wchar_t>(c); +} + +} // namespace internal +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_SUPPORT_WCTYPE_UTILS_H diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt new file mode 100644 index 0000000000000..58e168266829c --- /dev/null +++ b/libc/src/wchar/CMakeLists.txt @@ -0,0 +1,10 @@ + +add_entrypoint_object( + wctob + SRCS + wctob.cpp + HDRS + wctob.h + DEPENDS + libc.src.__support.wctype_utils +) diff --git a/libc/src/wchar/btowc.cpp b/libc/src/wchar/btowc.cpp new file mode 100644 index 0000000000000..6db888c1aa09c --- /dev/null +++ b/libc/src/wchar/btowc.cpp @@ -0,0 +1,26 @@ +//===-- Implementation of btowc -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/btowc.h" +#include "src/__support/common.h" +#include "src/__support/wctype_utils.h" + +#include <stdio.h> // for EOF. + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(wint_t, btowc, (int c)) { + auto result = internal::btowc(c); + if (result.has_value()) { + return result.value(); + } else { + return WEOF; + } +} + +} // namespace __llvm_libc diff --git a/libc/src/wchar/btowc.h b/libc/src/wchar/btowc.h new file mode 100644 index 0000000000000..ab37738e8c67c --- /dev/null +++ b/libc/src/wchar/btowc.h @@ -0,0 +1,20 @@ +//===-- Implementation header for btowc -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_BTOWC_H +#define LLVM_LIBC_SRC_WCHAR_BTOWC_H + +#include + +namespace __llvm_libc { + +wint_t btowc(int c); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_WCHAR_BTOWC_H diff --git a/libc/src/wchar/wctob.cpp b/libc/src/wchar/wctob.cpp new file mode 100644 index 0000000000000..274818a37773a --- /dev/null +++ b/libc/src/wchar/wctob.cpp @@ -0,0 +1,26 @@ +//===-- Implementation of wctob -------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wctob.h" +#include "src/__support/common.h" +#include "src/__support/wctype_utils.h" + +#include // for EOF. + +namespace __llvm_libc { + +LLVM_LIBC_FUNCTION(int, wctob, (wint_t c)) { + auto result = internal::wctob(c); + if (result.has_value()) { + return result.value(); + } else { + return EOF; + } +} + +} // namespace __llvm_libc diff --git a/libc/src/wchar/wctob.h b/libc/src/wchar/wctob.h new file mode 100644 index 0000000000000..5c0f229436d62 --- /dev/null +++ b/libc/src/wchar/wctob.h @@ -0,0 +1,20 @@ +//===-- Implementation header for wctob -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_WCTOB_H +#define LLVM_LIBC_SRC_WCHAR_WCTOB_H + +#include + +namespace __llvm_libc { + +int wctob(wint_t c); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_WCHAR_WCTOB_H diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index 6c3c987e3291d..6b1c7452fc284 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -35,6 +35,7 @@ add_subdirectory(string) add_subdirectory(stdlib) add_subdirectory(inttypes) add_subdirectory(stdio) +add_subdirectory(wchar) if(${LIBC_TARGET_OS} STREQUAL "linux") add_subdirectory(fcntl) diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt new file mode 100644 index 0000000000000..88b68c43491a6 --- /dev/null +++ b/libc/test/src/wchar/CMakeLists.txt @@ -0,0 +1,21 @@ +add_libc_testsuite(libc_wchar_unittests) + +add_libc_unittest( + btowc_test + SUITE + libc_wchar_unittests + SRCS + btowc_test.cpp + DEPENDS + libc.src.wchar.btowc +) + +add_libc_unittest( + wctob_test + SUITE + libc_wchar_unittests + SRCS + wctob_test.cpp + DEPENDS + libc.src.wchar.wctob +) diff --git a/libc/test/src/wchar/btowc_test.cpp b/libc/test/src/wchar/btowc_test.cpp new file mode 100644 index 0000000000000..cfac485cdecc7 --- /dev/null +++ b/libc/test/src/wchar/btowc_test.cpp @@ -0,0 +1,24 @@ +//===-- Unittests for btowc ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include //for WEOF + +#include "src/wchar/btowc.h" + +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcBtowc, DefaultLocale) { + // Loops through all characters, verifying that ascii returns itself and + // everything else returns WEOF. + for (int c = 0; c < 255; ++c) { + if (c < 128) + EXPECT_EQ(__llvm_libc::btowc(c), static_cast(c)); + else + EXPECT_EQ(__llvm_libc::btowc(c), WEOF); + } +} diff --git a/libc/test/src/wchar/wctob_test.cpp b/libc/test/src/wchar/wctob_test.cpp new file mode 100644 index 0000000000000..7a8d6eea0c82a --- /dev/null +++ b/libc/test/src/wchar/wctob_test.cpp @@ -0,0 +1,24 @@ +//===-- Unittests for wctob ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include //for EOF + +#include "src/wchar/wctob.h" + +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcWctob, DefaultLocale) { + // Loops through a subset of the wide characters, verifying that ascii returns + // itself and everything else returns EOF. + for (wint_t c = 0; c < 32767; ++c) { + if (c < 128) + EXPECT_EQ(__llvm_libc::wctob(c), static_cast(c)); + else + EXPECT_EQ(__llvm_libc::wctob(c), EOF); + } +} From 75c8abc0e0bdb9d3099ec7e9199207a58de1a022 Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Mon, 20 Mar 2023 18:38:39 -0500 Subject: [PATCH 100/691] Revert "[IRSim] Check largest sections first when analyzing similarity" This reverts commit 805ec19d7d9915989be8a8a626176b5e29e19eee. Did not update llvm-sim tests --- .../llvm/Analysis/IRSimilarityIdentifier.h | 43 ---- llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 214 +----------------- .../Transforms/IROutliner/illegal-assumes.ll | 40 ++-- 3 files changed, 24 insertions(+), 273 deletions(-) diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index d40d51e2d3376..9f9e7c59b42ba 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -850,49 +850,6 @@ class IRSimilarityCandidate { IRSimilarityCandidate &SourceCand, DenseMap> &ToSourceMapping, DenseMap> &FromSourceMapping); - - /// Create a mapping for the value numbering of the calling - /// IRSimilarityCandidate, to a different separate set of numbers, based on - /// the canonical ordering in \p SourceCand. These are defined based on the - /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of - /// these relationships should have the same information, just in opposite - /// directions. Uses the \p OneToOne mapping from target candidate to \p - /// SourceCand GVNs to determine the mapping first for values with multiple - /// mappings. This mapping is created by the ordering of operands in the - /// instruction they are first seen in the candidates. - /// - /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a - /// canonical numbering from. - /// \param [in,out] OneToOne - A mapping of value numbers from candidate - /// \p A to candidate \B using the structure of the original instructions. - /// \param ToSourceMapping - The mapping of value numbers from this candidate - /// to \p SourceCand. 
-  /// \param FromSourceMapping - The mapping of value numbers from \p SourceCand
-  /// to this candidate.
-  void createCanonicalRelationFrom(
-      IRSimilarityCandidate &SourceCand,
-      DenseMap<unsigned, unsigned> &OneToOne,
-      DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
-      DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
-
-  /// Create a mapping for the value numbering of the calling
-  /// IRSimilarityCandidate, to a different separate set of numbers, based on
-  /// the canonical ordering in \p SourceCand. These are defined based on the
-  /// canonical mapping defined between \p SourceCandLarge and
-  /// \p TargetCandLarge. These IRSimilarityCandidates are already structurally
-  /// similar, and fully encapsulate the IRSimilarityCandidates in question.
-  /// These are used as a "bridge" from the \p SourceCand to the target.
-  ///
-  /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
-  /// canonical numbering from.
-  /// \param SourceCandLarge - The IRSimilarityCandidate fully containing
-  /// \p SourceCand.
-  /// \param TargetCandLarge - The IRSimilarityCandidate fully containing
-  /// this Candidate.
-  void createCanonicalRelationFrom(
-      IRSimilarityCandidate &SourceCand,
-      IRSimilarityCandidate &SourceCandLarge,
-      IRSimilarityCandidate &TargetCandLarge);
 
   /// \param [in,out] BBSet - The set to track the basic blocks.
   void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 454ce5a6925bf..c8007be4142cf 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -1101,76 +1101,6 @@ void IRSimilarityCandidate::createCanonicalRelationFrom(
   }
 }
 
-void IRSimilarityCandidate::createCanonicalRelationFrom(
-    IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
-    IRSimilarityCandidate &TargetCandLarge) {
-  assert(!SourceCand.CanonNumToNumber.empty() &&
-         "Canonical Relationship is non-empty");
-  assert(!SourceCand.NumberToCanonNum.empty() &&
-         "Canonical Relationship is non-empty");
-
-  assert(!SourceCandLarge.CanonNumToNumber.empty() &&
-         "Canonical Relationship is non-empty");
-  assert(!SourceCandLarge.NumberToCanonNum.empty() &&
-         "Canonical Relationship is non-empty");
-
-  assert(!TargetCandLarge.CanonNumToNumber.empty() &&
-         "Canonical Relationship is non-empty");
-  assert(!TargetCandLarge.NumberToCanonNum.empty() &&
-         "Canonical Relationship is non-empty");
-
-  assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
-  assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
-
-  // We're going to use the larger candidates as a "bridge" to create the
-  // canonical number for the target candidate since we have identified two
-  // candidates as subsequences of larger sequences, and therefore must be
-  // structurally similar.
-  for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
-    Value *CurrVal = ValueNumPair.first;
-    unsigned TargetCandGVN = ValueNumPair.second;
-
-    // Find the numbering in the large candidate that surrounds the
-    // current candidate.
-    std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
-    assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
-
-    // Get the canonical numbering in the large target candidate.
-    std::optional<unsigned> OTargetCandCanon =
-        TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
-    assert(OTargetCandCanon.has_value() &&
-           "Canonical Number not found for GVN");
-
-    // Get the GVN in the large source candidate from the canonical numbering.
-    std::optional<unsigned> OLargeSourceGVN =
-        SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
-    assert(OLargeSourceGVN.has_value() &&
-           "GVN Number not found for Canonical Number");
-
-    // Get the Value from the GVN in the large source candidate.
-    std::optional<Value *> OLargeSourceV =
-        SourceCandLarge.fromGVN(OLargeSourceGVN.value());
-    assert(OLargeSourceV.has_value() && "Value not found for GVN");
-
-    // Get the GVN number for the Value in the source candidate.
-    std::optional<unsigned> OSourceGVN =
-        SourceCand.getGVN(OLargeSourceV.value());
-    assert(OSourceGVN.has_value() && "GVN Number not found for Value");
-
-    // Get the canonical numbering from the GVN.
-    std::optional<unsigned> OSourceCanon =
-        SourceCand.getCanonicalNum(OSourceGVN.value());
-    assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
-
-    // Insert the canonical numbering and GVN pair into their respective
-    // mappings.
-    CanonNumToNumber.insert(
-        std::make_pair(OSourceCanon.value(), TargetCandGVN));
-    NumberToCanonNum.insert(
-        std::make_pair(TargetCandGVN, OSourceCanon.value()));
-  }
-}
-
 void IRSimilarityCandidate::createCanonicalMappingFor(
     IRSimilarityCandidate &CurrCand) {
   assert(CurrCand.CanonNumToNumber.size() == 0 &&
@@ -1188,81 +1118,6 @@ void IRSimilarityCandidate::createCanonicalMappingFor(
   }
 }
 
-/// Look for larger IRSimilarityCandidates from the previously matched
-/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
-/// an overlap, return a pair of structurally similar, larger
-/// IRSimilarityCandidates.
-///
-/// \param [in] CandA - The first candidate we are trying to determine the
-/// structure of.
-/// \param [in] CandB - The second candidate we are trying to determine the
-/// structure of.
-/// \param [in] IndexToIncludedCand - Mapping of index of an instruction in
-/// a circuit to the IRSimilarityCandidates that include this instruction.
-/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
-/// number representing the structural group assigned to it.
-static std::optional<
-    std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
-CheckLargerCands(
-    IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,
-    DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
-    DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) {
-  DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
-  DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
-  DenseSet<unsigned> IncludedGroupsA;
-  DenseSet<unsigned> IncludedGroupsB;
-
-  // Find the overall similarity group numbers that fully contain the candidate,
-  // and record the larger candidate for each group.
-  auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
-  std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
-      Result;
-
-  unsigned CandAStart = CandA.getStartIdx();
-  unsigned CandAEnd = CandA.getEndIdx();
-  unsigned CandBStart = CandB.getStartIdx();
-  unsigned CandBEnd = CandB.getEndIdx();
-  if (IdxToCandidateIt == IndexToIncludedCand.end())
-    return Result;
-  for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
-    if (MatchedCand->getStartIdx() > CandAStart ||
-        (MatchedCand->getEndIdx() < CandAEnd))
-      continue;
-    unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
-    IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
-    IncludedGroupsA.insert(GroupNum);
-  }
-
-  // Find the overall similarity group numbers that fully contain the next
-  // candidate, and record the larger candidate for each group.
-  IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
-  if (IdxToCandidateIt == IndexToIncludedCand.end())
-    return Result;
-  for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
-    if (MatchedCand->getStartIdx() > CandBStart ||
-        MatchedCand->getEndIdx() < CandBEnd)
-      continue;
-    unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
-    IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
-    IncludedGroupsB.insert(GroupNum);
-  }
-
-  // Find the intersection between the two groups, these are the groups where
-  // the larger candidates exist.
-  set_intersect(IncludedGroupsA, IncludedGroupsB);
-
-  // If there is no intersection between the sets, then we cannot determine
-  // whether or not there is a match.
-  if (IncludedGroupsA.empty())
-    return Result;
-
-  // Create a pair that contains the larger candidates.
-  auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
-  auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
-  Result = std::make_pair(ItA->second, ItB->second);
-  return Result;
-}
-
 /// From the list of IRSimilarityCandidates, perform a comparison between each
 /// IRSimilarityCandidate to determine if there are overlapping
 /// IRInstructionData, or if they do not have the same structure.
@@ -1272,16 +1127,9 @@ CheckLargerCands(
 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
 /// vector are structurally similar to one another.
-/// \param [in] IndexToIncludedCand - Mapping of index of an instruction in
-/// a circuit to the IRSimilarityCandidates that include this instruction.
-/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
-/// number representing the structural group assigned to it.
 static void findCandidateStructures(
     std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
-    DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
-    DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
-    DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup
-    ) {
+    DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
   std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
       InnerCandEndIt;
 
@@ -1344,24 +1192,6 @@ static void findCandidateStructures(
       if (CandToGroupItInner != CandToGroup.end())
         continue;
 
-      // Check if we have found structural similarity between two candidates
-      // that fully contains the first and second candidates.
-      std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
-          LargerPair = CheckLargerCands(
-              *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
-
-      // If a pair was found, it means that we can assume that these smaller
-      // substrings are also structurally similar. Use the larger candidates to
-      // determine the canonical mapping between the two sections.
-      if (LargerPair.has_value()) {
-        SameStructure = true;
-        InnerCandIt->createCanonicalRelationFrom(
-            *CandIt, *LargerPair.value().first, *LargerPair.value().second);
-        CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
-        CurrentGroupPair->second.push_back(*InnerCandIt);
-        continue;
-      }
-
       // Otherwise we determine if they have the same structure and add it to
       // vector if they match.
       ValueNumberMappingA.clear();
@@ -1388,58 +1218,24 @@ void IRSimilarityIdentifier::findCandidates(
   std::vector<SimilarityGroup> NewCandidateGroups;
 
   DenseMap<unsigned, SimilarityGroup> StructuralGroups;
-  DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
-  DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
 
   // Iterate over the subsequences found by the Suffix Tree to create
   // IRSimilarityCandidates for each repeated subsequence and determine which
   // instances are structurally similar to one another.
-
-  // Sort the suffix tree from longest substring to shortest.
-  std::vector<SuffixTree::RepeatedSubstring> RSes;
-  for (SuffixTree::RepeatedSubstring &RS : ST)
-    RSes.push_back(RS);
-
-  llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
-                             const SuffixTree::RepeatedSubstring &RHS) {
-    return LHS.Length > RHS.Length;
-  });
-  for (SuffixTree::RepeatedSubstring &RS : RSes) {
+  for (SuffixTree::RepeatedSubstring &RS : ST) {
     createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
                                    CandsForRepSubstring);
 
     if (CandsForRepSubstring.size() < 2)
       continue;
 
-    findCandidateStructures(CandsForRepSubstring, StructuralGroups,
-                            IndexToIncludedCand, CandToGroup);
-    for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
+    findCandidateStructures(CandsForRepSubstring, StructuralGroups);
+    for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
       // We only add the group if it contains more than one
       // IRSimilarityCandidate. If there is only one, that means there is no
      // other repeated subsequence with the same structure.
-      if (Group.second.size() > 1) {
+      if (Group.second.size() > 1)
         SimilarityCandidates->push_back(Group.second);
-        // Iterate over each candidate in the group, and add an entry for each
-        // instruction included with a mapping to a set of
-        // IRSimilarityCandidates that include that instruction.
-        for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
-          for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
-               Idx <= Edx; ++Idx) {
-            DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator
-                IdIt;
-            IdIt = IndexToIncludedCand.find(Idx);
-            bool Inserted = false;
-            if (IdIt == IndexToIncludedCand.end())
-              std::tie(IdIt, Inserted) = IndexToIncludedCand.insert(
-                  std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>()));
-            IdIt->second.insert(&IRCand);
-          }
-          // Add mapping of candidate to the overall similarity group number.
-          CandToGroup.insert(
-              std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
-        }
-      }
-    }
 
     CandsForRepSubstring.clear();
     StructuralGroups.clear();
diff --git a/llvm/test/Transforms/IROutliner/illegal-assumes.ll b/llvm/test/Transforms/IROutliner/illegal-assumes.ll
index d863fe7a29903..b91f8ec92200e 100644
--- a/llvm/test/Transforms/IROutliner/illegal-assumes.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-assumes.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -p iroutliner,verify -ir-outlining-no-cost < %s | FileCheck %s
+; RUN: opt -S -passes=verify,iroutliner -ir-outlining-no-cost < %s | FileCheck %s
 
 ; This test ensures that we do not include llvm.assumes. There are exceptions
 ; in the CodeExtractor's algorithm for llvm.assumes, so we ignore it for now.
@@ -13,13 +13,13 @@ define void @outline_assumes() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_4(i1 true, ptr [[D]], ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_3(i1 true, ptr [[D]], ptr [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) ; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]] -; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) -; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -49,12 +49,12 @@ define void @outline_assumes2() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_4(i1 false, ptr [[D]], ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_3(i1 false, ptr [[D]], ptr [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) -; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -77,17 +77,16 @@ entry: define void @outline_assumes3() { ; CHECK-LABEL: @outline_assumes3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i1 true, ptr [[D]], ptr [[A]], ptr [[B]], ptr [[C]], ptr [[DL_LOC]]) -; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) -; CHECK-NEXT: call void @outlined_ir_func_3(ptr [[A]]) +; CHECK-NEXT: store i1 true, ptr [[D]], align 4 +; CHECK-NEXT: [[DL:%.*]] = load i1, ptr [[D]], align 1 +; CHECK-NEXT: [[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] +; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) +; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]]) ; CHECK-NEXT: ret void ; entry: @@ -110,17 +109,16 @@ entry: define void @outline_assumes4() { ; CHECK-LABEL: @outline_assumes4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i1 false, ptr 
[[D]], ptr [[A]], ptr [[B]], ptr [[C]], ptr [[DL_LOC]]) -; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) -; CHECK-NEXT: call void @outlined_ir_func_3(ptr [[A]]) +; CHECK-NEXT: store i1 false, ptr [[D]], align 4 +; CHECK-NEXT: [[DL:%.*]] = load i1, ptr [[D]], align 1 +; CHECK-NEXT: [[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] +; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) +; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]]) ; CHECK-NEXT: ret void ; entry: From 1bd8e87e1b44d23c78f43caa6d419210954821b7 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 20 Mar 2023 16:38:54 -0700 Subject: [PATCH 101/691] Disable __sancov_lowest_stack on Windows. Should fix Windows buildbot: https://lab.llvm.org/buildbot/#/builders/127/builds/45401 --- .../lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp index f2dd6b227ccd3..ce4326967180d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_coverage_libcdep_new.cpp @@ -284,7 +284,10 @@ SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_cov_pcs_init, const uptr* beg, // E.g., -fsanitize=fuzzer-no-link // FIXME: Update Apple deployment target so that thread_local is always // supported, and remove the #if. -#if !SANITIZER_APPLE +// FIXME: Figure out how this should work on Windows, exported thread_local +// symbols are not supported: +// "data with thread storage duration may not have dll interface" +#if !SANITIZER_APPLE && !SANITIZER_WINDOWS SANITIZER_INTERFACE_ATTRIBUTE SANITIZER_WEAK_ATTRIBUTE thread_local uptr __sancov_lowest_stack; #endif From 07cdf4a10d783c6af31877cf3433e46c1527115c Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Mon, 20 Mar 2023 16:41:08 -0700 Subject: [PATCH 102/691] [libc][obvious] fix missing type headers forgot to add wint_t and wchar_t type headers to the wchar headergen definition. Differential Revision: https://reviews.llvm.org/D146472 --- libc/include/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 75ef824d2e3f0..39b598824e24e 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -475,6 +475,9 @@ add_gen_header( DEPENDS .llvm_libc_common_h .llvm-libc-macros.wchar_macros + .llvm-libc-types.size_t + .llvm-libc-types.wint_t + .llvm-libc-types.wchar_t ) if(NOT LLVM_LIBC_FULL_BUILD) From c1e4a0d4dbe9d68113a877764c794407bdd65622 Mon Sep 17 00:00:00 2001 From: Jim Ingham Date: Mon, 20 Mar 2023 16:47:07 -0700 Subject: [PATCH 103/691] Follow on to 2c7abc83f605b2d3b1062f28422eeff81278ecd5. I didn't refactor the test that's failing on arm64 correctly so it failed everywhere. Looks like the step test passes on other aarch64 systems as well as Darwin. Turn off the xfail and see how far that gets. 
--- .../step_over_watchpoint/TestStepOverWatchpoint.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py b/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py index 52fc899b13e61..fd70bd692a216 100644 --- a/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py +++ b/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py @@ -36,12 +36,6 @@ def get_to_start(self, bkpt_text): return (target, process, thread, read_watchpoint) - @expectedFailureAll( - oslist=["freebsd", "linux"], - archs=[ - 'aarch64', - 'arm'], - bugnumber="llvm.org/pr26031") # Read-write watchpoints not supported on SystemZ @expectedFailureAll(archs=['s390x']) @add_test_categories(["basic_process"]) @@ -69,8 +63,6 @@ def test_step_over(self): def test_step_instruction(self): target, process, thread, wp = self.get_to_start("Set breakpoint after call") - self.assertEquals(thread.GetStopDescription(20), 'step over') - self.step_inst_for_watchpoint(1) write_value = frame.FindValue('g_watch_me_write', From 72455b314f65fb55e0f10c75f88e651d584307bb Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Mon, 20 Mar 2023 16:41:39 -0700 Subject: [PATCH 104/691] [mlir][sparse] Fixing -Wunused-variable in Sparsification.cpp Reviewed By: aartbik, Peiming Differential Revision: https://reviews.llvm.org/D146474 --- mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index d8aeb44811534..63228531fcf0c 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -762,9 +762,9 @@ static bool computeIterationGraph(CodegenEnv &env, SortMask mask, // Iterate over the indexing maps of every tensor in the tensor expression. for (OpOperand &t : env.op()->getOpOperands()) { // Get map and encoding. - const auto map = env.op().getMatchingIndexingMap(&t); const auto enc = getSparseTensorEncoding(t.get().getType()); - assert(map.getNumDims() + getNumNonTrivialIdxExpOnSparseLvls(env.op()) == + assert(env.op().getMatchingIndexingMap(&t).getNumDims() + + getNumNonTrivialIdxExpOnSparseLvls(env.op()) == n); // Skips dense inputs/outputs when not requested. From b6f7e598ac1d72377cdcf5da84526f6289af13ca Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 20 Mar 2023 23:57:12 +0000 Subject: [PATCH 105/691] [gn build] Port ee232506b870 --- .../gn/secondary/lldb/source/Plugins/Platform/Linux/BUILD.gn | 5 ++++- .../lldb/source/Plugins/Platform/gdb-server/BUILD.gn | 5 ++++- .../secondary/lldb/source/Plugins/Process/Utility/BUILD.gn | 4 ---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/Linux/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/Linux/BUILD.gn index 2d65a06fdc445..08ef6a6749a6d 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/Linux/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/Linux/BUILD.gn @@ -15,5 +15,8 @@ static_library("Linux") { # Reaches into Plugins/Platform/POSIX. 
include_dirs = [ "//lldb/source" ] - sources = [ "PlatformLinux.cpp" ] + sources = [ + "LinuxSignals.cpp", + "PlatformLinux.cpp", + ] } diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/gdb-server/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/gdb-server/BUILD.gn index bcd800a17d24f..fb3fb1e1687c5 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/gdb-server/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Platform/gdb-server/BUILD.gn @@ -16,5 +16,8 @@ static_library("gdb-server") { # Reaches into Plugins/Process/Utility. include_dirs = [ "//lldb/source" ] - sources = [ "PlatformRemoteGDBServer.cpp" ] + sources = [ + "GDBRemoteSignals.cpp", + "PlatformRemoteGDBServer.cpp", + ] } diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Utility/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Utility/BUILD.gn index 4f50b8fa5964e..787b51f6ebc11 100644 --- a/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Utility/BUILD.gn +++ b/llvm/utils/gn/secondary/lldb/source/Plugins/Process/Utility/BUILD.gn @@ -19,19 +19,15 @@ static_library("Utility") { include_dirs = [ "//lldb/source" ] sources = [ "AuxVector.cpp", - "FreeBSDSignals.cpp", - "GDBRemoteSignals.cpp", "HistoryThread.cpp", "HistoryUnwind.cpp", "InferiorCallPOSIX.cpp", "LinuxProcMaps.cpp", - "LinuxSignals.cpp", "MemoryTagManagerAArch64MTE.cpp", "NativeProcessSoftwareSingleStep.cpp", "NativeRegisterContextDBReg_arm64.cpp", "NativeRegisterContextDBReg_x86.cpp", "NativeRegisterContextRegisterInfo.cpp", - "NetBSDSignals.cpp", "RegisterContextDarwin_arm.cpp", "RegisterContextDarwin_arm64.cpp", "RegisterContextDarwin_i386.cpp", From 6a0f2e539b8ef1f510f62aceb36430b95e40f0d3 Mon Sep 17 00:00:00 2001 From: ziqingluo-90 Date: Mon, 20 Mar 2023 16:57:50 -0700 Subject: [PATCH 106/691] [-Wunsafe-buffer-usage] Add Fixable for dereference of simple ptr arithmetic For each expression `e` of the form `*(DRE + n)` (or `*(n + DRE)`), where `DRE` has a pointer type and `n` is an integer literal, `e` will be transformed to `DRE[n]` (or `n[DRE]` respectively), if - `e` is at the left-hand side of an assignment or is an lvalue being casted to an rvalue; and - the variable referred by `DRE` is going to be transformed to be of `std::span` type. 
Reviewed by: jkorous, NoQ

Differential revision: https://reviews.llvm.org/D142795
---
 .../Analyses/UnsafeBufferUsageGadgets.def     |   1 +
 clang/lib/Analysis/UnsafeBufferUsage.cpp      | 102 +++++++++
 ...er-usage-fixits-deref-simple-ptr-arith.cpp | 199 ++++++++++++++++++
 3 files changed, 302 insertions(+)
 create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-deref-simple-ptr-arith.cpp

diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
index 75657d8d9a584..89f7c1ed2ba24 100644
--- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
+++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def
@@ -31,6 +31,7 @@ WARNING_GADGET(ArraySubscript)
 WARNING_GADGET(PointerArithmetic)
 WARNING_GADGET(UnsafeBufferUsageAttr)
 FIXABLE_GADGET(ULCArraySubscript)
+FIXABLE_GADGET(DerefSimplePtrArithFixable)
 
 #undef FIXABLE_GADGET
 #undef WARNING_GADGET
diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp
index 116bb4954b168..04e11d0471a7d 100644
--- a/clang/lib/Analysis/UnsafeBufferUsage.cpp
+++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Analysis/Analyses/UnsafeBufferUsage.h"
+#include "clang/AST/Decl.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Lex/Lexer.h"
@@ -558,6 +559,56 @@ class Strategy {
 };
 } // namespace
 
+// Representing a fixable expression of the form `*(ptr + 123)` or `*(123 +
+// ptr)`:
+class DerefSimplePtrArithFixableGadget : public FixableGadget {
+  static constexpr const char *const BaseDeclRefExprTag = "BaseDRE";
+  static constexpr const char *const DerefOpTag = "DerefOp";
+  static constexpr const char *const AddOpTag = "AddOp";
+  static constexpr const char *const OffsetTag = "Offset";
+
+  const DeclRefExpr *BaseDeclRefExpr = nullptr;
+  const UnaryOperator *DerefOp = nullptr;
+  const BinaryOperator *AddOp = nullptr;
+  const IntegerLiteral *Offset = nullptr;
+
+public:
+  DerefSimplePtrArithFixableGadget(const MatchFinder::MatchResult &Result)
+      : FixableGadget(Kind::DerefSimplePtrArithFixable),
+        BaseDeclRefExpr(
+            Result.Nodes.getNodeAs<DeclRefExpr>(BaseDeclRefExprTag)),
+        DerefOp(Result.Nodes.getNodeAs<UnaryOperator>(DerefOpTag)),
+        AddOp(Result.Nodes.getNodeAs<BinaryOperator>(AddOpTag)),
+        Offset(Result.Nodes.getNodeAs<IntegerLiteral>(OffsetTag)) {}
+
+  static Matcher matcher() {
+    // clang-format off
+    auto ThePtr = expr(hasPointerType(),
+                       ignoringImpCasts(declRefExpr(to(varDecl())).bind(BaseDeclRefExprTag)));
+    auto PlusOverPtrAndInteger = expr(anyOf(
+          binaryOperator(hasOperatorName("+"), hasLHS(ThePtr),
+                         hasRHS(integerLiteral().bind(OffsetTag)))
+                         .bind(AddOpTag),
+          binaryOperator(hasOperatorName("+"), hasRHS(ThePtr),
+                         hasLHS(integerLiteral().bind(OffsetTag)))
+                         .bind(AddOpTag)));
+    return isInUnspecifiedLvalueContext(unaryOperator(
+        hasOperatorName("*"),
+        hasUnaryOperand(ignoringParens(PlusOverPtrAndInteger)))
+        .bind(DerefOpTag));
+    // clang-format on
+  }
+
+  virtual std::optional<FixItList> getFixits(const Strategy &s) const final;
+
+  // TODO remove this method from FixableGadget interface
+  virtual const Stmt *getBaseStmt() const final { return nullptr; }
+
+  virtual DeclUseList getClaimedVarUseSites() const final {
+    return {BaseDeclRefExpr};
+  }
+};
+
 /// Scan the function and return a list of gadgets found with provided kits.
 static std::tuple<FixableGadgetList, WarningGadgetList, DeclUseTracker>
 findGadgets(const Decl *D, const UnsafeBufferUsageHandler &Handler) {
@@ -812,6 +863,57 @@ static StringRef getExprText(const Expr *E, const SourceManager &SM,
                          LangOpts);
 }
 
+std::optional<FixItList>
+DerefSimplePtrArithFixableGadget::getFixits(const Strategy &s) const {
+  const VarDecl *VD = dyn_cast<VarDecl>(BaseDeclRefExpr->getDecl());
+
+  if (VD && s.lookup(VD) == Strategy::Kind::Span) {
+    ASTContext &Ctx = VD->getASTContext();
+    // std::span can't represent elements before its begin()
+    if (auto ConstVal = Offset->getIntegerConstantExpr(Ctx))
+      if (ConstVal->isNegative())
+        return std::nullopt;
+
+    // note that the expr may (oddly) have multiple layers of parens
+    // example:
+    //   *((..(pointer + 123)..))
+    // goal:
+    //   pointer[123]
+    // Fix-It:
+    //   remove '*('
+    //   replace ' + ' with '['
+    //   replace ')' with ']'
+
+    // example:
+    //   *((..(123 + pointer)..))
+    // goal:
+    //   123[pointer]
+    // Fix-It:
+    //   remove '*('
+    //   replace ' + ' with '['
+    //   replace ')' with ']'
+
+    const Expr *LHS = AddOp->getLHS(), *RHS = AddOp->getRHS();
+    const SourceManager &SM = Ctx.getSourceManager();
+    const LangOptions &LangOpts = Ctx.getLangOpts();
+    CharSourceRange StarWithTrailWhitespace =
+        clang::CharSourceRange::getCharRange(DerefOp->getOperatorLoc(),
+                                             LHS->getBeginLoc());
+    CharSourceRange PlusWithSurroundingWhitespace =
+        clang::CharSourceRange::getCharRange(getPastLoc(LHS, SM, LangOpts),
+                                             RHS->getBeginLoc());
+    CharSourceRange ClosingParenWithPrecWhitespace =
+        clang::CharSourceRange::getCharRange(getPastLoc(AddOp, SM, LangOpts),
+                                             getPastLoc(DerefOp, SM, LangOpts));
+
+    return FixItList{
+        {FixItHint::CreateRemoval(StarWithTrailWhitespace),
+         FixItHint::CreateReplacement(PlusWithSurroundingWhitespace, "["),
+         FixItHint::CreateReplacement(ClosingParenWithPrecWhitespace, "]")}};
+  }
+  return std::nullopt; // something wrong or unsupported, give up
+}
+
 // For a non-null initializer `Init` of `T *` type, this function returns
 // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it
 // to output stream.
diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-deref-simple-ptr-arith.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-deref-simple-ptr-arith.cpp
new file mode 100644
index 0000000000000..c533a9bd3d38d
--- /dev/null
+++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-deref-simple-ptr-arith.cpp
@@ -0,0 +1,199 @@
+// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits -fsyntax-only %s 2>&1 | FileCheck %s
+
+// TODO test we don't mess up vertical whitespace
+// TODO test different whitespaces
+// TODO test different contexts
+  // when it's on the right side
+
+void basic() {
+  int *ptr;
+// CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span<int> ptr"
+  *(ptr+5)=1;
+// CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:5}:""
+// CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:8-[[@LINE-2]]:9}:"["
+// CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:10-[[@LINE-3]]:11}:"]"
+}
+
+// The weird preceding semicolon ensures that we preserve that range intact.
+void char_ranges() {
+  int *p;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:9}:"std::span<int> p"
+
+  ;* ( p + 5 ) = 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:8}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:9-[[@LINE-2]]:12}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:13-[[@LINE-3]]:15}:"]"
+
+  ;* (p+5)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:9}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:10-[[@LINE-2]]:11}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"]"
+
+  ;*( p+5)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:9}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:10-[[@LINE-2]]:11}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"]"
+
+  ;*(  p+5)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:9}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:10-[[@LINE-2]]:11}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"]"
+
+  ;*( p +5)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:7}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:8-[[@LINE-2]]:12}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:13-[[@LINE-3]]:14}:"]"
+
+  ;*(p+ 5)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:7-[[@LINE-2]]:11}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"]"
+
+  ;*(p+ 5 )= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:7-[[@LINE-2]]:9}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:10-[[@LINE-3]]:14}:"]"
+
+  ;*(p+ 5) = 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:7-[[@LINE-2]]:9}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:10-[[@LINE-3]]:11}:"]"
+
+  ;   *(p+5)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:9}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:10-[[@LINE-2]]:11}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:12-[[@LINE-3]]:13}:"]"
+
+  ;*(p+123456)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:7-[[@LINE-2]]:8}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:14-[[@LINE-3]]:15}:"]"
+
+  ;* (p+123456)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:9}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:10-[[@LINE-2]]:11}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:17-[[@LINE-3]]:18}:"]"
+
+  ;*( p+123456)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:9}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:10-[[@LINE-2]]:11}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:17-[[@LINE-3]]:18}:"]"
+
+  ;*(  p+123456)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:9}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:10-[[@LINE-2]]:11}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:17-[[@LINE-3]]:18}:"]"
+
+  ;*(p +123456)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:7-[[@LINE-2]]:11}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:17-[[@LINE-3]]:18}:"]"
+
+  ;*(p+ 123456)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:7-[[@LINE-2]]:11}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:17-[[@LINE-3]]:18}:"]"
+
+  ;*(p+123456 )= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:7-[[@LINE-2]]:8}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:14-[[@LINE-3]]:18}:"]"
+
+  ;*(p+123456) = 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:7-[[@LINE-2]]:8}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:14-[[@LINE-3]]:15}:"]"
+
+  int *ptrrrrrr;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:16}:"std::span<int> ptrrrrrr"
+
+  ;* ( ptrrrrrr + 123456 )= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:8}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:16-[[@LINE-2]]:19}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:25-[[@LINE-3]]:27}:"]"
+
+  ;* (ptrrrrrr+123456)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:9}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:17-[[@LINE-2]]:18}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:25}:"]"
+
+  ;*( ptrrrrrr+123456)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:9}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:17-[[@LINE-2]]:18}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:25}:"]"
+
+  ;*(  ptrrrrrr+123456)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:9}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:17-[[@LINE-2]]:18}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:25}:"]"
+
+  ;*(ptrrrrrr +123456)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:14-[[@LINE-2]]:18}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:25}:"]"
+
+  ;*(ptrrrrrr+ 123456)= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:14-[[@LINE-2]]:18}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:24-[[@LINE-3]]:25}:"]"
+
+  ;*(ptrrrrrr+123456 )= 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:14-[[@LINE-2]]:15}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:21-[[@LINE-3]]:25}:"]"
+
+  ;*(ptrrrrrr+123456) = 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:4-[[@LINE-1]]:6}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:14-[[@LINE-2]]:15}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:21-[[@LINE-3]]:22}:"]"
+}
+
+void base_on_rhs() {
+  int* ptr;
+  *(10 + ptr) = 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:5}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:7-[[@LINE-2]]:10}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:13-[[@LINE-3]]:14}:"]"
+}
+
+void many_parens() {
+  int* ptr;
+  *((  (10 + ptr))  ) = 1;
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:8}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:10-[[@LINE-2]]:13}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:16-[[@LINE-3]]:20}:"]"
+}
+
+void lvaue_to_rvalue() {
+  int * ptr;
+  int tmp = *(ptr + 10);
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:13-[[@LINE-1]]:15}:""
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:18-[[@LINE-2]]:21}:"["
+  // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:23-[[@LINE-3]]:24}:"]"
+}
+
+// Fixits emitted for the cases below would be incorrect.
+// CHECK-NOT: fix-it:
+// Array subscript operator of std::span accepts unsigned integer.
+void negative() {
+  int* ptr;
+  *(ptr + -5) = 1; // skip
+}
+
+void subtraction() {
+  int* ptr;
+  *(ptr - 5) = 1; // skip
+}
+
+void subtraction_of_negative() {
+  int* ptr;
+  *(ptr - -5) = 1; // FIXME: implement fixit (uncommon case - low priority)
+}
+
+
+void bindingDecl(int *p, int *q) {
+  int * a[2] = {p, q};
+  auto [x, y] = a;
+
+  *(x + 1) = 1; // FIXME: deal with `BindingDecl`s
+}

From e4c1dfed38370b4933f05c8e24b1d77df56b526c Mon Sep 17 00:00:00 2001
From: "Luo, Yuanke"
Date: Thu, 9 Mar 2023 13:32:31 +0800
Subject: [PATCH 107/691] [X86] Create extra prolog/epilog for stack
 realignment

The base pointer register is reserved by the compiler when there is a
dynamic-size alloca and stack realignment in a function. However, the base
pointer register is not defined in the X86 ABI, so the user can use this
register in inline assembly. The inline assembly would then clobber the base
pointer register without the user being aware of it. This patch creates an
extra prolog that saves the stack pointer to a scratch register and uses this
register to reference arguments from the stack. For some calling conventions
(e.g. regcall), there may be few scratch registers. Below is example code for
such a case.

```
extern int bar(void *p);
long long foo(size_t size, char c, int id) {
  __attribute__((__aligned__(64))) int a;
  char *p = (char *)alloca(size);
  asm volatile ("nop"::"S"(405):);
  asm volatile ("movl %0, %1"::"r"(id), "m"(a):);
  p[2] = 8;
  memset(p, c, size);
  return bar(p);
}
```

And the prolog/epilog below will be emitted for this case.

```
leal    4(%esp), %ebx
.cfi_def_cfa %ebx, 0
andl    $-128, %esp
pushl   -4(%ebx)
...
leal    4(%ebx), %esp
.cfi_def_cfa %esp, 4
```

Differential Revision: https://reviews.llvm.org/D145650
---
 llvm/lib/Target/X86/CMakeLists.txt            |   1 +
 llvm/lib/Target/X86/X86.h                     |   2 +
 .../Target/X86/X86ArgumentStackSlotRebase.cpp | 195 ++++++++++++++
 llvm/lib/Target/X86/X86FrameLowering.cpp      | 192 +++++++++++++--
 llvm/lib/Target/X86/X86MachineFunctionInfo.h  |   5 +
 llvm/lib/Target/X86/X86RegisterInfo.cpp       |  44 ++++
 llvm/lib/Target/X86/X86RegisterInfo.h         |   4 +
 llvm/lib/Target/X86/X86RegisterInfo.td        |  12 ++
 llvm/lib/Target/X86/X86TargetMachine.cpp      |   2 +
 .../CodeGen/MIR/X86/inline-asm-registers.mir  |   8 +-
 llvm/test/CodeGen/X86/O0-pipeline.ll          |   1 +
 llvm/test/CodeGen/X86/i386-baseptr.ll         |  22 +-
 llvm/test/CodeGen/X86/opt-pipeline.ll         |   1 +
 .../X86/statepoint-invoke-ra-enter-at-end.mir |   4 +-
 llvm/test/CodeGen/X86/x86-64-baseptr.ll       | 118 ++++++-----
 15 files changed, 529 insertions(+), 82 deletions(-)
 create mode 100644 llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp

diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt
index 545e8a38b6961..5eba6db5b06eb 100644
--- a/llvm/lib/Target/X86/CMakeLists.txt
+++ b/llvm/lib/Target/X86/CMakeLists.txt
@@ -26,6 +26,7 @@ endif()
 add_public_tablegen_target(X86CommonTableGen)
 
 set(sources
+  X86ArgumentStackSlotRebase.cpp
   X86AsmPrinter.cpp
   X86AvoidTrailingCall.cpp
   X86CallFrameOptimization.cpp
diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h
index 044b2636f951c..5ff9d7facc878 100644
--- a/llvm/lib/Target/X86/X86.h
+++ b/llvm/lib/Target/X86/X86.h
@@ -166,11 +166,13 @@ FunctionPass *createX86LoadValueInjectionLoadHardeningPass();
 FunctionPass *createX86LoadValueInjectionRetHardeningPass();
 FunctionPass *createX86SpeculativeLoadHardeningPass();
 FunctionPass *createX86SpeculativeExecutionSideEffectSuppression();
+FunctionPass *createX86ArgumentStackSlotPass();
 
 void initializeEvexToVexInstPassPass(PassRegistry &);
 void initializeFPSPass(PassRegistry &);
 void initializeFixupBWInstPassPass(PassRegistry &);
 void initializeFixupLEAPassPass(PassRegistry &);
+void initializeX86ArgumentStackSlotPassPass(PassRegistry &);
 void initializeX86FixupInstTuningPassPass(PassRegistry &);
 void initializeWinEHStatePassPass(PassRegistry &);
 void initializeX86AvoidSFBPassPass(PassRegistry &);
diff --git a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp
new file mode 100644
index 0000000000000..ef3751e40f17f
--- /dev/null
+++ b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp
@@ -0,0 +1,195 @@
+//===---- X86ArgumentStackSlotRebase.cpp - rebase argument stack slot -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces the frame register with a GPR virtual register and sets
+// the stack offset for each instruction that references an argument from the
+// stack.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86argumentstackrebase"
+
+namespace {
+
+class X86ArgumentStackSlotPass : public MachineFunctionPass {
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  explicit X86ArgumentStackSlotPass() : MachineFunctionPass(ID) {
+    initializeX86ArgumentStackSlotPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // end anonymous namespace
+
+char X86ArgumentStackSlotPass::ID = 0;
+
+INITIALIZE_PASS(X86ArgumentStackSlotPass, DEBUG_TYPE, "Argument Stack Rebase",
+                false, false)
+
+FunctionPass *llvm::createX86ArgumentStackSlotPass() {
+  return new X86ArgumentStackSlotPass();
+}
+
+static Register getArgBaseReg(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const Function &F = MF.getFunction();
+  CallingConv::ID CC = F.getCallingConv();
+  Register NoReg;
+  const TargetRegisterClass *RC = nullptr;
+  switch (CC) {
+  // We need a virtual register in case inline assembly
+  // clobbers the argument base register.
+  case CallingConv::C:
+    RC = STI.is64Bit() ? &X86::GR64_ArgRefRegClass : &X86::GR32_ArgRefRegClass;
+    break;
+  case CallingConv::X86_RegCall:
+    // FIXME: For regcall there is no scratch register on 32-bit target.
+    // We may use a callee saved register as argument base register and
+    // save it before being changed as base pointer. We need DW_CFA to
We need DW_CFA to + // indicate where the callee saved register is saved, so that it can + // be correctly unwind. + // push ebx + // mov ebx, esp + // and esp, -128 + // ... + // pop ebx + // ret + RC = STI.is64Bit() ? &X86::GR64_ArgRefRegClass : nullptr; + break; + // TODO: Refine register class for each calling convention. + default: + break; + } + if (RC) + return MRI.createVirtualRegister(RC); + else + return NoReg; +} + +bool X86ArgumentStackSlotPass::runOnMachineFunction(MachineFunction &MF) { + const Function &F = MF.getFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + const X86Subtarget &STI = MF.getSubtarget(); + const X86RegisterInfo *TRI = STI.getRegisterInfo(); + const X86InstrInfo *TII = STI.getInstrInfo(); + X86MachineFunctionInfo *X86FI = MF.getInfo(); + bool Changed = false; + + if (F.hasFnAttribute(Attribute::Naked)) + return false; + // Only support Linux + if (!STI.isTargetLinux()) + return false; + if (!TRI->hasBasePointer(MF)) + return false; + + Register BasePtr = TRI->getBaseRegister(); + auto IsBaseRegisterClobbered = [&]() { + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (!MI.isInlineAsm()) + continue; + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (!Register::isPhysicalRegister(Reg)) + continue; + if (TRI->isSuperOrSubRegisterEq(BasePtr, Reg)) + return true; + } + } + } + return false; + }; + if (!IsBaseRegisterClobbered()) + return false; + + Register ArgBaseReg = getArgBaseReg(MF); + if (!ArgBaseReg.isValid()) + return false; + // leal 4(%esp), %reg + // FIXME: will the instruction be duplicated or eliminated? Should + // define a pseudo instruction for it? + MachineBasicBlock &MBB = MF.front(); + MachineBasicBlock::iterator MBBI = MBB.begin(); + DebugLoc DL; + // Emit instruction to copy get stack pointer to a virtual register + // and save the instruction to x86 machine functon info. We can get + // physical register of ArgBaseReg after register allocation. The + // stack slot is used to save/restore argument base pointer. We can + // get the index from the instruction. + unsigned SlotSize = TRI->getSlotSize(); + int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize)); + MachineInstr *LEA = + BuildMI(MBB, MBBI, DL, + TII->get(STI.is64Bit() ? X86::LEA64r : X86::LEA32r), ArgBaseReg) + .addFrameIndex(FI) + .addImm(1) + .addUse(X86::NoRegister) + .addImm(SlotSize) + .addUse(X86::NoRegister) + .setMIFlag(MachineInstr::FrameSetup); + X86FI->setStackPtrSaveMI(LEA); + + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + int I = 0; + for (MachineOperand &MO : MI.operands()) { + if (MO.isFI()) { + int Idx = MO.getIndex(); + if (!MFI.isFixedObjectIndex(Idx)) + continue; + int64_t Offset = MFI.getObjectOffset(Idx); + if (Offset < 0) + continue; + // TODO replace register for debug instruction + if (MI.isDebugInstr()) + continue; + // Replace frame register with argument base pointer and its offset. 
+          TRI->eliminateFrameIndex(MI.getIterator(), I, ArgBaseReg, Offset);
+          Changed = true;
+        }
+        ++I;
+      }
+    }
+  }
+
+  return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index cb42a1025ea21..78f96817c8420 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Target/TargetOptions.h"
 #include <cstdlib>
@@ -476,6 +477,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
   // Add callee saved registers to move list.
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
@@ -487,13 +489,62 @@
     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
 
     if (IsPrologue) {
-      BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+      if (X86FI->getStackPtrSaveMI()) {
+        // +2*SlotSize because there are the return address and ebp at the
+        // bottom of the stack.
+        // | retaddr |
+        // | ebp     |
+        // |         |<--ebp
+        Offset += 2 * SlotSize;
+        SmallString<64> CfaExpr;
+        CfaExpr.push_back(dwarf::DW_CFA_expression);
+        uint8_t buffer[16];
+        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+        CfaExpr.push_back(2);
+        Register FramePtr = TRI->getFrameRegister(MF);
+        const Register MachineFramePtr =
+            STI.isTarget64BitILP32()
+                ? Register(getX86SubSuperRegister(FramePtr, 64))
+                : FramePtr;
+        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
+        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
+        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+                 MachineInstr::FrameSetup);
+      } else {
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+      }
     } else {
       BuildCFI(MBB, MBBI, DL,
                MCCFIInstruction::createRestore(nullptr, DwarfReg));
     }
   }
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    int FI = MI->getOperand(1).getIndex();
+    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
+    SmallString<64> CfaExpr;
+    Register FramePtr = TRI->getFrameRegister(MF);
+    const Register MachineFramePtr =
+        STI.isTarget64BitILP32()
+            ? Register(getX86SubSuperRegister(FramePtr, 64))
+            : FramePtr;
+    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
+    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
+    uint8_t buffer[16];
+    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+    CfaExpr.push_back(dwarf::DW_OP_deref);
+
+    SmallString<64> DefCfaExpr;
+    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
+    DefCfaExpr.append(CfaExpr.str());
+    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
+    BuildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
+             MachineInstr::FrameSetup);
+  }
 }
 
 void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
@@ -1509,6 +1560,42 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
   DebugLoc DL;
+  Register ArgBaseReg;
+
+  // Emit extra prolog for argument stack slot reference.
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    // MI is the lea instruction created in X86ArgumentStackSlotPass.
+    // Create extra prolog for stack realignment.
+    ArgBaseReg = MI->getOperand(0).getReg();
+    // leal 4(%esp), %basereg
+    // .cfi_def_cfa %basereg, 0
+    // andl $-128, %esp
+    // pushl -4(%basereg)
+    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
+            ArgBaseReg)
+        .addUse(StackPtr)
+        .addImm(1)
+        .addUse(X86::NoRegister)
+        .addImm(SlotSize)
+        .addUse(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameSetup);
+    if (NeedsDwarfCFI) {
+      // .cfi_def_cfa %basereg, 0
+      unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
+               MachineInstr::FrameSetup);
+    }
+    BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
+    int64_t Offset = Is64Bit ? -2 * (int64_t)SlotSize : -1 * (int64_t)SlotSize;
+    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
+        .addReg(ArgBaseReg)
+        .addImm(1)
+        .addReg(X86::NoRegister)
+        .addImm(Offset)
+        .addReg(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   // Space reserved for stack-based arguments when making a (ABI-guaranteed)
   // tail call.
@@ -1640,7 +1727,7 @@
         .addReg(MachineFramePtr, RegState::Kill)
         .setMIFlag(MachineInstr::FrameSetup);
 
-    if (NeedsDwarfCFI) {
+    if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
       // Mark the place where EBP/RBP was saved.
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
@@ -1717,13 +1804,28 @@
           .setMIFlag(MachineInstr::FrameSetup);
 
       if (NeedsDwarfCFI) {
-        // Mark effective beginning of when frame pointer becomes valid.
-        // Define the current CFA to use the EBP/RBP register.
-        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
-        BuildCFI(
-            MBB, MBBI, DL,
-            MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
-            MachineInstr::FrameSetup);
+        if (ArgBaseReg.isValid()) {
+          SmallString<64> CfaExpr;
+          CfaExpr.push_back(dwarf::DW_CFA_expression);
+          uint8_t buffer[16];
+          unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
+          CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+          CfaExpr.push_back(2);
+          CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
+          CfaExpr.push_back(0);
+          // DW_CFA_expression: reg5 DW_OP_breg5 +0
+          BuildCFI(MBB, MBBI, DL,
+                   MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+                   MachineInstr::FrameSetup);
+        } else {
+          // Mark effective beginning of when frame pointer becomes valid.
+          // Define the current CFA to use the EBP/RBP register.
+          unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+          BuildCFI(
+              MBB, MBBI, DL,
+              MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
+              MachineInstr::FrameSetup);
+        }
       }
 
       if (NeedsWinFPO) {
@@ -1790,7 +1892,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   // Realign stack after we pushed callee-saved registers (so that we'll be
   // able to calculate their offsets from the frame pointer).
   // Don't do this for Win64, it needs to realign the stack after the prologue.
- if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) { + if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) && + !ArgBaseReg.isValid()) { assert(HasFP && "There should be a frame pointer if stack is realigned."); BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign); @@ -2048,6 +2151,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } } + if (ArgBaseReg.isValid()) { + // Save argument base pointer. + auto *MI = X86FI->getStackPtrSaveMI(); + int FI = MI->getOperand(1).getIndex(); + unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr; + // movl %basereg, offset(%ebp) + addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI) + .addReg(ArgBaseReg) + .setMIFlag(MachineInstr::FrameSetup); + } if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { // Mark end of stack pointer adjustment. @@ -2196,6 +2309,34 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, !MF.getTarget().getTargetTriple().isOSWindows()) && MF.needsFrameMoves(); + Register ArgBaseReg; + if (auto *MI = X86FI->getStackPtrSaveMI()) { + unsigned Opc = X86::LEA32r; + Register StackReg = X86::ESP; + ArgBaseReg = MI->getOperand(0).getReg(); + if (STI.is64Bit()) { + Opc = X86::LEA64r; + StackReg = X86::RSP; + } + // leal -8(%basereg), %esp + // .cfi_def_cfa %esp, 4 + BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg) + .addUse(ArgBaseReg) + .addImm(1) + .addUse(X86::NoRegister) + .addImm((int64_t)SlotSize * -2) + .addUse(X86::NoRegister) + .setMIFlag(MachineInstr::FrameDestroy); + if (NeedsDwarfCFI) { + unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true); + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize), + MachineInstr::FrameDestroy); + --MBBI; + } + --MBBI; + } + if (IsFunclet) { assert(HasFP && "EH funclets without FP not yet implemented"); NumBytes = getWinEHFuncletFrameSize(MF); @@ -2237,11 +2378,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } if (NeedsDwarfCFI) { - unsigned DwarfStackPtr = - TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize), - MachineInstr::FrameDestroy); + if (!ArgBaseReg.isValid()) { + unsigned DwarfStackPtr = + TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true); + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize), + MachineInstr::FrameDestroy); + } if (!MBB.succ_empty() && !MBB.isReturnBlock()) { unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); BuildCFI(MBB, AfterPop, DL, @@ -2271,6 +2414,15 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, --MBBI; } + if (ArgBaseReg.isValid()) { + // Restore argument base pointer. + auto *MI = X86FI->getStackPtrSaveMI(); + int FI = MI->getOperand(1).getIndex(); + unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm; + // movl offset(%ebp), %basereg + addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI) + .setMIFlag(MachineInstr::FrameDestroy); + } MBBI = FirstCSPop; if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET) @@ -3889,8 +4041,16 @@ void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const { void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced( MachineFunction &MF, RegScavenger *RS) const { + auto *X86FI = MF.getInfo(); + if (STI.is32Bit() && MF.hasEHFunclets()) restoreWinEHStackPointersInParent(MF); + // We have emitted prolog and epilog. 
The stack pointer save
+  // instruction is no longer needed.
+  if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
+    MI->eraseFromParent();
+    X86FI->setStackPtrSaveMI(nullptr);
+  }
 }

 void X86FrameLowering::restoreWinEHStackPointersInParent(
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 372838a212cc8..31c087df9a911 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -117,6 +117,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   /// determine if we should insert tilerelease in frame lowering.
   bool HasVirtualTileReg = false;

+  MachineInstr *StackPtrSaveMI = nullptr;
+
   std::optional<int> SwiftAsyncContextFrameIdx;

   // Preallocated fields are only used during isel.
@@ -225,6 +227,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   bool hasVirtualTileReg() const { return HasVirtualTileReg; }
   void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }

+  void setStackPtrSaveMI(MachineInstr *MI) { StackPtrSaveMI = MI; }
+  MachineInstr *getStackPtrSaveMI() const { return StackPtrSaveMI; }
+
   std::optional<int> getSwiftAsyncContextFrameIdx() const {
     return SwiftAsyncContextFrameIdx;
   }
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 327c61c4c5d0b..0edc0a432f8e0 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -702,6 +702,11 @@ static bool CantUseSP(const MachineFrameInfo &MFI) {
 bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

+  // We have a virtual register to reference the argument, so we don't need a
+  // base pointer.
+  if (X86FI->getStackPtrSaveMI() != nullptr)
+    return false;
+
   if (X86FI->hasPreallocatedCall())
     return true;

@@ -778,6 +783,45 @@ static bool isFuncletReturnInstr(MachineInstr &MI) {
   llvm_unreachable("impossible");
 }

+void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                          unsigned FIOperandNum,
+                                          Register BaseReg,
+                                          int FIOffset) const {
+  MachineInstr &MI = *II;
+  unsigned Opc = MI.getOpcode();
+  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
+    MachineOperand &FI = MI.getOperand(FIOperandNum);
+    FI.ChangeToImmediate(FIOffset);
+    return;
+  }
+
+  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
+
+  // The frame index format for stackmaps and patchpoints is different from the
+  // X86 format. It only has a FI and an offset.
+  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
+    assert(BasePtr == FramePtr && "Expected the FP as base register");
+    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
+    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+    return;
+  }
+
+  if (MI.getOperand(FIOperandNum + 3).isImm()) {
+    // Offset is a 32-bit integer.
+    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
+    int Offset = FIOffset + Imm;
+    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+           "Requesting 64-bit offset in 32-bit immediate!");
+    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
+      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
+  } else {
+    // Offset is symbolic. This is extremely rare.
+    uint64_t Offset =
+        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
+    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
+  }
+}
+
 bool
 X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                      int SPAdj, unsigned FIOperandNum,
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index f88d4b18f1d86..48eeb72479f8c 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,10 @@ class X86RegisterInfo final : public X86GenRegisterInfo {

   bool canRealignStack(const MachineFunction &MF) const override;

+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           unsigned FIOperandNum, Register BaseReg,
+                           int FIOffset) const;
+
   bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index 4ef7150122ca8..1e6477e658b9d 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -433,6 +433,18 @@ def GR64PLTSafe : RegisterClass<"X86", [i64], 64,
                                 (add RAX, RCX, RDX, RSI, RDI, R8, R9,
                                      RBX, R14, R15, R12, R13, RBP)>;

+// Includes the GPRs that are used as scratch registers by the Linux64 calling
+// convention.
+def GR64_ArgRef : RegisterClass<"X86", [i64], 64, (add R10, R11)> {
+  let GeneratePressureSet = 0;
+}
+
+// Includes the GPRs that are used as scratch registers by the Linux32 calling
+// convention.
+def GR32_ArgRef : RegisterClass<"X86", [i32], 32, (add ECX, EDX)> {
+  let GeneratePressureSet = 0;
+}
+
 // Segment registers for use by MOV instructions (and others) that have a
 // segment register as one operand. Always contain a 16-bit segment
 // descriptor.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index dfb7030def7e7..d870f9b031f57 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -104,6 +104,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
   initializePseudoProbeInserterPass(PR);
   initializeX86ReturnThunksPass(PR);
   initializeX86DAGToDAGISelPass(PR);
+  initializeX86ArgumentStackSlotPassPass(PR);
 }

 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -518,6 +519,7 @@ bool X86PassConfig::addPreISel() {
 }

 void X86PassConfig::addPreRegAlloc() {
+  addPass(createX86ArgumentStackSlotPass());
   if (getOptLevel() != CodeGenOpt::None) {
     addPass(&LiveRangeShrinkID);
     addPass(createX86FixupSetCC());
diff --git a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
index 0202103c8ff4d..f92d49cabdcda 100644
--- a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
+++ b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
@@ -28,8 +28,8 @@ body: |
     liveins: $rdi, $rsi

     ; CHECK-LABEL: name: test
-    ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4456458 /* regdef:GR64 */, def $rsi, 4456458 /* regdef:GR64 */, def dead $rdi,
-    INLINEASM &foo, 0, 4456458, def $rsi, 4456458, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags
+    ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi,
+    INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags
     $rax = MOV64rr killed $rsi
     RET64 killed $rax
 ...
@@ -45,8 +45,8 @@ body: | ; Verify that the register ties are preserved. ; CHECK-LABEL: name: test2 - ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4456458 /* regdef:GR64 */, def $rsi, 4456458 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags - INLINEASM &foo, 0, 4456458, def $rsi, 4456458, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags + ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags + INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags $rax = MOV64rr killed $rsi RET64 killed $rax ... diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index e78464120426d..01deb3fbc00d3 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -41,6 +41,7 @@ ; CHECK-NEXT: X86 PIC Global Base Reg Initialization ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: Local Stack Slot Allocation +; CHECK-NEXT: Argument Stack Rebase ; CHECK-NEXT: X86 speculative load hardening ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: X86 EFLAGS copy lowering diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll index 8baa16c076bac..cb8d849a86841 100644 --- a/llvm/test/CodeGen/X86/i386-baseptr.ll +++ b/llvm/test/CodeGen/X86/i386-baseptr.ll @@ -39,16 +39,18 @@ entry: define void @clobber_base() #0 { ; CHECK-LABEL: clobber_base: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: .cfi_def_cfa %ecx, 0 +; CHECK-NEXT: andl $-128, %esp +; CHECK-NEXT: pushl -4(%ecx) ; CHECK-NEXT: pushl %ebp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: .cfi_offset %ebp, -8 ; CHECK-NEXT: movl %esp, %ebp -; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: .cfi_escape 0x10, 0x05, 0x02, 0x75, 0x00 # ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: andl $-128, %esp -; CHECK-NEXT: subl $128, %esp -; CHECK-NEXT: movl %esp, %esi -; CHECK-NEXT: .cfi_offset %esi, -12 +; CHECK-NEXT: subl $244, %esp +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x75, 0x7c # +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x75, 0x84, 0x7f, 0x06 # ; CHECK-NEXT: calll helper@PLT ; CHECK-NEXT: movl %esp, %ecx ; CHECK-NEXT: leal 31(,%eax,4), %eax @@ -63,12 +65,14 @@ define void @clobber_base() #0 { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $8, %edx ; CHECK-NEXT: #APP -; CHECK-NEXT: movl %edx, (%esi) +; CHECK-NEXT: movl %edx, -120(%ebp) ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $0, (%ecx,%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: leal -4(%ebp), %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %ebp +; CHECK-NEXT: leal -8(%ecx), %esp ; CHECK-NEXT: .cfi_def_cfa %esp, 4 ; CHECK-NEXT: retl entry: @@ -135,6 +139,6 @@ entry: ret void } -attributes #0 = { "frame-pointer"="all"} +attributes #0 = {"frame-pointer"="all"} !llvm.module.flags = !{!0} !0 = !{i32 2, !"override-stack-alignment", i32 32} 
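The .cfi_escape directives in the updated CHECK lines above are raw DWARF call-frame bytes: 0x0f is DW_CFA_def_cfa_expression, 0x10 is DW_CFA_expression, 0x75 and 0x76 are DW_OP_breg5/DW_OP_breg6 (%ebp on i386, %rbp on x86-64), 0x06 is DW_OP_deref, and the offsets are SLEB128-encoded. As an illustration only (not part of the patch), a minimal SLEB128 decoder is enough to read them; for example, the escape 0x0f, 0x04, 0x75, 0x84, 0x7f, 0x06 above defines the CFA as *(%ebp - 124):

  #include <cstdint>
  #include <cstdio>

  // Minimal SLEB128 decoder, sufficient for the operands in the escapes above.
  static int64_t decodeSLEB128(const uint8_t *&P) {
    int64_t Value = 0;
    unsigned Shift = 0;
    uint8_t Byte;
    do {
      Byte = *P++;
      Value |= int64_t(Byte & 0x7f) << Shift;
      Shift += 7;
    } while (Byte & 0x80);
    if (Shift < 64 && (Byte & 0x40)) // sign bit of last byte set: extend
      Value |= -(int64_t(1) << Shift);
    return Value;
  }

  int main() {
    // Offset operand of ".cfi_escape 0x0f, 0x04, 0x75, 0x84, 0x7f, 0x06":
    // DW_CFA_def_cfa_expression, block length 4, DW_OP_breg5 (%ebp),
    // SLEB128 offset, DW_OP_deref.
    const uint8_t Offset[] = {0x84, 0x7f};
    const uint8_t *P = Offset;
    std::printf("CFA = *(%%ebp + %lld)\n", (long long)decodeSLEB128(P)); // -124
  }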
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 4361b863c25b0..5c7e6d1da32c7 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -116,6 +116,7 @@ ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions +; CHECK-NEXT: Argument Stack Rebase ; CHECK-NEXT: Live Range Shrink ; CHECK-NEXT: X86 Fixup SetCC ; CHECK-NEXT: Lazy Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir index 2170573f4b0d9..fe057a8d78eb1 100644 --- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir +++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir @@ -350,7 +350,7 @@ body: | ; CHECK-NEXT: CMP64rr [[NOT64r2]], [[COPY6]], implicit-def $eflags ; CHECK-NEXT: undef %100.sub_32bit:gr64_with_sub_8bit = MOV32ri 0 ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], %100, 4, implicit killed $eflags - ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4456457 /* reguse:GR64 */, %100, 4456457 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags + ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %100, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags ; CHECK-NEXT: LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, [[COPY5]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-NEXT: $rdi = COPY [[COPY4]] @@ -470,7 +470,7 @@ body: | %63:gr64 = NOT64r %63 CMP64rr %63, %31, implicit-def $eflags %63:gr64 = CMOV64rr %63, %53, 4, implicit killed $eflags - INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4456457 /* reguse:GR64 */, %53, 4456457 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags + INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %53, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, %65, implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $rdi = COPY %64 diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll index 914a5a4796247..9b6256189cdfc 100644 --- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll +++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll @@ -77,16 +77,18 @@ entry: define void @clobber_base() #0 { ; CHECK-LABEL: clobber_base: 
; CHECK: # %bb.0: # %entry +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r10 +; CHECK-NEXT: .cfi_def_cfa %r10, 0 +; CHECK-NEXT: andq $-128, %rsp +; CHECK-NEXT: pushq -16(%r10) ; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 # ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: andq $-128, %rsp -; CHECK-NEXT: subq $128, %rsp -; CHECK-NEXT: movq %rsp, %rbx -; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: subq $232, %rsp +; CHECK-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 # +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7f, 0x06 # ; CHECK-NEXT: callq helper@PLT ; CHECK-NEXT: movq %rsp, %rcx ; CHECK-NEXT: movl %eax, %eax @@ -102,27 +104,31 @@ define void @clobber_base() #0 { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $8, %edx ; CHECK-NEXT: #APP -; CHECK-NEXT: movl %edx, (%rbx) +; CHECK-NEXT: movl %edx, -112(%rbp) ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $0, (%rcx,%rax) +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; CHECK-NEXT: leaq -8(%rbp), %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp +; CHECK-NEXT: leaq -16(%r10), %rsp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ; CHECK-NEXT: retq ; ; X32ABI-LABEL: clobber_base: ; X32ABI: # %bb.0: # %entry +; X32ABI-NEXT: leaq {{[0-9]+}}(%esp), %r10 +; X32ABI-NEXT: .cfi_def_cfa %r10, 0 +; X32ABI-NEXT: andl $-128, %esp +; X32ABI-NEXT: pushq -16(%r10) ; X32ABI-NEXT: pushq %rbp -; X32ABI-NEXT: .cfi_def_cfa_offset 16 -; X32ABI-NEXT: .cfi_offset %rbp, -16 ; X32ABI-NEXT: movl %esp, %ebp -; X32ABI-NEXT: .cfi_def_cfa_register %rbp +; X32ABI-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 # ; X32ABI-NEXT: pushq %rbx -; X32ABI-NEXT: andl $-128, %esp -; X32ABI-NEXT: subl $128, %esp -; X32ABI-NEXT: movl %esp, %ebx -; X32ABI-NEXT: .cfi_offset %rbx, -24 +; X32ABI-NEXT: subl $232, %esp +; X32ABI-NEXT: movq %r10, {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Spill +; X32ABI-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 # +; X32ABI-NEXT: .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7f, 0x06 # ; X32ABI-NEXT: callq helper@PLT ; X32ABI-NEXT: # kill: def $eax killed $eax def $rax ; X32ABI-NEXT: leal 31(,%rax,4), %eax @@ -138,12 +144,14 @@ define void @clobber_base() #0 { ; X32ABI-NEXT: #NO_APP ; X32ABI-NEXT: movl $8, %edx ; X32ABI-NEXT: #APP -; X32ABI-NEXT: movl %edx, (%ebx) +; X32ABI-NEXT: movl %edx, -112(%ebp) ; X32ABI-NEXT: #NO_APP ; X32ABI-NEXT: movl $0, (%ecx,%eax) +; X32ABI-NEXT: movq {{[-0-9]+}}(%e{{[sb]}}p), %r10 # 8-byte Reload ; X32ABI-NEXT: leal -8(%ebp), %esp ; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: popq %rbp +; X32ABI-NEXT: leaq -16(%r10), %rsp ; X32ABI-NEXT: .cfi_def_cfa %rsp, 8 ; X32ABI-NEXT: retq entry: @@ -160,14 +168,15 @@ entry: define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 %param3, i32 %param4, i32 %param5, i32 %param6, i32 %param7, i32 %param8, i32 %param9, i32 %param10, i32 %param11, i32 %param12) #0 { ; CHECK-LABEL: clobber_baseptr_argptr: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r10 +; CHECK-NEXT: .cfi_def_cfa %r10, 0 +; CHECK-NEXT: andq $-128, %rsp +; CHECK-NEXT: pushq -16(%r10) ; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 # ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: andq $-128, %rsp 
-; CHECK-NEXT: subq $256, %rsp # imm = 0x100 +; CHECK-NEXT: subq $360, %rsp # imm = 0x168 ; CHECK-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -176,17 +185,18 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; CHECK-NEXT: movaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movq %rsp, %rbx -; CHECK-NEXT: .cfi_offset %rbx, -24 -; CHECK-NEXT: .cfi_offset %xmm8, -160 -; CHECK-NEXT: .cfi_offset %xmm9, -144 -; CHECK-NEXT: .cfi_offset %xmm10, -128 -; CHECK-NEXT: .cfi_offset %xmm11, -112 -; CHECK-NEXT: .cfi_offset %xmm12, -96 -; CHECK-NEXT: .cfi_offset %xmm13, -80 -; CHECK-NEXT: .cfi_offset %xmm14, -64 -; CHECK-NEXT: .cfi_offset %xmm15, -48 -; CHECK-NEXT: movl 16(%rbp), %r14d +; CHECK-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 # +; CHECK-NEXT: .cfi_escape 0x10, 0x19, 0x02, 0x76, 0xf0, 0x7e # +; CHECK-NEXT: .cfi_escape 0x10, 0x1a, 0x02, 0x76, 0x80, 0x7f # +; CHECK-NEXT: .cfi_escape 0x10, 0x1b, 0x02, 0x76, 0x90, 0x7f # +; CHECK-NEXT: .cfi_escape 0x10, 0x1c, 0x02, 0x76, 0xa0, 0x7f # +; CHECK-NEXT: .cfi_escape 0x10, 0x1d, 0x02, 0x76, 0xb0, 0x7f # +; CHECK-NEXT: .cfi_escape 0x10, 0x1e, 0x02, 0x76, 0x40 # +; CHECK-NEXT: .cfi_escape 0x10, 0x1f, 0x02, 0x76, 0x50 # +; CHECK-NEXT: .cfi_escape 0x10, 0x20, 0x02, 0x76, 0x60 # +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7e, 0x06 # +; CHECK-NEXT: movl (%r10), %r14d ; CHECK-NEXT: callq helper@PLT ; CHECK-NEXT: movq %rsp, %rcx ; CHECK-NEXT: movl %eax, %eax @@ -205,7 +215,7 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $8, %edx ; CHECK-NEXT: #APP -; CHECK-NEXT: movl %edx, (%rbx) +; CHECK-NEXT: movl %edx, -240(%rbp) ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl %r14d, (%rcx,%rax) ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload @@ -216,22 +226,25 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; CHECK-NEXT: leaq -8(%rbp), %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp +; CHECK-NEXT: leaq -16(%r10), %rsp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ; CHECK-NEXT: retq ; ; X32ABI-LABEL: clobber_baseptr_argptr: ; X32ABI: # %bb.0: # %entry +; X32ABI-NEXT: leaq {{[0-9]+}}(%esp), %r10 +; X32ABI-NEXT: .cfi_def_cfa %r10, 0 +; X32ABI-NEXT: andl $-128, %esp +; X32ABI-NEXT: pushq -16(%r10) ; X32ABI-NEXT: pushq %rbp -; X32ABI-NEXT: .cfi_def_cfa_offset 16 -; X32ABI-NEXT: .cfi_offset %rbp, -16 ; X32ABI-NEXT: movl %esp, %ebp -; X32ABI-NEXT: .cfi_def_cfa_register %rbp +; X32ABI-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 # ; X32ABI-NEXT: pushq %rbx -; X32ABI-NEXT: andl $-128, %esp -; X32ABI-NEXT: subl $256, %esp # imm = 0x100 +; X32ABI-NEXT: subl $360, %esp # imm = 0x168 ; X32ABI-NEXT: movaps %xmm15, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32ABI-NEXT: movaps %xmm14, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32ABI-NEXT: movaps %xmm13, 
{{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill @@ -240,17 +253,18 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; X32ABI-NEXT: movaps %xmm10, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32ABI-NEXT: movaps %xmm9, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32ABI-NEXT: movaps %xmm8, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; X32ABI-NEXT: movl %esp, %ebx -; X32ABI-NEXT: .cfi_offset %rbx, -24 -; X32ABI-NEXT: .cfi_offset %xmm8, -160 -; X32ABI-NEXT: .cfi_offset %xmm9, -144 -; X32ABI-NEXT: .cfi_offset %xmm10, -128 -; X32ABI-NEXT: .cfi_offset %xmm11, -112 -; X32ABI-NEXT: .cfi_offset %xmm12, -96 -; X32ABI-NEXT: .cfi_offset %xmm13, -80 -; X32ABI-NEXT: .cfi_offset %xmm14, -64 -; X32ABI-NEXT: .cfi_offset %xmm15, -48 -; X32ABI-NEXT: movl 16(%ebp), %r14d +; X32ABI-NEXT: movq %r10, {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Spill +; X32ABI-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 # +; X32ABI-NEXT: .cfi_escape 0x10, 0x19, 0x02, 0x76, 0xf0, 0x7e # +; X32ABI-NEXT: .cfi_escape 0x10, 0x1a, 0x02, 0x76, 0x80, 0x7f # +; X32ABI-NEXT: .cfi_escape 0x10, 0x1b, 0x02, 0x76, 0x90, 0x7f # +; X32ABI-NEXT: .cfi_escape 0x10, 0x1c, 0x02, 0x76, 0xa0, 0x7f # +; X32ABI-NEXT: .cfi_escape 0x10, 0x1d, 0x02, 0x76, 0xb0, 0x7f # +; X32ABI-NEXT: .cfi_escape 0x10, 0x1e, 0x02, 0x76, 0x40 # +; X32ABI-NEXT: .cfi_escape 0x10, 0x1f, 0x02, 0x76, 0x50 # +; X32ABI-NEXT: .cfi_escape 0x10, 0x20, 0x02, 0x76, 0x60 # +; X32ABI-NEXT: .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7e, 0x06 # +; X32ABI-NEXT: movl (%r10), %r14d ; X32ABI-NEXT: callq helper@PLT ; X32ABI-NEXT: # kill: def $eax killed $eax def $rax ; X32ABI-NEXT: leal 31(,%rax,4), %eax @@ -269,7 +283,7 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; X32ABI-NEXT: #NO_APP ; X32ABI-NEXT: movl $8, %edx ; X32ABI-NEXT: #APP -; X32ABI-NEXT: movl %edx, (%ebx) +; X32ABI-NEXT: movl %edx, -240(%ebp) ; X32ABI-NEXT: #NO_APP ; X32ABI-NEXT: movl %r14d, (%ecx,%eax) ; X32ABI-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm8 # 16-byte Reload @@ -280,9 +294,11 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; X32ABI-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm13 # 16-byte Reload ; X32ABI-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm14 # 16-byte Reload ; X32ABI-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm15 # 16-byte Reload +; X32ABI-NEXT: movq {{[-0-9]+}}(%e{{[sb]}}p), %r10 # 8-byte Reload ; X32ABI-NEXT: leal -8(%ebp), %esp ; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: popq %rbp +; X32ABI-NEXT: leaq -16(%r10), %rsp ; X32ABI-NEXT: .cfi_def_cfa %rsp, 8 ; X32ABI-NEXT: retq entry: @@ -298,6 +314,6 @@ entry: ret void } -attributes #0 = { "frame-pointer"="all"} +attributes #0 = {"frame-pointer"="all"} !llvm.module.flags = !{!0} !0 = !{i32 2, !"override-stack-alignment", i32 32} From 1a79b25d36cebc84325e46be1fa3595c1dded785 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Tue, 21 Mar 2023 00:14:05 +0000 Subject: [PATCH 108/691] [gn build] Port e4c1dfed3837 --- llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn index e2351f36ed1b1..7c27f7ca21583 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn @@ -77,6 +77,7 @@ static_library("LLVMX86CodeGen") { deps += [ ":X86GenFoldTables" ] } sources = [ + "X86ArgumentStackSlotRebase.cpp", "X86AsmPrinter.cpp", "X86AvoidStoreForwardingBlocks.cpp", 
"X86AvoidTrailingCall.cpp", From 23ea2c4f4a928f196730f1e71617ceccb677ce32 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Mon, 20 Mar 2023 17:20:01 -0700 Subject: [PATCH 109/691] [libc][obvious] remove currently unused types Somehow having MBState and StructTmType in the definition for wchar was causing test failures. This should fix those. Differential Revision: https://reviews.llvm.org/D146476 --- libc/spec/spec.td | 1 - libc/spec/stdc.td | 2 -- 2 files changed, 3 deletions(-) diff --git a/libc/spec/spec.td b/libc/spec/spec.td index 7a691f255e0b7..2336754c5d030 100644 --- a/libc/spec/spec.td +++ b/libc/spec/spec.td @@ -61,7 +61,6 @@ def RestrictedSizeTPtr : RestrictedPtrType; def WCharType : NamedType<"wchar_t">; def WIntType : NamedType<"wint_t">; -def MBStateType : NamedType<"mbstate_t">; def LongDoublePtr : PtrType; diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index fd732b74fae1e..56ee9a60d10ae 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -1078,8 +1078,6 @@ def StdC : StandardSpec<"stdc"> { SizeTType, WIntType, WCharType, - MBStateType, - StructTmType, ], [], // Enumerations [ From 89359df8ca8726c288f3d5e1232eb3f3247dd505 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Sat, 18 Mar 2023 02:39:12 +0000 Subject: [PATCH 110/691] [support] Support printing floats in ScopedPrinter llvm-readobj will need the ability to print floats for use in HashHistograms. This adds that functionality to the ScopedPrinter and JSONScopedPrinter. Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D145277 --- llvm/include/llvm/Support/ScopedPrinter.h | 16 +++++ llvm/unittests/Support/ScopedPrinterTest.cpp | 72 +++++++++++++++++++- 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h index 15965bf79ade5..4aa695418f226 100644 --- a/llvm/include/llvm/Support/ScopedPrinter.h +++ b/llvm/include/llvm/Support/ScopedPrinter.h @@ -234,6 +234,14 @@ class ScopedPrinter { startLine() << Label << ": " << Value << "\n"; } + virtual void printNumber(StringRef Label, float Value) { + startLine() << Label << ": " << format("%5.1f", Value) << "\n"; + } + + virtual void printNumber(StringRef Label, double Value) { + startLine() << Label << ": " << format("%5.1f", Value) << "\n"; + } + template void printNumber(StringRef Label, StringRef Str, T Value) { printNumberImpl(Label, Str, to_string(Value)); @@ -586,6 +594,14 @@ class JSONScopedPrinter : public ScopedPrinter { JOS.attribute(Label, Value); } + void printNumber(StringRef Label, float Value) override { + JOS.attribute(Label, Value); + } + + void printNumber(StringRef Label, double Value) override { + JOS.attribute(Label, Value); + } + void printNumber(StringRef Label, const APSInt &Value) override { JOS.attributeBegin(Label); printAPSInt(Value); diff --git a/llvm/unittests/Support/ScopedPrinterTest.cpp b/llvm/unittests/Support/ScopedPrinterTest.cpp index b98fe5361481a..ea024ec05df6b 100644 --- a/llvm/unittests/Support/ScopedPrinterTest.cpp +++ b/llvm/unittests/Support/ScopedPrinterTest.cpp @@ -9,6 +9,7 @@ #include "llvm/Support/ScopedPrinter.h" #include "llvm/ADT/APSInt.h" #include "gtest/gtest.h" +#include #include using namespace llvm; @@ -553,9 +554,48 @@ TEST_F(ScopedPrinterTest, PrintNumber) { W.printNumber("apsint", LargeNum); W.printNumber("label", "value", 0); + + float MaxFloat = std::numeric_limits::max(); + float MinFloat = std::numeric_limits::min(); + float InfFloat = std::numeric_limits::infinity(); + float NaNFloat = 
std::nanf("1"); + W.printNumber("float-max", MaxFloat); + W.printNumber("float-min", MinFloat); + W.printNumber("float-inf", InfFloat); + W.printNumber("float-nan", NaNFloat); + W.printNumber("float-42.0", 42.0f); + W.printNumber("float-42.5625", 42.5625f); + + double MaxDouble = std::numeric_limits::max(); + double MinDouble = std::numeric_limits::min(); + double InfDouble = std::numeric_limits::infinity(); + double NaNDouble = std::nan("1"); + W.printNumber("double-max", MaxDouble); + W.printNumber("double-min", MinDouble); + W.printNumber("double-inf", InfDouble); + W.printNumber("double-nan", NaNDouble); + W.printNumber("double-42.0", 42.0); + W.printNumber("double-42.5625", 42.5625); }; - const char *ExpectedOut = R"(uint64_t-max: 18446744073709551615 + // Make sure when we check floating point representation we avoid + // implementation defined behavior. So format the max float/double, instead of + // hard coding it in the tests. Note: we can't just use std::to_string(), + // since we format the float in PrintNumber(). This isn't required for JSON + // formatting, since it uses exponents, which will be consistent. + + // Allocate a buffer large enough to represent large floating point values + // and construct the string representation for them there. + char Buf[512]; + + format("%5.1f", std::numeric_limits::max()).snprint(Buf, sizeof(Buf)); + std::string MaxFloatStr(Buf); + + format("%5.1f", std::numeric_limits::max()).snprint(Buf, sizeof(Buf)); + std::string MaxDoubleStr(Buf); + + std::string ExpectedOut = Twine( + R"(uint64_t-max: 18446744073709551615 uint64_t-min: 0 uint32_t-max: 4294967295 uint32_t-min: 0 @@ -573,7 +613,21 @@ int8_t-max: 127 int8_t-min: -128 apsint: 9999999999999999999999 label: value (0) -)"; +float-max: )" + MaxFloatStr + R"( +float-min: 0.0 +float-inf: inf +float-nan: nan +float-42.0: 42.0 +float-42.5625: 42.6 +double-max: )" + MaxDoubleStr + + R"( +double-min: 0.0 +double-inf: inf +double-nan: nan +double-42.0: 42.0 +double-42.5625: 42.6 +)") + .str(); const char *JSONExpectedOut = R"({ "uint64_t-max": 18446744073709551615, @@ -596,7 +650,19 @@ label: value (0) "label": { "Name": "value", "Value": 0 - } + }, + "float-max": 3.4028234663852886e+38, + "float-min": 1.1754943508222875e-38, + "float-inf": inf, + "float-nan": nan, + "float-42.0": 42, + "float-42.5625": 42.5625, + "double-max": 1.7976931348623157e+308, + "double-min": 2.2250738585072014e-308, + "double-inf": inf, + "double-nan": nan, + "double-42.0": 42, + "double-42.5625": 42.5625 })"; verifyAll(ExpectedOut, JSONExpectedOut, PrintFunc); } From 0d3f7d2ab333f55cef634e7af834b84e1153e9cf Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Sat, 18 Mar 2023 03:33:26 +0000 Subject: [PATCH 111/691] [llvm-readobj] Provide Hash Histogram for all ELFDumper implementations Previously, the GNUELFDumper was the only implementer for Hash Histograms. This patch moves the implementation for printHashHistogram and printGnuHashHistogram into the ELFDumper base class, and allows each derived class to specialize how it outputs that information. This change also prevents the JSONELFDumper from emitting invalid JSON, since the shared implementation in LLVMELFDumper no longer emits a warning message to the output stream. 
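For illustration, the shape of this refactoring is the classic template-method split: the base class owns the shared histogram computation, and each dumper only overrides the final printing hook. A rough, self-contained sketch of that shape (simplified names, not the actual LLVM interfaces):

  #include <cstdio>
  #include <vector>

  // The base class computes the statistics once; derived dumpers only decide
  // how to render them.
  struct DumperBase {
    virtual ~DumperBase() = default;
    void printHashHistograms() {
      // ... walk the hash sections and compute bucket-chain statistics ...
      std::vector<size_t> Count = {2, 0, 0, 1}; // stand-in data
      printHashHistogramStats(/*NBucket=*/3, /*MaxChain=*/4, /*TotalSyms=*/3,
                              Count, /*IsGnu=*/false);
    }
    virtual void printHashHistogramStats(size_t NBucket, size_t MaxChain,
                                         size_t TotalSyms,
                                         const std::vector<size_t> &Count,
                                         bool IsGnu) = 0;
  };

  struct GNUDumper : DumperBase {
    void printHashHistogramStats(size_t NBucket, size_t MaxChain,
                                 size_t TotalSyms,
                                 const std::vector<size_t> &Count,
                                 bool) override {
      size_t Cumulative = 0;
      std::printf(" Length  Number     %% of total  Coverage\n");
      for (size_t I = 0; I < MaxChain; ++I) {
        Cumulative += Count[I] * I;
        std::printf("%7zu  %-10zu (%5.1f%%)     %5.1f%%\n", I, Count[I],
                    Count[I] * 100.0 / NBucket, Cumulative * 100.0 / TotalSyms);
      }
    }
  };

  int main() { GNUDumper().printHashHistograms(); }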
Reviewed By: jhenderson Differential Revision: https://reviews.llvm.org/D137096 --- .../llvm-readobj/ELF/hash-histogram.test | 63 ++++ llvm/tools/llvm-readobj/ELFDumper.cpp | 286 ++++++++++-------- 2 files changed, 221 insertions(+), 128 deletions(-) diff --git a/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test b/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test index f4db2d8ad7d7b..10f8652bb0855 100644 --- a/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test +++ b/llvm/test/tools/llvm-readobj/ELF/hash-histogram.test @@ -12,6 +12,10 @@ # RUN: yaml2obj --docnum=1 -D BITS=64 %s -o %t1-64.o # RUN: llvm-readelf --elf-hash-histogram %t1-64.o | FileCheck %s --check-prefix=HIST +## Check that LLVM output has the expected format. +# RUN: llvm-readobj --elf-hash-histogram %t1-32.o | FileCheck %s --check-prefix=LLVM-HIST +# RUN: llvm-readobj --elf-hash-histogram %t1-64.o | FileCheck %s --check-prefix=LLVM-HIST + # HIST: Histogram for bucket list length (total of 3 buckets) # HIST-NEXT: Length Number % of total Coverage # HIST-NEXT: 0 2 ( 66.7%) 0.0% @@ -26,6 +30,65 @@ # HIST-NEXT: 3 1 ( 33.3%) 100.0% # HIST-NOT: {{.}} +# LLVM-HIST: HashHistogram { +# LLVM-HIST-NEXT: TotalBuckets: 3 +# LLVM-HIST-NEXT: Chains [ +# LLVM-HIST-NEXT: Chain { +# LLVM-HIST-NEXT: Length: 0 +# LLVM-HIST-NEXT: Count: 2 +# LLVM-HIST-NEXT: Percentage: 66.7 +# LLVM-HIST-NEXT: Coverage: 0.0 +# LLVM-HIST-NEXT: } +# LLVM-HIST-NEXT: Chain { +# LLVM-HIST-NEXT: Length: 1 +# LLVM-HIST-NEXT: Count: 0 +# LLVM-HIST-NEXT: Percentage: 0.0 +# LLVM-HIST-NEXT: Coverage: 0.0 +# LLVM-HIST-NEXT: } +# LLVM-HIST-NEXT: Chain { +# LLVM-HIST-NEXT: Length: 2 +# LLVM-HIST-NEXT: Count: 0 +# LLVM-HIST-NEXT: Percentage: 0.0 +# LLVM-HIST-NEXT: Coverage: 0.0 +# LLVM-HIST-NEXT: } +# LLVM-HIST-NEXT: Chain { +# LLVM-HIST-NEXT: Length: 3 +# LLVM-HIST-NEXT: Count: 1 +# LLVM-HIST-NEXT: Percentage: 33.3 +# LLVM-HIST-NEXT: Coverage: 100.0 +# LLVM-HIST-NEXT: } +# LLVM-HIST-NEXT: ] +# LLVM-HIST-NEXT: } +# LLVM-HIST-NEXT: GnuHashHistogram { +# LLVM-HIST-NEXT: TotalBuckets: 3 +# LLVM-HIST-NEXT: Buckets [ +# LLVM-HIST-NEXT: Bucket { +# LLVM-HIST-NEXT: Length: 0 +# LLVM-HIST-NEXT: Count: 1 +# LLVM-HIST-NEXT: Percentage: 33.3 +# LLVM-HIST-NEXT: Coverage: 0.0 +# LLVM-HIST-NEXT: } +# LLVM-HIST-NEXT: Bucket { +# LLVM-HIST-NEXT: Length: 1 +# LLVM-HIST-NEXT: Count: 1 +# LLVM-HIST-NEXT: Percentage: 33.3 +# LLVM-HIST-NEXT: Coverage: 25.0 +# LLVM-HIST-NEXT: } +# LLVM-HIST-NEXT: Bucket { +# LLVM-HIST-NEXT: Length: 2 +# LLVM-HIST-NEXT: Count: 0 +# LLVM-HIST-NEXT: Percentage: 0.0 +# LLVM-HIST-NEXT: Coverage: 25.0 +# LLVM-HIST-NEXT: } +# LLVM-HIST-NEXT: Bucket { +# LLVM-HIST-NEXT: Length: 3 +# LLVM-HIST-NEXT: Count: 1 +# LLVM-HIST-NEXT: Percentage: 33.3 +# LLVM-HIST-NEXT: Coverage: 100.0 +# LLVM-HIST-NEXT: } +# LLVM-HIST-NEXT: ] +# LLVM-HIST-NEXT: } + --- !ELF FileHeader: Class: ELFCLASS[[BITS]] diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 2434aa7b59cfa..3f1f226d1fe4a 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -222,6 +222,13 @@ template class ELFDumper : public ObjDumper { void printStackMap() const override; void printMemtag() override; + // Hash histogram shows statistics of how efficient the hash was for the + // dynamic symbol table. The table shows the number of hash buckets for + // different lengths of chains as an absolute number and percentage of the + // total buckets, and the cumulative coverage of symbols for each set of + // buckets. 
+ void printHashHistograms() override; + const object::ELFObjectFile &getElfObject() const { return ObjF; }; std::string describe(const Elf_Shdr &Sec) const; @@ -304,6 +311,12 @@ template class ELFDumper : public ObjDumper { const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc) = 0; + virtual void printHashHistogram(const Elf_Hash &HashTable) const; + virtual void printGnuHashHistogram(const Elf_GnuHash &GnuHashTable) const; + virtual void printHashHistogramStats(size_t NBucket, size_t MaxChain, + size_t TotalSyms, ArrayRef Count, + bool IsGnu) const = 0; + Expected> getVersionTable(const Elf_Shdr &Sec, ArrayRef *SymTab, StringRef *StrTab, const Elf_Shdr **SymTabSec) const; @@ -572,7 +585,6 @@ template class GNUELFDumper : public ELFDumper { void printVersionSymbolSection(const Elf_Shdr *Sec) override; void printVersionDefinitionSection(const Elf_Shdr *Sec) override; void printVersionDependencySection(const Elf_Shdr *Sec) override; - void printHashHistograms() override; void printCGProfile() override; void printBBAddrMaps() override; void printAddrsig() override; @@ -582,10 +594,11 @@ template class GNUELFDumper : public ELFDumper { void printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc) override; + void printHashHistogramStats(size_t NBucket, size_t MaxChain, + size_t TotalSyms, ArrayRef Count, + bool IsGnu) const override; private: - void printHashHistogram(const Elf_Hash &HashTable); - void printGnuHashHistogram(const Elf_GnuHash &GnuHashTable); void printHashTableSymbols(const Elf_Hash &HashTable); void printGnuHashTableSymbols(const Elf_GnuHash &GnuHashTable); @@ -679,7 +692,6 @@ template class LLVMELFDumper : public ELFDumper { void printVersionSymbolSection(const Elf_Shdr *Sec) override; void printVersionDefinitionSection(const Elf_Shdr *Sec) override; void printVersionDependencySection(const Elf_Shdr *Sec) override; - void printHashHistograms() override; void printCGProfile() override; void printBBAddrMaps() override; void printAddrsig() override; @@ -691,6 +703,9 @@ template class LLVMELFDumper : public ELFDumper { const ArrayRef AndroidNoteDesc) override; void printSymbolSection(const Elf_Sym &Symbol, unsigned SymIndex, DataRegion ShndxTable) const; + void printHashHistogramStats(size_t NBucket, size_t MaxChain, + size_t TotalSyms, ArrayRef Count, + bool IsGnu) const override; private: void printRelrReloc(const Elf_Relr &R) override; @@ -2682,6 +2697,116 @@ void ELFDumper::printGnuHashTable() { W.printHexList("Values", *Chains); } +template void ELFDumper::printHashHistograms() { + // Print histogram for the .hash section. + if (this->HashTable) { + if (Error E = checkHashTable(*this, this->HashTable)) + this->reportUniqueWarning(std::move(E)); + else + printHashHistogram(*this->HashTable); + } + + // Print histogram for the .gnu.hash section. + if (this->GnuHashTable) { + if (Error E = checkGNUHashTable(this->Obj, this->GnuHashTable)) + this->reportUniqueWarning(std::move(E)); + else + printGnuHashHistogram(*this->GnuHashTable); + } +} + +template +void ELFDumper::printHashHistogram(const Elf_Hash &HashTable) const { + size_t NBucket = HashTable.nbucket; + size_t NChain = HashTable.nchain; + ArrayRef Buckets = HashTable.buckets(); + ArrayRef Chains = HashTable.chains(); + size_t TotalSyms = 0; + // If hash table is correct, we have at least chains with 0 length. 
+  size_t MaxChain = 1;
+
+  if (NChain == 0 || NBucket == 0)
+    return;
+
+  std::vector<size_t> ChainLen(NBucket, 0);
+  // Go over all buckets and note chain lengths of each bucket (total
+  // unique chain lengths).
+  for (size_t B = 0; B < NBucket; ++B) {
+    BitVector Visited(NChain);
+    for (size_t C = Buckets[B]; C < NChain; C = Chains[C]) {
+      if (C == ELF::STN_UNDEF)
+        break;
+      if (Visited[C]) {
+        this->reportUniqueWarning(
+            ".hash section is invalid: bucket " + Twine(C) +
+            ": a cycle was detected in the linked chain");
+        break;
+      }
+      Visited[C] = true;
+      if (MaxChain <= ++ChainLen[B])
+        ++MaxChain;
+    }
+    TotalSyms += ChainLen[B];
+  }
+
+  if (!TotalSyms)
+    return;
+
+  std::vector<size_t> Count(MaxChain, 0);
+  // Count how long the chain is for each bucket.
+  for (size_t B = 0; B < NBucket; B++)
+    ++Count[ChainLen[B]];
+  // Print the number of buckets with each chain length and their cumulative
+  // coverage of the symbols.
+  printHashHistogramStats(NBucket, MaxChain, TotalSyms, Count, /*IsGnu=*/false);
+}
+
+template <class ELFT>
+void ELFDumper<ELFT>::printGnuHashHistogram(
+    const Elf_GnuHash &GnuHashTable) const {
+  Expected<ArrayRef<Elf_Word>> ChainsOrErr =
+      getGnuHashTableChains<ELFT>(this->DynSymRegion, &GnuHashTable);
+  if (!ChainsOrErr) {
+    this->reportUniqueWarning("unable to print the GNU hash table histogram: " +
+                              toString(ChainsOrErr.takeError()));
+    return;
+  }
+
+  ArrayRef<Elf_Word> Chains = *ChainsOrErr;
+  size_t Symndx = GnuHashTable.symndx;
+  size_t TotalSyms = 0;
+  size_t MaxChain = 1;
+
+  size_t NBucket = GnuHashTable.nbuckets;
+  if (Chains.empty() || NBucket == 0)
+    return;
+
+  ArrayRef<Elf_Word> Buckets = GnuHashTable.buckets();
+  std::vector<size_t> ChainLen(NBucket, 0);
+  for (size_t B = 0; B < NBucket; ++B) {
+    if (!Buckets[B])
+      continue;
+    size_t Len = 1;
+    for (size_t C = Buckets[B] - Symndx;
+         C < Chains.size() && (Chains[C] & 1) == 0; ++C)
+      if (MaxChain < ++Len)
+        ++MaxChain;
+    ChainLen[B] = Len;
+    TotalSyms += Len;
+  }
+  ++MaxChain;
+
+  if (!TotalSyms)
+    return;
+
+  std::vector<size_t> Count(MaxChain, 0);
+  for (size_t B = 0; B < NBucket; ++B)
+    ++Count[ChainLen[B]];
+  // Print the number of buckets with each chain length and their cumulative
+  // coverage of the symbols.
+  printHashHistogramStats(NBucket, MaxChain, TotalSyms, Count, /*IsGnu=*/true);
+}
+
 template <class ELFT> void ELFDumper<ELFT>::printLoadName() {
   StringRef SOName = "<Not found>";
   if (SONameOffset)
@@ -4831,108 +4956,16 @@ void GNUELFDumper<ELFT>::printVersionDependencySection(const Elf_Shdr *Sec) {
 }

 template <class ELFT>
-void GNUELFDumper<ELFT>::printHashHistogram(const Elf_Hash &HashTable) {
-  size_t NBucket = HashTable.nbucket;
-  size_t NChain = HashTable.nchain;
-  ArrayRef<Elf_Word> Buckets = HashTable.buckets();
-  ArrayRef<Elf_Word> Chains = HashTable.chains();
-  size_t TotalSyms = 0;
-  // If hash table is correct, we have at least chains with 0 length
-  size_t MaxChain = 1;
-  size_t CumulativeNonZero = 0;
-
-  if (NChain == 0 || NBucket == 0)
-    return;
-
-  std::vector<size_t> ChainLen(NBucket, 0);
-  // Go over all buckets and and note chain lengths of each bucket (total
-  // unique chain lengths).
- for (size_t B = 0; B < NBucket; B++) { - BitVector Visited(NChain); - for (size_t C = Buckets[B]; C < NChain; C = Chains[C]) { - if (C == ELF::STN_UNDEF) - break; - if (Visited[C]) { - this->reportUniqueWarning(".hash section is invalid: bucket " + - Twine(C) + - ": a cycle was detected in the linked chain"); - break; - } - Visited[C] = true; - if (MaxChain <= ++ChainLen[B]) - MaxChain++; - } - TotalSyms += ChainLen[B]; - } - - if (!TotalSyms) - return; - - std::vector Count(MaxChain, 0); - // Count how long is the chain for each bucket - for (size_t B = 0; B < NBucket; B++) - ++Count[ChainLen[B]]; - // Print Number of buckets with each chain lengths and their cumulative - // coverage of the symbols - OS << "Histogram for bucket list length (total of " << NBucket - << " buckets)\n" - << " Length Number % of total Coverage\n"; - for (size_t I = 0; I < MaxChain; I++) { - CumulativeNonZero += Count[I] * I; - OS << format("%7lu %-10lu (%5.1f%%) %5.1f%%\n", I, Count[I], - (Count[I] * 100.0) / NBucket, - (CumulativeNonZero * 100.0) / TotalSyms); - } -} - -template -void GNUELFDumper::printGnuHashHistogram( - const Elf_GnuHash &GnuHashTable) { - Expected> ChainsOrErr = - getGnuHashTableChains(this->DynSymRegion, &GnuHashTable); - if (!ChainsOrErr) { - this->reportUniqueWarning("unable to print the GNU hash table histogram: " + - toString(ChainsOrErr.takeError())); - return; - } - - ArrayRef Chains = *ChainsOrErr; - size_t Symndx = GnuHashTable.symndx; - size_t TotalSyms = 0; - size_t MaxChain = 1; +void GNUELFDumper::printHashHistogramStats(size_t NBucket, + size_t MaxChain, + size_t TotalSyms, + ArrayRef Count, + bool IsGnu) const { size_t CumulativeNonZero = 0; - - size_t NBucket = GnuHashTable.nbuckets; - if (Chains.empty() || NBucket == 0) - return; - - ArrayRef Buckets = GnuHashTable.buckets(); - std::vector ChainLen(NBucket, 0); - for (size_t B = 0; B < NBucket; B++) { - if (!Buckets[B]) - continue; - size_t Len = 1; - for (size_t C = Buckets[B] - Symndx; - C < Chains.size() && (Chains[C] & 1) == 0; C++) - if (MaxChain < ++Len) - MaxChain++; - ChainLen[B] = Len; - TotalSyms += Len; - } - MaxChain++; - - if (!TotalSyms) - return; - - std::vector Count(MaxChain, 0); - for (size_t B = 0; B < NBucket; B++) - ++Count[ChainLen[B]]; - // Print Number of buckets with each chain lengths and their cumulative - // coverage of the symbols - OS << "Histogram for `.gnu.hash' bucket list length (total of " << NBucket - << " buckets)\n" + OS << "Histogram for" << (IsGnu ? " `.gnu.hash'" : "") + << " bucket list length (total of " << NBucket << " buckets)\n" << " Length Number % of total Coverage\n"; - for (size_t I = 0; I < MaxChain; I++) { + for (size_t I = 0; I < MaxChain; ++I) { CumulativeNonZero += Count[I] * I; OS << format("%7lu %-10lu (%5.1f%%) %5.1f%%\n", I, Count[I], (Count[I] * 100.0) / NBucket, @@ -4940,28 +4973,6 @@ void GNUELFDumper::printGnuHashHistogram( } } -// Hash histogram shows statistics of how efficient the hash was for the -// dynamic symbol table. The table shows the number of hash buckets for -// different lengths of chains as an absolute number and percentage of the total -// buckets, and the cumulative coverage of symbols for each set of buckets. -template void GNUELFDumper::printHashHistograms() { - // Print histogram for the .hash section. - if (this->HashTable) { - if (Error E = checkHashTable(*this, this->HashTable)) - this->reportUniqueWarning(std::move(E)); - else - printHashHistogram(*this->HashTable); - } - - // Print histogram for the .gnu.hash section. 
- if (this->GnuHashTable) { - if (Error E = checkGNUHashTable(this->Obj, this->GnuHashTable)) - this->reportUniqueWarning(std::move(E)); - else - printGnuHashHistogram(*this->GnuHashTable); - } -} - template void GNUELFDumper::printCGProfile() { OS << "GNUStyle::printCGProfile not implemented\n"; } @@ -7151,8 +7162,27 @@ void LLVMELFDumper::printVersionDependencySection(const Elf_Shdr *Sec) { } } -template void LLVMELFDumper::printHashHistograms() { - W.startLine() << "Hash Histogram not implemented!\n"; +template +void LLVMELFDumper::printHashHistogramStats(size_t NBucket, + size_t MaxChain, + size_t TotalSyms, + ArrayRef Count, + bool IsGnu) const { + StringRef HistName = IsGnu ? "GnuHashHistogram" : "HashHistogram"; + StringRef BucketName = IsGnu ? "Bucket" : "Chain"; + StringRef ListName = IsGnu ? "Buckets" : "Chains"; + DictScope Outer(W, HistName); + W.printNumber("TotalBuckets", NBucket); + ListScope Buckets(W, ListName); + size_t CumulativeNonZero = 0; + for (size_t I = 0; I < MaxChain; ++I) { + CumulativeNonZero += Count[I] * I; + DictScope Bucket(W, BucketName); + W.printNumber("Length", I); + W.printNumber("Count", Count[I]); + W.printNumber("Percentage", (float)(Count[I] * 100.0) / NBucket); + W.printNumber("Coverage", (float)(CumulativeNonZero * 100.0) / TotalSyms); + } } // Returns true if rel/rela section exists, and populates SymbolIndices. From 3d9e646c7c9f5552f4476306c8e712e3a5711fd0 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Mon, 20 Mar 2023 17:35:14 -0700 Subject: [PATCH 112/691] [libc][obvious] fix missing stdio dependency Differential Revision: https://reviews.llvm.org/D146482 --- libc/src/wchar/CMakeLists.txt | 1 + libc/test/src/wchar/CMakeLists.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 58e168266829c..5cf980e958a99 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -6,5 +6,6 @@ add_entrypoint_object( HDRS wctob.h DEPENDS + libc.include.stdio libc.src.__support.wctype_utils ) diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index 88b68c43491a6..8b03002e90224 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -7,6 +7,7 @@ add_libc_unittest( SRCS btowc_test.cpp DEPENDS + libc.include.stdio libc.src.wchar.btowc ) From 9e2b164570c3d30eaf9b4bfe6009be0c1a2b1ef0 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Mon, 20 Mar 2023 17:41:07 -0700 Subject: [PATCH 113/691] [libc] temporarily disable wctob entrypoint Differential Revision: https://reviews.llvm.org/D146484 --- libc/config/linux/x86_64/entrypoints.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 39b7e8c84effa..b3017338f8260 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -195,7 +195,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.unistd.write # wchar.h entrypoints - libc.src.wchar.wctob + # libc.src.wchar.wctob ) set(TARGET_LIBM_ENTRYPOINTS From 19529458c4e7210297428152e4e311f71e00ff07 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 7 Mar 2023 13:31:44 -0800 Subject: [PATCH 114/691] [Attributor][NFC] Reduce output verbosity --- llvm/lib/Transforms/IPO/Attributor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 46b29009437c5..3e6ca6fb2ff19 100644 
--- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -593,7 +593,7 @@ isPotentiallyReachable(Attributor &A, const Instruction &FromI, const AbstractAttribute &QueryingAA, const AA::InstExclusionSetTy *ExclusionSet, std::function GoBackwardsCB) { - LLVM_DEBUG({ + DEBUG_WITH_TYPE(VERBOSE_DEBUG_TYPE, { dbgs() << "[AA] isPotentiallyReachable @" << ToFn.getName() << " from " << FromI << " [GBCB: " << bool(GoBackwardsCB) << "][#ExS: " << (ExclusionSet ? std::to_string(ExclusionSet->size()) : "none") From d14ea2f6b45166f22169c82a9f73ebb404782ee5 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 20 Mar 2023 12:42:56 -0700 Subject: [PATCH 115/691] [Attributor][NFC] Precommit test exposing a bug --- .../Attributor/value-simplify-instances.ll | 86 +++++++++++++++++-- 1 file changed, 78 insertions(+), 8 deletions(-) diff --git a/llvm/test/Transforms/Attributor/value-simplify-instances.ll b/llvm/test/Transforms/Attributor/value-simplify-instances.ll index a87a054c6dc33..4ac47f189a5be 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-instances.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-instances.ll @@ -29,7 +29,7 @@ define internal i1 @recursive_inst_generator(i1 %c, ptr %p) { ; TUNIT-NEXT: [[A:%.*]] = call ptr @geti1Ptr() ; TUNIT-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] ; TUNIT: t: -; TUNIT-NEXT: [[R1:%.*]] = call i1 @recursive_inst_comparator(ptr noalias nofree readnone [[A]], ptr noalias nofree readnone [[P]]) #[[ATTR6:[0-9]+]] +; TUNIT-NEXT: [[R1:%.*]] = call i1 @recursive_inst_comparator(ptr noalias nofree readnone [[A]], ptr noalias nofree readnone [[P]]) #[[ATTR7:[0-9]+]] ; TUNIT-NEXT: ret i1 [[R1]] ; TUNIT: f: ; TUNIT-NEXT: [[R2:%.*]] = call i1 @recursive_inst_generator(i1 noundef true, ptr nofree [[A]]) @@ -148,7 +148,7 @@ define i1 @recursive_alloca_compare_caller(i1 %c) { ; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_compare_caller ; CGSCC-SAME: (i1 noundef [[C:%.*]]) #[[ATTR1]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 noundef [[C]], ptr undef) #[[ATTR4:[0-9]+]] +; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare(i1 noundef [[C]], ptr undef) #[[ATTR5:[0-9]+]] ; CGSCC-NEXT: ret i1 [[CALL]] ; %call = call i1 @recursive_alloca_compare(i1 %c, ptr undef) @@ -207,7 +207,7 @@ define i8 @recursive_alloca_load_return_caller(i1 %c) { ; CGSCC: Function Attrs: nofree nosync nounwind memory(none) ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_load_return_caller ; CGSCC-SAME: (i1 noundef [[C:%.*]]) #[[ATTR1]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 noundef [[C]], ptr undef, i8 noundef 42) #[[ATTR4]] +; CGSCC-NEXT: [[CALL:%.*]] = call i8 @recursive_alloca_load_return(i1 noundef [[C]], ptr undef, i8 noundef 42) #[[ATTR5]] ; CGSCC-NEXT: ret i8 [[CALL]] ; %call = call i8 @recursive_alloca_load_return(i1 %c, ptr undef, i8 42) @@ -271,7 +271,7 @@ define i1 @recursive_alloca_compare_caller_global1(i1 %c) { ; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_compare_caller_global1 ; CGSCC-SAME: (i1 noundef [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare_global1(i1 noundef [[C]]) #[[ATTR4]] +; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare_global1(i1 noundef [[C]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[CALL]] ; %call = call i1 @recursive_alloca_compare_global1(i1 %c) @@ -330,7 +330,7 @@ define i1 
@recursive_alloca_compare_caller_global2(i1 %c) { ; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@recursive_alloca_compare_caller_global2 ; CGSCC-SAME: (i1 noundef [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare_global2(i1 noundef [[C]]) #[[ATTR4]] +; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_alloca_compare_global2(i1 noundef [[C]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[CALL]] ; %call = call i1 @recursive_alloca_compare_global2(i1 %c) @@ -386,12 +386,80 @@ define i1 @recursive_inst_compare_caller_global3(i1 %c) { ; CGSCC: Function Attrs: nofree nosync nounwind ; CGSCC-LABEL: define {{[^@]+}}@recursive_inst_compare_caller_global3 ; CGSCC-SAME: (i1 noundef [[C:%.*]]) #[[ATTR3]] { -; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_inst_compare_global3(i1 noundef [[C]]) #[[ATTR4]] +; CGSCC-NEXT: [[CALL:%.*]] = call i1 @recursive_inst_compare_global3(i1 noundef [[C]]) #[[ATTR5]] ; CGSCC-NEXT: ret i1 [[CALL]] ; %call = call i1 @recursive_inst_compare_global3(i1 %c) ret i1 %call } + +define i32 @non_unique_phi_ops(ptr %ptr) { +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) +; TUNIT-LABEL: define {{[^@]+}}@non_unique_phi_ops +; TUNIT-SAME: (ptr nocapture nofree readonly [[PTR:%.*]]) #[[ATTR6:[0-9]+]] { +; TUNIT-NEXT: entry: +; TUNIT-NEXT: br label [[HEADER:%.*]] +; TUNIT: header: +; TUNIT-NEXT: [[I:%.*]] = phi i32 [ [[ADD:%.*]], [[F:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; TUNIT-NEXT: [[P:%.*]] = phi i32 [ [[NON_UNIQUE_INPUT:%.*]], [[F]] ], [ poison, [[ENTRY]] ] +; TUNIT-NEXT: [[ADD]] = add i32 [[I]], 1 +; TUNIT-NEXT: [[G:%.*]] = getelementptr i32, ptr [[PTR]], i32 [[I]] +; TUNIT-NEXT: [[NON_UNIQUE_INPUT]] = load i32, ptr [[G]], align 4 +; TUNIT-NEXT: [[CMP1:%.*]] = icmp eq i32 [[I]], [[NON_UNIQUE_INPUT]] +; TUNIT-NEXT: br i1 [[CMP1]], label [[T:%.*]], label [[F]] +; TUNIT: t: +; TUNIT-NEXT: br label [[F]] +; TUNIT: f: +; TUNIT-NEXT: [[NON_UNIQUE:%.*]] = phi i32 [ [[NON_UNIQUE_INPUT]], [[T]] ], [ [[P]], [[HEADER]] ] +; TUNIT-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 42 +; TUNIT-NEXT: br i1 [[CMP2]], label [[HEADER]], label [[END:%.*]] +; TUNIT: end: +; TUNIT-NEXT: ret i32 [[P]] +; +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(argmem: read) +; CGSCC-LABEL: define {{[^@]+}}@non_unique_phi_ops +; CGSCC-SAME: (ptr nocapture nofree readonly [[PTR:%.*]]) #[[ATTR4:[0-9]+]] { +; CGSCC-NEXT: entry: +; CGSCC-NEXT: br label [[HEADER:%.*]] +; CGSCC: header: +; CGSCC-NEXT: [[I:%.*]] = phi i32 [ [[ADD:%.*]], [[F:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CGSCC-NEXT: [[P:%.*]] = phi i32 [ [[NON_UNIQUE_INPUT:%.*]], [[F]] ], [ poison, [[ENTRY]] ] +; CGSCC-NEXT: [[ADD]] = add i32 [[I]], 1 +; CGSCC-NEXT: [[G:%.*]] = getelementptr i32, ptr [[PTR]], i32 [[I]] +; CGSCC-NEXT: [[NON_UNIQUE_INPUT]] = load i32, ptr [[G]], align 4 +; CGSCC-NEXT: [[CMP1:%.*]] = icmp eq i32 [[I]], [[NON_UNIQUE_INPUT]] +; CGSCC-NEXT: br i1 [[CMP1]], label [[T:%.*]], label [[F]] +; CGSCC: t: +; CGSCC-NEXT: br label [[F]] +; CGSCC: f: +; CGSCC-NEXT: [[NON_UNIQUE:%.*]] = phi i32 [ [[NON_UNIQUE_INPUT]], [[T]] ], [ [[P]], [[HEADER]] ] +; CGSCC-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 42 +; CGSCC-NEXT: br i1 [[CMP2]], label [[HEADER]], label [[END:%.*]] +; CGSCC: end: +; CGSCC-NEXT: ret i32 [[P]] +; +entry: + br label %header + +header: + %i = phi i32 [ %add, %f ], [ 0, %entry ] + %p = phi i32 [ %non_unique, %f ], [ poison, %entry ] + %add = add i32 %i, 1 + %g = getelementptr i32, ptr %ptr, i32 %i + %non_unique_input = load 
i32, ptr %g, align 4 + %cmp1 = icmp eq i32 %i, %non_unique_input + br i1 %cmp1, label %t, label %f +t: + br label %f +f: + %non_unique = phi i32 [ %non_unique_input, %t ], [ %p, %header ] + %cmp2 = icmp slt i32 %i, 42 + br i1 %cmp2, label %header, label %end + +end: + ret i32 %p +} + ;. ; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; TUNIT: attributes #[[ATTR1]] = { nofree nosync nounwind memory(none) } @@ -399,11 +467,13 @@ define i1 @recursive_inst_compare_caller_global3(i1 %c) { ; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind memory(argmem: readwrite) } ; TUNIT: attributes #[[ATTR4]] = { nofree nosync nounwind } ; TUNIT: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind } -; TUNIT: attributes #[[ATTR6]] = { nounwind } +; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; TUNIT: attributes #[[ATTR7]] = { nounwind } ;. ; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR1]] = { nofree nosync nounwind memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR3]] = { nofree nosync nounwind } -; CGSCC: attributes #[[ATTR4]] = { nounwind } +; CGSCC: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn memory(argmem: read) } +; CGSCC: attributes #[[ATTR5]] = { nounwind } ;. From 0fc63d4e642fcf79e9806dc3ae7a49defaae2479 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 20 Mar 2023 11:21:56 -0700 Subject: [PATCH 116/691] [Attributor][FIX] Ensure loop PHI replacements are dynamically unique Similar to loads, PHIs can be used to introduce non-dynamically unique values into the simplification "algorithm". We need to check that PHIs do not carry such a value from one iteration into the next as can cause downstream reasoning to fail, e.g., downstream could think a comparison is equal because the simplified values are equal while they are defined in different loop iterations. Similarly, instructions in cycles are now conservatively treated as non-dynamically unique. We could do better but I'll leave that for the future. The change in AAUnderlyingObjects allows us to ignore dynamically unique when we simply look for underlying objects. The user of that AA should be aware that the result might not be a dynamically unique value. 
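For intuition, here is a C++ analogue of the risk described above (an illustrative sketch, not from the patch): `Prev` plays the role of a PHI that forwards the previous iteration's loaded value, while `Cur` is the current iteration's load. Both may "simplify" to the same load expression, yet they are distinct dynamic values, so folding a comparison between them to true would be wrong:

  #include <cstdio>

  int main() {
    int Data[] = {1, 1, 2};
    int Prev = 0; // plays the role of the PHI carrying last iteration's value
    for (int I = 0; I < 3; ++I) {
      int Cur = Data[I]; // same "expression" each iteration, different values
      if (I > 0)
        std::printf("iter %d: Prev == Cur -> %s\n", I,
                    Prev == Cur ? "true" : "false"); // prints true, then false
      Prev = Cur;
    }
  }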
--- .../Transforms/IPO/AttributorAttributes.cpp | 66 ++++++++++++++----- .../Attributor/IPConstantProp/PR26044.ll | 34 +++++----- .../Attributor/dereferenceable-1.ll | 4 +- .../Attributor/heap_to_stack_gpu.ll | 2 +- llvm/test/Transforms/Attributor/liveness.ll | 2 +- .../Attributor/lvi-after-jumpthreading.ll | 2 +- llvm/test/Transforms/Attributor/nonnull.ll | 13 ++-- llvm/test/Transforms/Attributor/potential.ll | 2 +- llvm/test/Transforms/Attributor/range.ll | 2 +- .../Attributor/value-simplify-instances.ll | 12 ++-- .../Attributor/value-simplify-pointer-info.ll | 10 +-- llvm/test/Transforms/Attributor/willreturn.ll | 8 +-- .../OpenMP/attributor_recursion_crash.ll | 2 +- 13 files changed, 101 insertions(+), 58 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index c4f10f30bd163..04ba96481fb01 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -197,6 +197,19 @@ ChangeStatus clampStateAndIndicateChange(DerefState &S, } // namespace llvm +static bool mayBeInCycle(const CycleInfo *CI, const Instruction *I, + bool HeaderOnly, Cycle **CPtr = nullptr) { + if (!CI) + return true; + auto *BB = I->getParent(); + auto *C = CI->getCycle(BB); + if (!C) + return false; + if (CPtr) + *CPtr = C; + return !HeaderOnly || BB == C->getHeader(); +} + /// Checks if a type could have padding bytes. static bool isDenselyPacked(Type *Ty, const DataLayout &DL) { // There is no size information, so be conservative. @@ -856,7 +869,7 @@ struct AA::PointerInfo::State : public AbstractState { for (unsigned Index : LocalList->getSecond()) { for (auto &R : AccessList[Index]) { Range &= R; - if (Range.offsetOrSizeAreUnknown()) + if (Range.offsetAndSizeAreUnknown()) break; } } @@ -1617,16 +1630,6 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) { return true; } - auto mayBeInCycleHeader = [](const CycleInfo *CI, const Instruction *I) { - if (!CI) - return true; - auto *BB = I->getParent(); - auto *C = CI->getCycle(BB); - if (!C) - return false; - return BB == C->getHeader(); - }; - // Check if the PHI operand is not dependent on the PHI itself. Every // recurrence is a cyclic net of PHIs in the data flow, and has an // equivalent Cycle in the control flow. One of those PHIs must be in the @@ -1634,7 +1637,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) { // Cycles reported by CycleInfo. It is sufficient to check the PHIs in // every Cycle header; if such a node is marked unknown, this will // eventually propagate through the whole net of PHIs in the recurrence. - if (mayBeInCycleHeader(CI, cast(Usr))) { + if (mayBeInCycle(CI, cast(Usr), /* HeaderOnly */ true)) { auto BaseOI = It->getSecond(); BaseOI.addToAll(Offset.getZExtValue()); if (IsFirstPHIUser || BaseOI == UsrOI) { @@ -5563,6 +5566,15 @@ struct AAInstanceInfoImpl : public AAInstanceInfo { indicateOptimisticFixpoint(); return; } + if (auto *I = dyn_cast(&V)) { + const auto *CI = + A.getInfoCache().getAnalysisResultForFunction( + *I->getFunction()); + if (mayBeInCycle(CI, I, /* HeaderOnly */ false)) { + indicatePessimisticFixpoint(); + return; + } + } } /// See AbstractAttribute::updateImpl(...). 
@@ -11048,14 +11060,29 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { if (&PHI == &getAssociatedValue()) { LivenessInfo &LI = GetLivenessInfo(*PHI.getFunction()); + const auto *CI = + A.getInfoCache().getAnalysisResultForFunction( + *PHI.getFunction()); + + Cycle *C = nullptr; + bool CyclePHI = mayBeInCycle(CI, &PHI, /* HeaderOnly */ true, &C); for (unsigned u = 0, e = PHI.getNumIncomingValues(); u < e; u++) { BasicBlock *IncomingBB = PHI.getIncomingBlock(u); if (LI.LivenessAA->isEdgeDead(IncomingBB, PHI.getParent())) { LI.AnyDead = true; continue; } - Worklist.push_back( - {{*PHI.getIncomingValue(u), IncomingBB->getTerminator()}, II.S}); + Value *V = PHI.getIncomingValue(u); + if (V == &PHI) + continue; + + // If the incoming value is not the PHI but an instruction in the same + // cycle we might have multiple versions of it flying around. + if (CyclePHI && isa(V) && + (!C || C->contains(cast(V)->getParent()))) + return false; + + Worklist.push_back({{*V, IncomingBB->getTerminator()}, II.S}); } return true; } @@ -11667,10 +11694,19 @@ struct AAUnderlyingObjectsImpl continue; } - if (isa(Obj) || isa(Obj)) { + if (isa(Obj)) { Changed |= handleIndirect(A, *Obj, UnderlyingObjects, Scope); continue; } + if (auto *PHI = dyn_cast(Obj)) { + // Explicitly look through PHIs as we do not care about dynamically + // uniqueness. + for (unsigned u = 0, e = PHI->getNumIncomingValues(); u < e; u++) { + Changed |= handleIndirect(A, *PHI->getIncomingValue(u), + UnderlyingObjects, Scope); + } + continue; + } Changed |= UnderlyingObjects.insert(Obj); } diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll index 1dc375dc82c6b..c18899a4287c7 100644 --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll @@ -16,7 +16,8 @@ define void @fn2(ptr %P, i1 %C) { ; TUNIT: if.end: ; TUNIT-NEXT: [[E_2:%.*]] = phi ptr [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr [[E_2]], align 4 -; TUNIT-NEXT: store i32 [[TMP0]], ptr [[P]], align 4 +; TUNIT-NEXT: [[CALL:%.*]] = call i32 @fn1(i32 [[TMP0]]) #[[ATTR3:[0-9]+]] +; TUNIT-NEXT: store i32 [[CALL]], ptr [[P]], align 4 ; TUNIT-NEXT: br label [[FOR_COND1]] ; TUNIT: exit: ; TUNIT-NEXT: ret void @@ -54,11 +55,11 @@ exit: } define internal i32 @fn1(i32 %p1) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) -; CGSCC-LABEL: define {{[^@]+}}@fn1 -; CGSCC-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1:[0-9]+]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: ret i32 [[P1]] +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@fn1 +; CHECK-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[P1]] ; entry: %tobool = icmp ne i32 %p1, 0 @@ -70,7 +71,7 @@ define void @fn_no_null_opt(ptr %P, i1 %C) null_pointer_is_valid { ; ; TUNIT: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid ; TUNIT-LABEL: define {{[^@]+}}@fn_no_null_opt -; TUNIT-SAME: (ptr nocapture nofree writeonly [[P:%.*]], i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; TUNIT-SAME: (ptr nocapture nofree writeonly [[P:%.*]], i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] { ; TUNIT-NEXT: entry: ; TUNIT-NEXT: br label [[IF_END:%.*]] ; TUNIT: for.cond1: @@ -78,7 +79,8 @@ define void @fn_no_null_opt(ptr %P, i1 %C) null_pointer_is_valid { ; TUNIT: if.end: ; TUNIT-NEXT: [[E_2:%.*]] = phi ptr [ undef, 
[[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] ; TUNIT-NEXT: [[TMP0:%.*]] = load i32, ptr null, align 4294967296 -; TUNIT-NEXT: store i32 [[TMP0]], ptr [[P]], align 4 +; TUNIT-NEXT: [[CALL:%.*]] = call i32 @fn0(i32 [[TMP0]]) #[[ATTR3]] +; TUNIT-NEXT: store i32 [[CALL]], ptr [[P]], align 4 ; TUNIT-NEXT: br label [[FOR_COND1]] ; TUNIT: exit: ; TUNIT-NEXT: ret void @@ -116,11 +118,11 @@ exit: } define internal i32 @fn0(i32 %p1) { -; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) -; CGSCC-LABEL: define {{[^@]+}}@fn0 -; CGSCC-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1]] { -; CGSCC-NEXT: entry: -; CGSCC-NEXT: ret i32 [[P1]] +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@fn0 +; CHECK-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 [[P1]] ; entry: %tobool = icmp ne i32 %p1, 0 @@ -129,11 +131,11 @@ entry: } ;. ; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind memory(argmem: readwrite) } -; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid } +; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind null_pointer_is_valid } +; TUNIT: attributes #[[ATTR3]] = { nofree nosync nounwind } ;. ; CGSCC: attributes #[[ATTR0]] = { nofree nosync nounwind memory(argmem: readwrite) } ; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind willreturn memory(none) } ; CGSCC: attributes #[[ATTR2]] = { nofree nosync nounwind null_pointer_is_valid } ;. -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll index 8ec8b74499706..bac02149eb090 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC ; FIXME: Figure out why we need 16 iterations here. 
@@ -95,7 +95,7 @@ define void @deref_phi_growing(ptr dereferenceable(4000) %a) { ; CHECK: for.cond: ; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] ; CHECK-NEXT: [[A_ADDR_0:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[INCDEC_PTR:%.*]], [[FOR_INC]] ] -; CHECK-NEXT: call void @deref_phi_user(ptr nonnull dereferenceable(4000) [[A_ADDR_0]]) +; CHECK-NEXT: call void @deref_phi_user(ptr nonnull [[A_ADDR_0]]) ; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[A_ADDR_0]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[TMP]] ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]] diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll index 29db8fec1ddba..db6f2773fa8e5 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -opaque-pointers=0 -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -opaque-pointers=0 -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -opaque-pointers=0 -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC ; FIXME: amdgpu doesn't claim malloc is a thing, so the test is somewhat diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll index 60cd039760f38..f9d4b37c30edf 100644 --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -opaque-pointers=0 -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=16 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -opaque-pointers=0 -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -opaque-pointers=0 -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC ; NOT_CGSCC___: @dead_with_blockaddress_users.l = constant [2 x i8*] [i8* inttoptr (i32 1 to i8*), i8* inttoptr (i32 1 to i8*)] diff --git a/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll b/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll index d397d1b075196..862fc84721784 100644 --- a/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll +++ b/llvm/test/Transforms/Attributor/lvi-after-jumpthreading.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa 
-passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC define i8 @test1(i32 %a, i32 %length) { diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll index d0562eca8e09d..8353506b7e3d0 100644 --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -1495,10 +1495,15 @@ define ptr @nonnull_function_ptr_1() { declare ptr @function_decl() define ptr @nonnull_function_ptr_2() { -; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) -; CHECK-LABEL: define {{[^@]+}}@nonnull_function_ptr_2 -; CHECK-SAME: () #[[ATTR1]] { -; CHECK-NEXT: ret ptr @function_decl +; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; TUNIT-LABEL: define {{[^@]+}}@nonnull_function_ptr_2 +; TUNIT-SAME: () #[[ATTR1]] { +; TUNIT-NEXT: ret ptr @function_decl +; +; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CGSCC-LABEL: define {{[^@]+}}@nonnull_function_ptr_2 +; CGSCC-SAME: () #[[ATTR1]] { +; CGSCC-NEXT: ret ptr @function_decl ; ret ptr @function_decl } diff --git a/llvm/test/Transforms/Attributor/potential.ll b/llvm/test/Transforms/Attributor/potential.ll index 560e71df2da26..ad7090494627a 100644 --- a/llvm/test/Transforms/Attributor/potential.ll +++ b/llvm/test/Transforms/Attributor/potential.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC ; ; Test for multiple potential values diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll index ffd8d358be681..0e8d51abb9670 100644 --- a/llvm/test/Transforms/Attributor/range.ll +++ b/llvm/test/Transforms/Attributor/range.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=12 -S < %s | FileCheck %s 
--check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC ; FIXME: CGSCC is not looking at callees and calleers even though it could be allowed. diff --git a/llvm/test/Transforms/Attributor/value-simplify-instances.ll b/llvm/test/Transforms/Attributor/value-simplify-instances.ll index 4ac47f189a5be..2dee705e63fd1 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-instances.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-instances.ll @@ -401,16 +401,16 @@ define i32 @non_unique_phi_ops(ptr %ptr) { ; TUNIT-NEXT: br label [[HEADER:%.*]] ; TUNIT: header: ; TUNIT-NEXT: [[I:%.*]] = phi i32 [ [[ADD:%.*]], [[F:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; TUNIT-NEXT: [[P:%.*]] = phi i32 [ [[NON_UNIQUE_INPUT:%.*]], [[F]] ], [ poison, [[ENTRY]] ] +; TUNIT-NEXT: [[P:%.*]] = phi i32 [ [[NON_UNIQUE:%.*]], [[F]] ], [ poison, [[ENTRY]] ] ; TUNIT-NEXT: [[ADD]] = add i32 [[I]], 1 ; TUNIT-NEXT: [[G:%.*]] = getelementptr i32, ptr [[PTR]], i32 [[I]] -; TUNIT-NEXT: [[NON_UNIQUE_INPUT]] = load i32, ptr [[G]], align 4 +; TUNIT-NEXT: [[NON_UNIQUE_INPUT:%.*]] = load i32, ptr [[G]], align 4 ; TUNIT-NEXT: [[CMP1:%.*]] = icmp eq i32 [[I]], [[NON_UNIQUE_INPUT]] ; TUNIT-NEXT: br i1 [[CMP1]], label [[T:%.*]], label [[F]] ; TUNIT: t: ; TUNIT-NEXT: br label [[F]] ; TUNIT: f: -; TUNIT-NEXT: [[NON_UNIQUE:%.*]] = phi i32 [ [[NON_UNIQUE_INPUT]], [[T]] ], [ [[P]], [[HEADER]] ] +; TUNIT-NEXT: [[NON_UNIQUE]] = phi i32 [ [[NON_UNIQUE_INPUT]], [[T]] ], [ [[P]], [[HEADER]] ] ; TUNIT-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 42 ; TUNIT-NEXT: br i1 [[CMP2]], label [[HEADER]], label [[END:%.*]] ; TUNIT: end: @@ -423,16 +423,16 @@ define i32 @non_unique_phi_ops(ptr %ptr) { ; CGSCC-NEXT: br label [[HEADER:%.*]] ; CGSCC: header: ; CGSCC-NEXT: [[I:%.*]] = phi i32 [ [[ADD:%.*]], [[F:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; CGSCC-NEXT: [[P:%.*]] = phi i32 [ [[NON_UNIQUE_INPUT:%.*]], [[F]] ], [ poison, [[ENTRY]] ] +; CGSCC-NEXT: [[P:%.*]] = phi i32 [ [[NON_UNIQUE:%.*]], [[F]] ], [ poison, [[ENTRY]] ] ; CGSCC-NEXT: [[ADD]] = add i32 [[I]], 1 ; CGSCC-NEXT: [[G:%.*]] = getelementptr i32, ptr [[PTR]], i32 [[I]] -; CGSCC-NEXT: [[NON_UNIQUE_INPUT]] = load i32, ptr [[G]], align 4 +; CGSCC-NEXT: [[NON_UNIQUE_INPUT:%.*]] = load i32, ptr [[G]], align 4 ; CGSCC-NEXT: [[CMP1:%.*]] = icmp eq i32 [[I]], [[NON_UNIQUE_INPUT]] ; CGSCC-NEXT: br i1 [[CMP1]], label [[T:%.*]], label [[F]] ; CGSCC: t: ; CGSCC-NEXT: br label [[F]] ; CGSCC: f: -; CGSCC-NEXT: [[NON_UNIQUE:%.*]] = phi i32 [ [[NON_UNIQUE_INPUT]], [[T]] ], [ [[P]], [[HEADER]] ] +; CGSCC-NEXT: [[NON_UNIQUE]] = phi i32 [ [[NON_UNIQUE_INPUT]], [[T]] ], [ [[P]], [[HEADER]] ] ; CGSCC-NEXT: [[CMP2:%.*]] = icmp slt i32 [[I]], 42 ; CGSCC-NEXT: br i1 [[CMP2]], label [[HEADER]], label [[END:%.*]] ; CGSCC: end: diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll index 8d83a76696554..9abcf330eabe4 100644 --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=22 -S < %s | FileCheck %s 
--check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC ; %struct.S = type { i32, i32, i32, float, float, float } @@ -2112,7 +2112,7 @@ define i8 @phi_no_store_2() { ; TUNIT: loop: ; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a2, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; TUNIT-NEXT: store i8 1, ptr [[P]], align 2 +; TUNIT-NEXT: store i8 1, ptr [[P]], align 1 ; TUNIT-NEXT: [[G]] = getelementptr i8, ptr @a2, i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2133,7 +2133,7 @@ define i8 @phi_no_store_2() { ; CGSCC: loop: ; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a2, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; CGSCC-NEXT: store i8 1, ptr [[P]], align 2 +; CGSCC-NEXT: store i8 1, ptr [[P]], align 1 ; CGSCC-NEXT: [[G]] = getelementptr i8, ptr @a2, i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2176,7 +2176,7 @@ define i8 @phi_no_store_3() { ; TUNIT: loop: ; TUNIT-NEXT: [[P:%.*]] = phi ptr [ @a3, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; TUNIT-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; TUNIT-NEXT: store i8 1, ptr [[P]], align 2 +; TUNIT-NEXT: store i8 1, ptr [[P]], align 1 ; TUNIT-NEXT: [[G]] = getelementptr i8, ptr @a3, i64 2 ; TUNIT-NEXT: [[O]] = add nsw i8 [[I]], 1 ; TUNIT-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 @@ -2202,7 +2202,7 @@ define i8 @phi_no_store_3() { ; CGSCC: loop: ; CGSCC-NEXT: [[P:%.*]] = phi ptr [ @a3, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ] ; CGSCC-NEXT: [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ] -; CGSCC-NEXT: store i8 1, ptr [[P]], align 2 +; CGSCC-NEXT: store i8 1, ptr [[P]], align 1 ; CGSCC-NEXT: [[G]] = getelementptr i8, ptr @a3, i64 2 ; CGSCC-NEXT: [[O]] = add nsw i8 [[I]], 1 ; CGSCC-NEXT: [[C:%.*]] = icmp eq i8 [[O]], 7 diff --git a/llvm/test/Transforms/Attributor/willreturn.ll b/llvm/test/Transforms/Attributor/willreturn.ll index b6a8519a065f2..84af3c6a1f579 100644 --- a/llvm/test/Transforms/Attributor/willreturn.ll +++ b/llvm/test/Transforms/Attributor/willreturn.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -opaque-pointers=0 -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -opaque-pointers=0 -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT ; RUN: opt -opaque-pointers=0 -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC target datalayout = "e-m:e-i54:64-f80:128-n8:16:32:64-S128" @@ -854,7 +854,7 @@ define i32 @bounded_nested_loops(i32 %n) { ; CHECK-NEXT: 
[[INC1]] = add nuw nsw i32 [[I_0]], 1
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    ret i32 [[ANS_0_LCSSA]]
+; CHECK-NEXT:    ret i32 [[ANS_0]]
 ;
 entry:
   br label %for.cond
@@ -933,7 +933,7 @@ define i32 @bounded_loop_inside_unbounded_loop(i32 %n) {
 ; CHECK-NEXT:    br label [[WHILE_COND]]
 ; CHECK:       while.end:
 ; CHECK-NEXT:    [[ANS_0_LCSSA:%.*]] = phi i32 [ [[ANS_0]], [[WHILE_COND]] ]
-; CHECK-NEXT:    ret i32 [[ANS_0_LCSSA]]
+; CHECK-NEXT:    ret i32 [[ANS_0]]
 ;
 entry:
   br label %while.cond
@@ -1016,7 +1016,7 @@ define i32 @nested_unbounded_loops(i32 %n) {
 ; CHECK-NEXT:    br label [[WHILE_COND]]
 ; CHECK:       while.end11:
 ; CHECK-NEXT:    [[ANS_0_LCSSA:%.*]] = phi i32 [ [[ANS_0]], [[WHILE_COND]] ]
-; CHECK-NEXT:    ret i32 [[ANS_0_LCSSA]]
+; CHECK-NEXT:    ret i32 [[ANS_0]]
 ;
 entry:
   br label %while.cond
diff --git a/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll b/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll
index fa3fd730eb495..2f7fe7b11e10e 100644
--- a/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll
+++ b/llvm/test/Transforms/OpenMP/attributor_recursion_crash.ll
@@ -19,7 +19,7 @@ define weak amdgpu_kernel void @k() {
 ; CHECK-NEXT:    [[PRE1:%.*]] = phi ptr [ [[DOTPRE158_I]], [[BB3]] ], [ null, [[BB2]] ]
 ; CHECK-NEXT:    br i1 false, label [[BB6]], label [[BB5:%.*]]
 ; CHECK:       BB5:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_TS:%.*]], ptr [[PRE1]], i64 0, i32 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [[STRUCT_TS:%.*]], ptr [[DOTPRE158_I]], i64 0, i32 1
 ; CHECK-NEXT:    [[Q3:%.*]] = load ptr, ptr [[GEP]], align 8
 ; CHECK-NEXT:    br label [[BB6]]
 ; CHECK:       BB6:

From b89558a2ae4b5b20a6f3e8ba0295439f947fd38c Mon Sep 17 00:00:00 2001
From: Johannes Doerfert
Date: Tue, 7 Mar 2023 13:26:31 -0800
Subject: [PATCH 117/691] [OpenMP][FIX] Properly track and lookup Execution
 Domains

This is a two-part fix.

First, we need two Execution Domains (ED) to track the values of a
function: one for incoming values and one for outgoing values. This was
conflated before.

Second, at the function entry we need to look at the incoming
information from call sites, not iterate over non-existing
predecessors.
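To sketch the first part of the fix in isolation: keep one record for
what is known on entry to the function and a separate record for what is
known at its exits, and let call-site information update each side
independently. The following self-contained C++ fragment is only a model
of that split; the field names mirror the patch, but the surrounding
types and the merge function are invented for illustration:

    struct ExecutionDomainTy {
      bool IsReachedFromAlignedBarrierOnly = true; // incoming property
      bool IsReachingAlignedBarrierOnly = true;    // outgoing property
    };

    struct FunctionDomains {
      ExecutionDomainTy Entry; // merged over all call sites (incoming)
      ExecutionDomainTy Exit;  // merged over exiting blocks (outgoing)
    };

    // A call site constrains the incoming side of the entry and the
    // outgoing side of the exits; conflating both in one record (as
    // before) lets one update clobber the other.
    void mergeCallSite(FunctionDomains &FD, const ExecutionDomainTy &CS) {
      FD.Entry.IsReachedFromAlignedBarrierOnly &=
          CS.IsReachedFromAlignedBarrierOnly;
      FD.Exit.IsReachingAlignedBarrierOnly &=
          CS.IsReachingAlignedBarrierOnly;
    }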
--- .../Transforms/IPO/AttributorAttributes.cpp | 2 +- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 58 ++++++++++------ .../reduced/openmp_opt_global_read.ll | 36 ++++++++++ .../reduced/openmp_opt_global_synced.ll | 69 +++++++++++++++++++ .../OpenMP/value-simplify-openmp-opt.ll | 1 + 5 files changed, 145 insertions(+), 21 deletions(-) create mode 100644 llvm/test/Transforms/Attributor/reduced/openmp_opt_global_read.ll create mode 100644 llvm/test/Transforms/Attributor/reduced/openmp_opt_global_synced.ll diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 04ba96481fb01..624e04611f4f4 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -10999,7 +10999,7 @@ struct AAPotentialValuesFloating : AAPotentialValuesImpl { InformationCache &InfoCache = A.getInfoCache(); if (InfoCache.isOnlyUsedByAssume(LI)) { if (!llvm::all_of(PotentialValueOrigins, [&](Instruction *I) { - if (!I) + if (!I || isa(I)) return true; if (auto *SI = dyn_cast(I)) return A.isAssumedDead(SI->getOperandUse(0), this, diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 7fd82df4910b2..a25537e00fec6 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -36,6 +36,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" @@ -2663,7 +2664,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { bool InitialEdgeOnly = false); /// Accumulate information for the entry block in \p EntryBBED. - void handleEntryBB(Attributor &A, ExecutionDomainTy &EntryBBED); + void handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED); /// See AbstractAttribute::updateImpl. ChangeStatus updateImpl(Attributor &A) override; @@ -2736,12 +2737,15 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { break; } while ((CurI = CurI->getPrevNonDebugInstruction())); - if (!CurI && - !llvm::all_of( - predecessors(I.getParent()), [&](const BasicBlock *PredBB) { - return BEDMap.lookup(PredBB).IsReachedFromAlignedBarrierOnly; - })) { - return false; + if (!CurI) { + const BasicBlock *BB = I.getParent(); + if (BB == &BB->getParent()->getEntryBlock()) + return BEDMap.lookup(nullptr).IsReachedFromAlignedBarrierOnly; + if (!llvm::all_of(predecessors(BB), [&](const BasicBlock *PredBB) { + return BEDMap.lookup(PredBB).IsReachedFromAlignedBarrierOnly; + })) { + return false; + } } // On neither traversal we found a anything but aligned barriers. @@ -2761,7 +2765,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { ExecutionDomainTy getFunctionExecutionDomain() const override { assert(isValidState() && "No request should be made against an invalid state!"); - return BEDMap.lookup(nullptr); + return InterProceduralED; } ///} @@ -2810,6 +2814,9 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { return false; }; + /// Mapping containing information about the function for other AAs. + ExecutionDomainTy InterProceduralED; + /// Mapping containing information per block. 
DenseMap BEDMap; DenseMap CEDMap; @@ -2844,26 +2851,29 @@ void AAExecutionDomainFunction::mergeInPredecessor( ED.clearAssumeInstAndAlignedBarriers(); } -void AAExecutionDomainFunction::handleEntryBB(Attributor &A, +void AAExecutionDomainFunction::handleCallees(Attributor &A, ExecutionDomainTy &EntryBBED) { - SmallVector PredExecDomains; + SmallVector CallSiteEDs; auto PredForCallSite = [&](AbstractCallSite ACS) { const auto &EDAA = A.getAAFor( *this, IRPosition::function(*ACS.getInstruction()->getFunction()), DepClassTy::OPTIONAL); if (!EDAA.getState().isValidState()) return false; - PredExecDomains.emplace_back( + CallSiteEDs.emplace_back( EDAA.getExecutionDomain(*cast(ACS.getInstruction()))); return true; }; + ExecutionDomainTy ExitED; bool AllCallSitesKnown; if (A.checkForAllCallSites(PredForCallSite, *this, /* RequiresAllCallSites */ true, AllCallSitesKnown)) { - for (const auto &PredED : PredExecDomains) - mergeInPredecessor(A, EntryBBED, PredED); + for (const auto &CSED : CallSiteEDs) { + mergeInPredecessor(A, EntryBBED, CSED); + ExitED.IsReachingAlignedBarrierOnly &= CSED.IsReachingAlignedBarrierOnly; + } } else { // We could not find all predecessors, so this is either a kernel or a @@ -2873,16 +2883,19 @@ void AAExecutionDomainFunction::handleEntryBB(Attributor &A, EntryBBED.IsExecutedByInitialThreadOnly = false; EntryBBED.IsReachedFromAlignedBarrierOnly = true; EntryBBED.EncounteredNonLocalSideEffect = false; + ExitED.IsReachingAlignedBarrierOnly = true; } else { EntryBBED.IsExecutedByInitialThreadOnly = false; EntryBBED.IsReachedFromAlignedBarrierOnly = false; EntryBBED.EncounteredNonLocalSideEffect = true; + ExitED.IsReachingAlignedBarrierOnly = false; } } auto &FnED = BEDMap[nullptr]; - FnED.IsReachingAlignedBarrierOnly &= + FnED.IsReachedFromAlignedBarrierOnly &= EntryBBED.IsReachedFromAlignedBarrierOnly; + FnED.IsReachingAlignedBarrierOnly &= ExitED.IsReachingAlignedBarrierOnly; } ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { @@ -2934,7 +2947,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { ExecutionDomainTy ED; // Propagate "incoming edges" into information about this block. if (IsEntryBB) { - handleEntryBB(A, ED); + handleCallees(A, ED); } else { // For live non-entry blocks we only propagate // information via live edges. @@ -3073,18 +3086,24 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { ED.EncounteredNonLocalSideEffect = true; } + bool IsEndAndNotReachingAlignedBarriersOnly = false; if (!isa(BB.getTerminator()) && !BB.getTerminator()->getNumSuccessors()) { - auto &FnED = BEDMap[nullptr]; - mergeInPredecessor(A, FnED, ED); + mergeInPredecessor(A, InterProceduralED, ED); + auto &FnED = BEDMap[nullptr]; + if (!FnED.IsReachingAlignedBarrierOnly) { + IsEndAndNotReachingAlignedBarriersOnly = true; + SyncInstWorklist.push_back(BB.getTerminator()); + } if (IsKernel) HandleAlignedBarrier(nullptr, ED); } ExecutionDomainTy &StoredED = BEDMap[&BB]; - ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly; + ED.IsReachingAlignedBarrierOnly = StoredED.IsReachingAlignedBarrierOnly & + !IsEndAndNotReachingAlignedBarriersOnly; // Check if we computed anything different as part of the forward // traversal. 
We do not take assumptions and aligned barriers into account @@ -3135,8 +3154,7 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { } if (SyncBB != &EntryBB) continue; - auto &FnED = BEDMap[nullptr]; - if (SetAndRecord(FnED.IsReachingAlignedBarrierOnly, false)) + if (SetAndRecord(InterProceduralED.IsReachingAlignedBarrierOnly, false)) Changed = true; } diff --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_read.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_read.ll new file mode 100644 index 0000000000000..94412b5627205 --- /dev/null +++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_read.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes --check-globals --include-generated-funcs +; RUN: opt -passes=openmp-opt -S < %s | FileCheck %s --check-prefixes=CHECK + +@IsSPMDMode = internal addrspace(3) global i32 undef + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.assume(i1 noundef) #0 + +define weak_odr amdgpu_kernel void @__omp_offloading_16_2e1d69__ZN11qmcplusplus7ompBLAS9gemv_implIfEEiRiciiT_PKS3_iS5_iS3_PS3_i_l44() { +bb: + %i36 = load i32, ptr addrspace(3) @IsSPMDMode, align 4 + %i37 = icmp eq i32 %i36, 0 + tail call void @llvm.assume(i1 %i37) + ret void +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 7, !"openmp", i32 50} +!1 = !{i32 7, !"openmp-device", i32 50} +;. +; CHECK: @[[ISSPMDMODE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global i32 undef +;. +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_2e1d69__ZN11qmcplusplus7ompBLAS9gemv_implIfEEiRiciiT_PKS3_iS5_iS3_PS3_i_l44() { +; CHECK-NEXT: bb: +; CHECK-NEXT: tail call void @llvm.assume(i1 true) +; CHECK-NEXT: ret void +; +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +;. +; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. 
diff --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_synced.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_synced.ll new file mode 100644 index 0000000000000..5cdd0ffb79af8 --- /dev/null +++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_global_synced.ll @@ -0,0 +1,69 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes --check-attributes --check-globals --include-generated-funcs +; RUN: opt -passes=openmp-opt -S < %s | FileCheck %s --check-prefixes=CHECK + +@_ZN4ompx5state9TeamStateE = internal addrspace(3) global ptr undef + +define internal fastcc i1 @__kmpc_kernel_parallel() { +bb: + %i = load ptr, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16 + %i1 = icmp eq ptr %i, null + ret i1 %i1 +} + +define weak_odr amdgpu_kernel void @__omp_offloading_16_2e1d69__ZN11qmcplusplus7ompBLAS9gemv_implIfEEiRiciiT_PKS3_iS5_iS3_PS3_i_l44() #1 { +bb: + call void @barrier() + %i31 = call fastcc i1 @__kmpc_kernel_parallel() + call void @barrier() + store ptr @use, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16 + call void @barrier() + store ptr null, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16 + call void @barrier() + call void @use(i1 %i31) + ret void +} + +declare void @use(i1) +declare void @barrier() nocallback + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #1 = { "kernel" } + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 7, !"openmp", i32 50} +!1 = !{i32 7, !"openmp-device", i32 50} +;. +; CHECK: @[[_ZN4OMPX5STATE9TEAMSTATEE:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global ptr undef +;. +; CHECK: Function Attrs: norecurse nosync nounwind memory(read) +; CHECK-LABEL: define {{[^@]+}}@__kmpc_kernel_parallel +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[I:%.*]] = load ptr, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16 +; CHECK-NEXT: [[I1:%.*]] = icmp eq ptr [[I]], null +; CHECK-NEXT: ret i1 [[I1]] +; +; +; CHECK-LABEL: define {{[^@]+}}@__omp_offloading_16_2e1d69__ZN11qmcplusplus7ompBLAS9gemv_implIfEEiRiciiT_PKS3_iS5_iS3_PS3_i_l44 +; CHECK-SAME: () #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: bb: +; CHECK-NEXT: call void @barrier() +; CHECK-NEXT: [[I31:%.*]] = call fastcc i1 @__kmpc_kernel_parallel() +; CHECK-NEXT: call void @barrier() +; CHECK-NEXT: store ptr @use, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16 +; CHECK-NEXT: call void @barrier() +; CHECK-NEXT: store ptr null, ptr addrspace(3) @_ZN4ompx5state9TeamStateE, align 16 +; CHECK-NEXT: call void @barrier() +; CHECK-NEXT: call void @use(i1 [[I31]]) +; CHECK-NEXT: ret void +; +;. +; CHECK: attributes #[[ATTR0]] = { norecurse nosync nounwind memory(read) } +; CHECK: attributes #[[ATTR1]] = { "kernel" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nosync nounwind } +;. +; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} +; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} +;. 
diff --git a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll index 531765c78f842..5c7737128daf0 100644 --- a/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll +++ b/llvm/test/Transforms/OpenMP/value-simplify-openmp-opt.ll @@ -37,6 +37,7 @@ define void @kernel() "kernel" { ; CHECK: if.else: ; CHECK-NEXT: call void @barrier() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: call void @use1(i32 undef) #[[ATTR6]] +; CHECK-NEXT: call void @llvm.assume(i1 true) ; CHECK-NEXT: call void @barrier() #[[ATTR6]] ; CHECK-NEXT: br label [[IF_MERGE]] ; CHECK: if.merge: From 8f47fd05d53f60be40a52d09df9dedc970bbafc5 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 20 Mar 2023 16:01:39 -0700 Subject: [PATCH 118/691] [OpenMPOpt][FIX] Avoid removing barriers in callees We could be smarter about this, e.g., if the callee has a single call site, but for now we first avoid the miscompile. --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2 + .../test/Transforms/OpenMP/barrier_removal.ll | 53 +++++++++++++++---- 2 files changed, 44 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index a25537e00fec6..3c915086db6f0 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2619,6 +2619,8 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { CallBase *LastCB = Worklist.pop_back_val(); if (!Visited.insert(LastCB)) continue; + if (LastCB->getFunction() != getAnchorScope()) + continue; if (!DeletedBarriers.count(LastCB)) { A.deleteAfterManifest(*LastCB); continue; diff --git a/llvm/test/Transforms/OpenMP/barrier_removal.ll b/llvm/test/Transforms/OpenMP/barrier_removal.ll index edc027372c10c..77be52ea9e734 100644 --- a/llvm/test/Transforms/OpenMP/barrier_removal.ll +++ b/llvm/test/Transforms/OpenMP/barrier_removal.ll @@ -632,16 +632,11 @@ m: } define internal void @write_then_barrier0(i32* %p) { -; MODULE-LABEL: define {{[^@]+}}@write_then_barrier0 -; MODULE-SAME: (ptr [[P:%.*]]) { -; MODULE-NEXT: store i32 0, ptr [[P]], align 4 -; MODULE-NEXT: ret void -; -; CGSCC-LABEL: define {{[^@]+}}@write_then_barrier0 -; CGSCC-SAME: (ptr [[P:%.*]]) { -; CGSCC-NEXT: store i32 0, ptr [[P]], align 4 -; CGSCC-NEXT: call void @aligned_barrier() -; CGSCC-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@write_then_barrier0 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: store i32 0, ptr [[P]], align 4 +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: ret void ; store i32 0, i32* %p call void @aligned_barrier() @@ -980,8 +975,40 @@ m3: ret void } +; Verify we do not remove the barrier in the callee. 
+define internal void @callee_barrier() { +; CHECK-LABEL: define {{[^@]+}}@callee_barrier() { +; CHECK-NEXT: call void @aligned_barrier() +; CHECK-NEXT: ret void +; + call void @aligned_barrier() + ret void +} +define void @caller_barrier1() { +; CHECK-LABEL: define {{[^@]+}}@caller_barrier1() { +; CHECK-NEXT: call void @callee_barrier() +; CHECK-NEXT: ret void +; + call void @aligned_barrier() + call void @callee_barrier() + call void @aligned_barrier() + ret void +} +define void @caller_barrier2() { +; CHECK-LABEL: define {{[^@]+}}@caller_barrier2() { +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: call void @callee_barrier() +; CHECK-NEXT: call void @unknown() +; CHECK-NEXT: ret void +; + call void @unknown() + call void @callee_barrier() + call void @unknown() + ret void +} + !llvm.module.flags = !{!16,!15} -!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23} +!nvvm.annotations = !{!0,!1,!2,!3,!4,!5,!6,!7,!8,!9,!10,!11,!12,!13,!14,!17,!18,!19,!20,!21,!22,!23,!24,!25} !0 = !{void ()* @pos_empty_1, !"kernel", i32 1} !1 = !{void ()* @pos_empty_2, !"kernel", i32 1} @@ -992,6 +1019,8 @@ m3: !17 = !{void ()* @pos_empty_7a, !"kernel", i32 1} !18 = !{void ()* @pos_empty_7b, !"kernel", i32 1} !23 = !{void (i1)* @pos_empty_8, !"kernel", i32 1} +!24 = !{void ()* @caller_barrier1, !"kernel", i32 1} +!25 = !{void ()* @caller_barrier2, !"kernel", i32 1} !6 = !{void ()* @neg_empty_8, !"kernel", i32 1} !19 = !{void (i1)* @neg_empty_9, !"kernel", i32 1} !20 = !{void ()* @pos_empty_10, !"kernel", i32 1} @@ -1038,4 +1067,6 @@ m3: ; CHECK: [[META21:![0-9]+]] = !{ptr @pos_empty_11, !"kernel", i32 1} ; CHECK: [[META22:![0-9]+]] = !{ptr @neg_empty_12, !"kernel", i32 1} ; CHECK: [[META23:![0-9]+]] = !{ptr @pos_empty_8, !"kernel", i32 1} +; CHECK: [[META24:![0-9]+]] = !{ptr @caller_barrier1, !"kernel", i32 1} +; CHECK: [[META25:![0-9]+]] = !{ptr @caller_barrier2, !"kernel", i32 1} ;. From c0f3a3d7b50c6bec85b8c5eb8d821087499eec41 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 20 Mar 2023 17:13:18 -0700 Subject: [PATCH 119/691] [Attributor][FIX] Avoid H2S on GPUs if the pointer can be shared If the stack is not accessible by other threads, e.g., on a GPU, we need to ensure heap-2-stack will not create a stack version of a pointer that might be passed to another thread. Since passing through memory is by default transparent, we need to register a callback and inspect stores we might look through explicitly. 
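The essence of the new check can be sketched in a few lines of
standalone C++. This is a model only; the predicate and field names are
invented and do not correspond to the actual Attributor callback API:

    #include <vector>

    struct LookedThroughStore {
      bool DestIsThreadLocal; // does the store target thread-local memory?
    };

    // Heap-to-stack may only fire if every store we looked through keeps
    // the pointer in memory other threads cannot read; otherwise a
    // pointer to a thread-private stack slot could leak to another thread.
    bool mayMoveAllocationToStack(
        bool StackIsAccessibleByOtherThreads,
        const std::vector<LookedThroughStore> &Stores) {
      if (StackIsAccessibleByOtherThreads)
        return true; // ordinary CPU stacks: escaping via memory is benign
      for (const LookedThroughStore &S : Stores)
        if (!S.DestIsThreadLocal)
          return false; // pointer may become visible to another thread
      return true;
    }

This mirrors the new tests below: @test16e stores the pointer to a plain
global and keeps the __kmpc_alloc_shared call, while @test16f stores it
to a thread_local global and is still converted to a stack allocation.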
--- .../Transforms/IPO/AttributorAttributes.cpp | 9 ++- .../Transforms/Attributor/heap_to_stack.ll | 39 +++++++++--- .../Attributor/heap_to_stack_gpu.ll | 60 ++++++++++++++++--- 3 files changed, 92 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 624e04611f4f4..6118404172aa6 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -7082,7 +7082,14 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) { ValidUsesOnly = false; return true; }; - if (!A.checkForAllUses(Pred, *this, *AI.CB)) + if (!A.checkForAllUses(Pred, *this, *AI.CB, /* CheckBBLivenessOnly */ false, + DepClassTy::OPTIONAL, /* IgnoreDroppableUses */ true, + [&](const Use &OldU, const Use &NewU) { + auto *SI = dyn_cast(OldU.getUser()); + return !SI || StackIsAccessibleByOtherThreads || + AA::isAssumedThreadLocalObject( + A, *SI->getPointerOperand(), *this); + })) return false; return ValidUsesOnly; }; diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll index 2a265d5074d54..ffe5bd878daae 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -21,6 +21,7 @@ declare void @foo(i32* %p) declare void @foo_nounw(i32* %p) nounwind nofree declare void @usei8(i8) +declare void @usei8p(i8* nocapture) declare i32 @no_return_call() noreturn @@ -28,6 +29,9 @@ declare void @free(i8* nocapture) allockind("free") declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind +;. +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal global i8* undef, align 4 +;. define void @h2s_value_simplify_interaction(i1 %c, i8* %A) { ; CHECK-LABEL: define {{[^@]+}}@h2s_value_simplify_interaction ; CHECK-SAME: (i1 [[C:%.*]], i8* nocapture nofree readnone [[A:%.*]]) { @@ -42,13 +46,13 @@ define void @h2s_value_simplify_interaction(i1 %c, i8* %A) { ; CHECK: f2: ; CHECK-NEXT: [[L:%.*]] = load i8, i8* [[M]], align 16 ; CHECK-NEXT: call void @usei8(i8 [[L]]) -; CHECK-NEXT: call void @no_sync_func(i8* nocapture nofree noundef align 16 [[M]]) #[[ATTR10:[0-9]+]] +; CHECK-NEXT: call void @no_sync_func(i8* nocapture nofree noundef align 16 [[M]]) #[[ATTR11:[0-9]+]] ; CHECK-NEXT: br label [[J]] ; CHECK: dead: ; CHECK-NEXT: unreachable ; CHECK: j: ; CHECK-NEXT: [[PHI:%.*]] = phi i8* [ [[M]], [[F]] ], [ null, [[F2]] ] -; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef align 16 [[PHI]]) #[[ATTR10]] +; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree noundef align 16 [[PHI]]) #[[ATTR11]] ; CHECK-NEXT: ret void ; entry: @@ -328,7 +332,7 @@ define void @test9() { ; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) #[[ATTR10]] +; CHECK-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) #[[ATTR11]] ; CHECK-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) ; CHECK-NEXT: ret void ; @@ -387,7 +391,7 @@ define i32 @test_lifetime() { define void @test11() { ; CHECK-LABEL: define {{[^@]+}}@test11() { ; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1 -; CHECK-NEXT: tail call void @sync_will_return(i8* [[DOTH2S]]) #[[ATTR10]] +; CHECK-NEXT: tail call void @sync_will_return(i8* 
[[DOTH2S]]) #[[ATTR11]] ; CHECK-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) @@ -628,7 +632,7 @@ define void @test16c(i8 %v, i8** %P) { ; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) { ; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1 ; CHECK-NEXT: store i8* [[DOTH2S]], i8** [[P]], align 8 -; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[DOTH2S]]) #[[ATTR10]] +; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[DOTH2S]]) #[[ATTR11]] ; CHECK-NEXT: ret void ; %1 = tail call noalias i8* @malloc(i64 4) @@ -649,6 +653,25 @@ define void @test16d(i8 %v, i8** %P) { store i8* %1, i8** %P ret void } + +@G = internal global i8* undef, align 4 +define void @test16e(i8 %v) norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define {{[^@]+}}@test16e +; CHECK-SAME: (i8 [[V:%.*]]) #[[ATTR9:[0-9]+]] { +; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1 +; CHECK-NEXT: store i8* [[DOTH2S]], i8** @G, align 8 +; CHECK-NEXT: call void @usei8p(i8* nocapture nofree [[DOTH2S]]) #[[ATTR12:[0-9]+]] +; CHECK-NEXT: ret void +; + %1 = tail call noalias i8* @malloc(i64 4) + store i8* %1, i8** @G + %2 = load i8*, i8** @G + call void @usei8p(i8* nofree nocapture %2) nocallback nosync willreturn nounwind + call void @free(i8* %1) + ret void +} + ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) } ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn } @@ -659,8 +682,10 @@ define void @test16d(i8 %v, i8** %P) { ; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes #[[ATTR7:[0-9]+]] = { allockind("alloc,uninitialized,aligned") allocsize(1) } ; CHECK: attributes #[[ATTR8:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) } -; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } -; CHECK: attributes #[[ATTR10]] = { nounwind } +; CHECK: attributes #[[ATTR9]] = { norecurse } +; CHECK: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CHECK: attributes #[[ATTR11]] = { nounwind } +; CHECK: attributes #[[ATTR12]] = { nocallback nosync nounwind willreturn } ;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CGSCC: {{.*}} diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll index db6f2773fa8e5..ef7a80c1e00a2 100644 --- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll @@ -34,6 +34,10 @@ declare void @free(i8* nocapture) declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) nounwind +;. +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = internal global i8* undef, align 4 +; CHECK: @[[GTL:[a-zA-Z0-9_$"\\.-]+]] = internal thread_local global i8* undef, align 4 +;. 
define void @nofree_arg_only(i8* %p1, i8* %p2) { ; CHECK-LABEL: define {{[^@]+}}@nofree_arg_only ; CHECK-SAME: (i8* nocapture nofree [[P1:%.*]], i8* nocapture [[P2:%.*]]) { @@ -277,7 +281,7 @@ define void @test9() { ; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* ; CHECK-NEXT: store i32 10, i32* [[TMP2]], align 4 -; CHECK-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) #[[ATTR5:[0-9]+]] +; CHECK-NEXT: tail call void @foo_nounw(i32* nofree noundef align 4 [[TMP2]]) #[[ATTR6:[0-9]+]] ; CHECK-NEXT: tail call void @free(i8* nocapture noundef nonnull align 4 dereferenceable(4) [[TMP1]]) ; CHECK-NEXT: ret void ; @@ -338,7 +342,7 @@ define i32 @test_lifetime() { define void @test11() { ; CHECK-LABEL: define {{[^@]+}}@test11() { ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; CHECK-NEXT: tail call void @sync_will_return(i8* [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: tail call void @sync_will_return(i8* [[TMP1]]) #[[ATTR6]] ; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) ; CHECK-NEXT: ret void ; @@ -584,7 +588,7 @@ define void @test16c(i8 %v, i8** %P) { ; CHECK-SAME: (i8 [[V:%.*]], i8** nocapture nofree writeonly [[P:%.*]]) { ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) ; CHECK-NEXT: store i8* [[TMP1]], i8** [[P]], align 8 -; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) #[[ATTR5]] +; CHECK-NEXT: tail call void @no_sync_func(i8* nocapture nofree [[TMP1]]) #[[ATTR6]] ; CHECK-NEXT: tail call void @free(i8* nocapture [[TMP1]]) ; CHECK-NEXT: ret void ; @@ -614,7 +618,7 @@ define void @test17() { ; CHECK-LABEL: define {{[^@]+}}@test17() { ; CHECK-NEXT: [[DOTH2S:%.*]] = alloca i8, i64 4, align 1, addrspace(5) ; CHECK-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[DOTH2S]] to i8* -; CHECK-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[MALLOC_CAST]]) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[MALLOC_CAST]]) #[[ATTR7:[0-9]+]] ; CHECK-NEXT: ret void ; %1 = tail call noalias i8* @__kmpc_alloc_shared(i64 4) @@ -626,7 +630,7 @@ define void @test17() { define void @test17b() { ; CHECK-LABEL: define {{[^@]+}}@test17b() { ; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @__kmpc_alloc_shared(i64 noundef 4) -; CHECK-NEXT: tail call void @usei8(i8* nofree [[TMP1]]) #[[ATTR6]] +; CHECK-NEXT: tail call void @usei8(i8* nofree [[TMP1]]) #[[ATTR7]] ; CHECK-NEXT: tail call void @__kmpc_free_shared(i8* nocapture [[TMP1]], i64 noundef 4) ; CHECK-NEXT: ret void ; @@ -643,7 +647,7 @@ define void @move_alloca() { ; CHECK-NEXT: br label [[NOT_ENTRY:%.*]] ; CHECK: not_entry: ; CHECK-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[DOTH2S]] to i8* -; CHECK-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[MALLOC_CAST]]) #[[ATTR6]] +; CHECK-NEXT: tail call void @usei8(i8* noalias nocapture nofree [[MALLOC_CAST]]) #[[ATTR7]] ; CHECK-NEXT: ret void ; entry: @@ -656,6 +660,44 @@ not_entry: ret void } +@G = internal global i8* undef, align 4 +define void @test16e(i8 %v) norecurse { +; CHECK: Function Attrs: norecurse +; CHECK-LABEL: define {{[^@]+}}@test16e +; CHECK-SAME: (i8 [[V:%.*]]) #[[ATTR5:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @__kmpc_alloc_shared(i64 noundef 4) +; CHECK-NEXT: store i8* [[TMP1]], i8** @G, align 8 +; CHECK-NEXT: call void @usei8(i8* nocapture nofree [[TMP1]]) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: tail call void 
@__kmpc_free_shared(i8* noalias nocapture [[TMP1]], i64 noundef 4)
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @__kmpc_alloc_shared(i64 4)
+  store i8* %1, i8** @G
+  %2 = load i8*, i8** @G
+  call void @usei8(i8* nofree nocapture %2) nocallback nosync willreturn nounwind
+  tail call void @__kmpc_free_shared(i8* %1, i64 4)
+  ret void
+}
+
+@Gtl = internal thread_local global i8* undef, align 4
+define void @test16f(i8 %v) norecurse {
+; CHECK: Function Attrs: norecurse
+; CHECK-LABEL: define {{[^@]+}}@test16f
+; CHECK-SAME: (i8 [[V:%.*]]) #[[ATTR5]] {
+; CHECK-NEXT:    [[DOTH2S:%.*]] = alloca i8, i64 4, align 1, addrspace(5)
+; CHECK-NEXT:    [[MALLOC_CAST:%.*]] = addrspacecast i8 addrspace(5)* [[DOTH2S]] to i8*
+; CHECK-NEXT:    store i8* [[MALLOC_CAST]], i8** @Gtl, align 8
+; CHECK-NEXT:    call void @usei8(i8* nocapture nofree [[MALLOC_CAST]]) #[[ATTR8]]
+; CHECK-NEXT:    ret void
+;
+  %1 = tail call noalias i8* @__kmpc_alloc_shared(i64 4)
+  store i8* %1, i8** @Gtl
+  %2 = load i8*, i8** @Gtl
+  call void @usei8(i8* nofree nocapture %2) nocallback nosync willreturn nounwind
+  tail call void @__kmpc_free_shared(i8* %1, i64 4)
+  ret void
+}
+
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind willreturn }
@@ -663,8 +705,10 @@ not_entry:
 ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nofree nounwind }
 ; CHECK: attributes #[[ATTR3]] = { noreturn }
 ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; CHECK: attributes #[[ATTR5]] = { nounwind }
-; CHECK: attributes #[[ATTR6]] = { nosync nounwind willreturn }
+; CHECK: attributes #[[ATTR5]] = { norecurse }
+; CHECK: attributes #[[ATTR6]] = { nounwind }
+; CHECK: attributes #[[ATTR7]] = { nosync nounwind willreturn }
+; CHECK: attributes #[[ATTR8]] = { nocallback nosync nounwind willreturn }
 ;.
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CGSCC: {{.*}}

From 4c483a046d2ff29ec2fd5bad6305f97424a2b880 Mon Sep 17 00:00:00 2001
From: Michael Francis
Date: Mon, 20 Mar 2023 18:00:32 +0000
Subject: [PATCH 120/691] [AIX] Consolidate Crt0Basename logic

When certain flags are specified, the Crt0 object files are not linked.
However, the logic for determining which files to link would always
run. This patch moves that logic so that the basename is only
determined if it is needed.

Differential Revision: https://reviews.llvm.org/D146443
---
 clang/lib/Driver/ToolChains/AIX.cpp | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp
index d4d13ce36e76f..5521a38d9bc0a 100644
--- a/clang/lib/Driver/ToolChains/AIX.cpp
+++ b/clang/lib/Driver/ToolChains/AIX.cpp
@@ -163,19 +163,19 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     CmdArgs.push_back("-bpD:0x110000000");
   }

-  auto getCrt0Basename = [&Args, IsArch32Bit] {
-    if (Arg *A = Args.getLastArgNoClaim(options::OPT_p, options::OPT_pg)) {
-      // Enable gprofiling when "-pg" is specified.
-      if (A->getOption().matches(options::OPT_pg))
-        return IsArch32Bit ? "gcrt0.o" : "gcrt0_64.o";
-      // Enable profiling when "-p" is specified.
-      return IsArch32Bit ? "mcrt0.o" : "mcrt0_64.o";
-    }
-    return IsArch32Bit ?
"crt0.o" : "crt0_64.o"; - }; - if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nostartfiles, options::OPT_shared, options::OPT_r)) { + auto getCrt0Basename = [&Args, IsArch32Bit] { + if (Arg *A = Args.getLastArgNoClaim(options::OPT_p, options::OPT_pg)) { + // Enable gprofiling when "-pg" is specified. + if (A->getOption().matches(options::OPT_pg)) + return IsArch32Bit ? "gcrt0.o" : "gcrt0_64.o"; + // Enable profiling when "-p" is specified. + return IsArch32Bit ? "mcrt0.o" : "mcrt0_64.o"; + } + return IsArch32Bit ? "crt0.o" : "crt0_64.o"; + }; + CmdArgs.push_back( Args.MakeArgString(ToolChain.GetFilePath(getCrt0Basename()))); From 64d96ec817f18d7fffa706989e4a1d54186c8b18 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 20 Mar 2023 19:20:33 -0700 Subject: [PATCH 121/691] Fix Windows export list. Should fix Windows buildbot: https://lab.llvm.org/buildbot/#/builders/127/builds/45411 --- .../test/asan/TestCases/Windows/interface_symbols_windows.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/compiler-rt/test/asan/TestCases/Windows/interface_symbols_windows.cpp b/compiler-rt/test/asan/TestCases/Windows/interface_symbols_windows.cpp index 58808d4387612..f3ed331451ad4 100644 --- a/compiler-rt/test/asan/TestCases/Windows/interface_symbols_windows.cpp +++ b/compiler-rt/test/asan/TestCases/Windows/interface_symbols_windows.cpp @@ -40,7 +40,6 @@ // IMPORT: __asan_set_seh_filter // IMPORT: __asan_unhandled_exception_filter // IMPORT: __asan_test_only_reported_buggy_pointer -// IMPORT: __sancov_lowest_stack // IMPORT: __ubsan_vptr_type_cache // // RUN: cat %t.imports1 %t.imports2 %t.imports3 | sort | uniq > %t.imports-sorted From 8894fe7a6f3efad371d64a1825ba3f4a6704d604 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Thu, 16 Mar 2023 11:02:06 +0800 Subject: [PATCH 122/691] [docs] Update the status for coroutines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the discussion in https://discourse.llvm.org/t/rfc-could-we-mark-coroutines-as-unreleased-now/69220 We should mark coroutines as "it’s supported fully everywhere but on Windows targets". Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D146187 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/www/cxx_status.html | 6 ++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 78c57500568f9..e7688b09f68e6 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -83,6 +83,8 @@ C++20 Feature Support - Lambda templates with a requires clause directly after the template parameters now parse correctly if the requires clause consists of a variable with a dependent type. (`#61278 `_) +- Announced C++20 Coroutines is fully supported on all targets except Windows, which + still has some stability and ABI issues. C++2b Feature Support ^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index 0c742b358bb6c..79cfca34fb4f3 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -1222,10 +1222,8 @@

C++20 implementation status

P0912R5
 Partial
-          The optimizer does not yet handle TLS with
-          __attribute__((const)) attribute correctly. There can be issues where the
-          coroutine may resume on a different thread. This feature requires further
-          analysis of the C++ Standard to determine what work is necessary for conformance.
+          Fully supported on all targets except Windows, which
+          still has some stability and ABI issues.
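For a concrete picture of the feature whose status changes above, here is a minimal C++20 coroutine of the kind now reported as working; the Task/run names are illustrative only, not taken from the patch:

#include <coroutine>
#include <iostream>

// A bare-bones coroutine return type: never suspends at start or end.
struct Task {
  struct promise_type {
    Task get_return_object() { return {}; }
    std::suspend_never initial_suspend() { return {}; }
    std::suspend_never final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
};

Task run() {
  std::cout << "before suspend\n";
  co_await std::suspend_never{};  // a suspension point that resumes at once
  std::cout << "after suspend\n";
}

int main() { run(); }

Per the status text above, code like this is expected to work on all targets, with Windows still subject to stability and ABI caveats.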
From 064e2497e2ebe9ac30ac96923a26a52484300fdf Mon Sep 17 00:00:00 2001 From: Jie Fu Date: Tue, 21 Mar 2023 10:33:48 +0800 Subject: [PATCH 123/691] [llvm-readobj] Fix ambiguous call of printNumber in ELFDumper.cpp (NFC) /Users/jiefu/llvm-project/llvm/tools/llvm-readobj/ELFDumper.cpp:7175:5: error: call to member function 'printNumber' is ambiguous W.printNumber("TotalBuckets", NBucket); ~~^~~~~~~~~~~ /Users/jiefu/llvm-project/llvm/include/llvm/Support/ScopedPrinter.h:201:16: note: candidate function virtual void printNumber(StringRef Label, uint64_t Value) { ^ /Users/jiefu/llvm-project/llvm/include/llvm/Support/ScopedPrinter.h:205:16: note: candidate function virtual void printNumber(StringRef Label, uint32_t Value) { ^ /Users/jiefu/llvm-project/llvm/include/llvm/Support/ScopedPrinter.h:209:16: note: candidate function virtual void printNumber(StringRef Label, uint16_t Value) { ^ /Users/jiefu/llvm-project/llvm/include/llvm/Support/ScopedPrinter.h:213:16: note: candidate function virtual void printNumber(StringRef Label, uint8_t Value) { ^ /Users/jiefu/llvm-project/llvm/include/llvm/Support/ScopedPrinter.h:217:16: note: candidate function virtual void printNumber(StringRef Label, int64_t Value) { ^ /Users/jiefu/llvm-project/llvm/include/llvm/Support/ScopedPrinter.h:221:16: note: candidate function virtual void printNumber(StringRef Label, int32_t Value) { ^ /Users/jiefu/llvm-project/llvm/include/llvm/Support/ScopedPrinter.h:225:16: note: candidate function virtual void printNumber(StringRef Label, int16_t Value) { ^ /Users/jiefu/llvm-project/llvm/include/llvm/Support/ScopedPrinter.h:229:16: note: candidate function virtual void printNumber(StringRef Label, int8_t Value) { ^ /Users/jiefu/llvm-project/llvm/include/llvm/Support/ScopedPrinter.h:237:16: note: candidate function virtual void printNumber(StringRef Label, float Value) { ^ /Users/jiefu/llvm-project/llvm/include/llvm/Support/ScopedPrinter.h:241:16: note: candidate function virtual void printNumber(StringRef Label, double Value) { ^ --- llvm/tools/llvm-readobj/ELFDumper.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 3f1f226d1fe4a..0298e8dce50ed 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -7172,14 +7172,14 @@ void LLVMELFDumper::printHashHistogramStats(size_t NBucket, StringRef BucketName = IsGnu ? "Bucket" : "Chain"; StringRef ListName = IsGnu ? "Buckets" : "Chains"; DictScope Outer(W, HistName); - W.printNumber("TotalBuckets", NBucket); + W.printNumber("TotalBuckets", static_cast(NBucket)); ListScope Buckets(W, ListName); size_t CumulativeNonZero = 0; for (size_t I = 0; I < MaxChain; ++I) { CumulativeNonZero += Count[I] * I; DictScope Bucket(W, BucketName); - W.printNumber("Length", I); - W.printNumber("Count", Count[I]); + W.printNumber("Length", static_cast(I)); + W.printNumber("Count", static_cast(Count[I])); W.printNumber("Percentage", (float)(Count[I] * 100.0) / NBucket); W.printNumber("Coverage", (float)(CumulativeNonZero * 100.0) / TotalSyms); } From 00c4343d216348b9c527c2f8040a202c1a78a4a0 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 20 Mar 2023 18:55:00 -0700 Subject: [PATCH 124/691] [llvm-jitlink] Fix typo in description of llvm-jitlink's -alias option. 
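(An aside on the printNumber change in ELFDumper.cpp above: the ambiguity is easy to reproduce standalone. In the sketch below the two overloads merely stand in for ScopedPrinter's many printNumber overloads; on targets where size_t is 'unsigned long' while the overloads take 'unsigned long long' and 'unsigned int', no candidate is an exact match and the call is ambiguous, which is why the fix casts explicitly.)

#include <cstddef>
#include <cstdint>
#include <iostream>

void printNumber(uint64_t V) { std::cout << "u64: " << V << "\n"; }
void printNumber(uint32_t V) { std::cout << "u32: " << V << "\n"; }

int main() {
  std::size_t N = 42;
  // printNumber(N);  // ambiguous on e.g. Darwin, where size_t is
  //                  // 'unsigned long' and matches neither overload exactly
  printNumber(static_cast<uint64_t>(N));  // OK: width chosen explicitly
}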
--- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp
index 5e91ad068cdbc..15b26a80e2274 100644
--- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp
+++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp
@@ -151,7 +151,8 @@ static cl::list AbsoluteDefs(
 cl::cat(JITLinkCategory));
 static cl::list
- Aliases("alias", cl::desc("Inject symbol aliases (syntax: =)"),
+ Aliases("alias",
+ cl::desc("Inject symbol aliases (syntax: =)"),
 cl::cat(JITLinkCategory));
 static cl::list TestHarnesses("harness", cl::Positional,

From 4fa9dc948226e374372537250d046924d348307e Mon Sep 17 00:00:00 2001
From: Ben Shi
Date: Sun, 8 Jan 2023 20:35:23 +0800
Subject: [PATCH 125/691] [AVR] Fix incorrect expansion of the pseudo
 'ELPMBRdZ' instruction

The 'ELPM' instruction has three forms:

  --------------------------
  | form        | feature  |
  | ----------- | -------- |
  | ELPM        | hasELPM  |
  | ELPM Rd, Z  | hasELPMX |
  | ELPM Rd, Z+ | hasELPMX |
  --------------------------

The second form is always used in the expansion of the pseudo instruction
'ELPMBRdZ'. But for devices that have ELPM but not ELPMX, only the first
form can be emitted.

Reviewed By: jacquesguan

Differential Revision: https://reviews.llvm.org/D141221
---
 llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 19 +++++--
 llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp | 2 +
 llvm/lib/Target/AVR/AVRInstrInfo.td | 10 ++--
 llvm/test/CodeGen/AVR/elpm.ll | 54 ++++++++++++++++++--
 llvm/test/CodeGen/AVR/pseudo/ELPMBRdZ.mir | 45 ++++++++++++++++
 5 files changed, 117 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/AVR/pseudo/ELPMBRdZ.mir

diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
index 2c97dea0bce03..06dc2b7c5b27b 100644
--- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -871,11 +871,20 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) {
 buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(BankReg);
 // Load byte.
- auto MILB = buildMI(MBB, MBBI, AVR::ELPMRdZ)
- .addReg(DstReg, RegState::Define)
- .addReg(SrcReg, getKillRegState(SrcIsKill));
-
- MILB.setMemRefs(MI.memoperands());
+ if (STI.hasELPMX()) {
+ auto MILB = buildMI(MBB, MBBI, AVR::ELPMRdZ)
+ .addReg(DstReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ MILB.setMemRefs(MI.memoperands());
+ } else {
+ // For the basic 'ELPM' instruction, its operand[0] is the implicit
+ // 'Z' register, and its operand[1] is the implicit 'R0' register.
+ auto MILB = buildMI(MBB, MBBI, AVR::ELPM);
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstReg, RegState::Define)
+ .addReg(AVR::R0, RegState::Kill);
+ MILB.setMemRefs(MI.memoperands());
+ }
 MI.eraseFromParent();
 return true;

diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 5511d53dfa312..03015a457a0d1 100644
--- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -366,6 +366,8 @@ template <> bool AVRDAGToDAGISel::select(SDNode *N) {
 int ProgMemBank = AVR::getProgramMemoryBank(LD);
 if (ProgMemBank < 0 || ProgMemBank > 5)
 report_fatal_error("unexpected program memory bank");
+ if (ProgMemBank > 0 && !Subtarget->hasELPM())
+ report_fatal_error("unexpected program memory bank");
 // This is a flash memory load, move the pointer into R31R30 and emit
 // the lpm instruction.
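Before the .td and test changes below, the expansion rule itself can be seen in isolation. A small runnable C++ sketch follows; emit() and the r24 register are stand-ins, not LLVM APIs. With ELPMX the byte loads straight into the destination register, while the basic ELPM form leaves it in r0 and needs an extra mov:

#include <cstdio>

// Stand-in for the MachineInstr builders in AVRExpandPseudoInsts.cpp.
void emit(const char *Line) { std::puts(Line); }

void expandELPMBRdZ(bool HasELPMX) {
  if (HasELPMX) {
    emit("elpm r24, Z");  // second form, requires the ELPMX feature
  } else {
    emit("elpm");         // first form: implicit Z source, result in r0
    emit("mov r24, r0");  // copy the loaded byte to the destination
  }
}

int main() {
  expandELPMBRdZ(true);   // prints: elpm r24, Z
  expandELPMBRdZ(false);  // prints: elpm, then mov r24, r0
}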
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index 05ee94be79263..c272711bb8663 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -1742,12 +1742,14 @@ let mayLoad = 1, hasSideEffects = 0 in { Requires<[HasELPMX]>; } + // This pseudo is combination of the OUT and ELPM instructions. + let Defs = [R0] in + def ELPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p), + "elpmb\t$dst, $z, $p", []>, + Requires<[HasELPM]>; + // These pseudos are combination of the OUT and ELPM instructions. let Defs = [R31R30], hasSideEffects = 1 in { - def ELPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z, LD8:$p), - "elpmb\t$dst, $z, $p", []>, - Requires<[HasELPMX]>; - let Constraints = "@earlyclobber $dst" in def ELPMWRdZ : Pseudo<(outs DREGS:$dst), (ins ZREG:$z, LD8:$p), "elpmw\t$dst, $z, $p", []>, diff --git a/llvm/test/CodeGen/AVR/elpm.ll b/llvm/test/CodeGen/AVR/elpm.ll index a322ab773014a..ba28bc814591d 100644 --- a/llvm/test/CodeGen/AVR/elpm.ll +++ b/llvm/test/CodeGen/AVR/elpm.ll @@ -1,5 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=avr --mcpu=atmega2560 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=avr -mattr=+movw -mattr=+elpm -mattr=+elpmx -mattr=+lpm -mattr=+lpmx -verify-machineinstrs \ +; RUN: | FileCheck %s +; RUN: llc < %s -mtriple=avr -mattr=+movw -mattr=+elpm -mattr=-elpmx -mattr=+lpm -mattr=-lpmx -verify-machineinstrs \ +; RUN: | FileCheck --check-prefix=NOX %s @arr0 = addrspace(1) constant [4 x i16] [i16 123, i16 24, i16 56, i16 37], align 1 @arr1 = addrspace(2) constant [4 x i16] [i16 123, i16 34, i16 46, i16 27], align 1 @@ -129,9 +132,9 @@ entry: ret i16 %sub } -@arrb1 = addrspace(1) constant [4 x i8] c"{\188%", align 1 -@arrb3 = addrspace(3) constant [4 x i8] c"{\22.\1B", align 1 -@arrb5 = addrspace(5) constant [4 x i8] c"{\17-\11", align 1 +@arrb1 = addrspace(1) constant [4 x i8] c"abcd", align 1 +@arrb3 = addrspace(3) constant [4 x i8] c"1234", align 1 +@arrb5 = addrspace(5) constant [4 x i8] c"HJLQ", align 1 define signext i8 @foob0(i16 %a, i16 %b) { ; CHECK-LABEL: foob0: @@ -232,6 +235,28 @@ define signext i8 @foob3(i16 %a, i16 %b) { ; CHECK-NEXT: lsl r25 ; CHECK-NEXT: sbc r25, r25 ; CHECK-NEXT: ret +; +; NOX-LABEL: foob3: +; NOX: ; %bb.0: ; %entry +; NOX-NEXT: subi r22, lo8(-(arrb5)) +; NOX-NEXT: sbci r23, hi8(-(arrb5)) +; NOX-NEXT: movw r30, r22 +; NOX-NEXT: ldi r18, 4 +; NOX-NEXT: out 59, r18 +; NOX-NEXT: elpm +; NOX-NEXT: mov r18, r0 +; NOX-NEXT: subi r24, lo8(-(arrb3)) +; NOX-NEXT: sbci r25, hi8(-(arrb3)) +; NOX-NEXT: movw r30, r24 +; NOX-NEXT: ldi r24, 2 +; NOX-NEXT: out 59, r24 +; NOX-NEXT: elpm +; NOX-NEXT: mov r24, r0 +; NOX-NEXT: sub r24, r18 +; NOX-NEXT: mov r25, r24 +; NOX-NEXT: lsl r25 +; NOX-NEXT: sbc r25, r25 +; NOX-NEXT: ret entry: %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %a %0 = load i8, i8 addrspace(3)* %arrayidx, align 1 @@ -260,6 +285,27 @@ define signext i8 @foob4(i16 %a, i16 %b) { ; CHECK-NEXT: lsl r25 ; CHECK-NEXT: sbc r25, r25 ; CHECK-NEXT: ret +; +; NOX-LABEL: foob4: +; NOX: ; %bb.0: ; %entry +; NOX-NEXT: subi r22, lo8(-(arrb3)) +; NOX-NEXT: sbci r23, hi8(-(arrb3)) +; NOX-NEXT: movw r30, r22 +; NOX-NEXT: ldi r18, 2 +; NOX-NEXT: out 59, r18 +; NOX-NEXT: elpm +; NOX-NEXT: mov r19, r0 +; NOX-NEXT: subi r24, lo8(-(arrb3)) +; NOX-NEXT: sbci r25, hi8(-(arrb3)) +; NOX-NEXT: movw r30, r24 +; NOX-NEXT: out 59, r18 +; NOX-NEXT: elpm +; NOX-NEXT: mov r24, r0 +; 
NOX-NEXT: sub r24, r19 +; NOX-NEXT: mov r25, r24 +; NOX-NEXT: lsl r25 +; NOX-NEXT: sbc r25, r25 +; NOX-NEXT: ret entry: %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(3)* @arrb3, i16 0, i16 %a %0 = load i8, i8 addrspace(3)* %arrayidx, align 1 diff --git a/llvm/test/CodeGen/AVR/pseudo/ELPMBRdZ.mir b/llvm/test/CodeGen/AVR/pseudo/ELPMBRdZ.mir new file mode 100644 index 0000000000000..29dbd79c652a2 --- /dev/null +++ b/llvm/test/CodeGen/AVR/pseudo/ELPMBRdZ.mir @@ -0,0 +1,45 @@ +# RUN: llc -mtriple=avr -mattr=+elpm -mattr=+elpmx -start-before=greedy %s -o - \ +# RUN: | FileCheck %s +# RUN: llc -mtriple=avr -mattr=+elpm -mattr=-elpmx -start-before=greedy %s -o - \ +# RUN: | FileCheck --check-prefix=NOX %s + +# This test checks the expansion of the 16-bit ELPM pseudo instruction and that +# the register allocator won't use R31R30 as an output register (which would +# lead to undefined behavior). + +--- | + target triple = "avr--" + define void @test_elpmbrdz() { + entry: + ret void + } +... + +--- +name: test_elpmbrdz +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r31r30 + + ; CHECK-LABEL: test_elpmbrdz + ; CHECK: ; %bb.0: + ; CHECK-NEXT: ldi r24, 1 + ; CHECK-NEXT: out + ; CHECK-NEXT: elpm r31, Z + ; CHECK-NEXT: ret + + ; NOX-LABEL: test_elpmbrdz + ; NOX: ; %bb.0: + ; NOX-NEXT: ldi r24, 1 + ; NOX-NEXT: out + ; NOX-NEXT: elpm + ; NOX-NEXT: mov r31, r0 + ; NOX-NEXT: ret + + %1:zreg = COPY killed $r31r30 + %2:ld8 = LDIRdK 1 + %3:gpr8 = ELPMBRdZ %1, %2, implicit-def dead $r0 + $r31 = COPY %3 + RET implicit $r31 +... From d9661d79f46601698b21c01788c244e6294f5eb1 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Tue, 21 Mar 2023 12:04:41 +0800 Subject: [PATCH 126/691] [Webassembly][multivalue] update libcall signature when multivalue feature enabled fixed: #59095 Update libcall signatures to use multivalue return rather than returning via a pointer when the multivalue features is enabled in the WebAssembly backend. Reviewed By: tlively Differential Revision: https://reviews.llvm.org/D146271 --- .../WebAssemblyRuntimeLibcallSignatures.cpp | 120 +++++++++--------- .../CodeGen/WebAssembly/multivalue_libcall.ll | 63 +++++++++ 2 files changed, 123 insertions(+), 60 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index 72a53b6c388ea..c48a13ef7eb59 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -687,72 +687,72 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, Params.push_back(PtrTy); break; case i64_i64_func_f32: -#if 0 // TODO: Enable this when wasm gets multiple-return-value support. - Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); -#else - Params.push_back(PtrTy); -#endif + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + } else { + Params.push_back(PtrTy); + } Params.push_back(wasm::ValType::F32); break; case i64_i64_func_f64: -#if 0 // TODO: Enable this when wasm gets multiple-return-value support. 
- Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); -#else - Params.push_back(PtrTy); -#endif + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + } else { + Params.push_back(PtrTy); + } Params.push_back(wasm::ValType::F64); break; case i16_i16_func_i16_i16: -#if 0 // TODO: Enable this when wasm gets multiple-return-value support. - Rets.push_back(wasm::ValType::I32); - Rets.push_back(wasm::ValType::I32); -#else - Params.push_back(PtrTy); -#endif + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I32); + Rets.push_back(wasm::ValType::I32); + } else { + Params.push_back(PtrTy); + } Params.push_back(wasm::ValType::I32); Params.push_back(wasm::ValType::I32); break; case i32_i32_func_i32_i32: -#if 0 // TODO: Enable this when wasm gets multiple-return-value support. - Rets.push_back(wasm::ValType::I32); - Rets.push_back(wasm::ValType::I32); -#else - Params.push_back(PtrTy); -#endif + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I32); + Rets.push_back(wasm::ValType::I32); + } else { + Params.push_back(PtrTy); + } Params.push_back(wasm::ValType::I32); Params.push_back(wasm::ValType::I32); break; case i64_i64_func_i64_i64: -#if 0 // TODO: Enable this when wasm gets multiple-return-value support. - Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); -#else - Params.push_back(PtrTy); -#endif + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + } else { + Params.push_back(PtrTy); + } Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); break; case i64_i64_func_i64_i64_i64_i64: -#if 0 // TODO: Enable this when wasm gets multiple-return-value support. - Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); -#else - Params.push_back(PtrTy); -#endif + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + } else { + Params.push_back(PtrTy); + } Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); break; case i64_i64_func_i64_i64_i64_i64_iPTR: -#if 0 // TODO: Enable this when wasm gets multiple-return-value support. - Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); -#else - Params.push_back(PtrTy); -#endif + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + } else { + Params.push_back(PtrTy); + } Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); @@ -760,28 +760,28 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, Params.push_back(PtrTy); break; case i64_i64_i64_i64_func_i64_i64_i64_i64: -#if 0 // TODO: Enable this when wasm gets multiple-return-value support. 
- Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); -#else - Params.push_back(PtrTy); -#endif + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + } else { + Params.push_back(PtrTy); + } Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); break; case i64_i64_func_i64_i64_i32: -#if 0 // TODO: Enable this when wasm gets multiple-return-value support. - Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); -#else - Params.push_back(PtrTy); -#endif + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + } else { + Params.push_back(PtrTy); + } Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I32); @@ -904,8 +904,8 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, auto Val = Map.find(Name); #ifndef NDEBUG if (Val == Map.end()) { - auto message = std::string("unexpected runtime library name: ") + - std::string(Name); + auto message = + std::string("unexpected runtime library name: ") + std::string(Name); llvm_unreachable(message.c_str()); } #endif diff --git a/llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll b/llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll new file mode 100644 index 0000000000000..270fdae336678 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -verify-machineinstrs -mcpu=mvp -mattr=+multivalue | FileCheck %s --check-prefix=MULTIVALUE +; RUN: llc < %s -verify-machineinstrs -mcpu=mvp | FileCheck %s --check-prefix=NO_MULTIVALUE + +; Test libcall signatures when multivalue is enabled and disabled + +target triple = "wasm32-unknown-unknown" + +@c = global i128 0, align 16 + +define void @multivalue_sdiv(i128 noundef %a, i128 noundef %b) #0 { +; MULTIVALUE-LABEL: multivalue_sdiv: +; MULTIVALUE: .functype multivalue_sdiv (i64, i64, i64, i64) -> () +; MULTIVALUE-NEXT: # %bb.0: +; MULTIVALUE-NEXT: local.get 0 +; MULTIVALUE-NEXT: local.get 1 +; MULTIVALUE-NEXT: local.get 2 +; MULTIVALUE-NEXT: local.get 3 +; MULTIVALUE-NEXT: call __divti3 +; MULTIVALUE-NEXT: local.set 2 +; MULTIVALUE-NEXT: local.set 3 +; MULTIVALUE-NEXT: i32.const c +; MULTIVALUE-NEXT: local.get 2 +; MULTIVALUE-NEXT: i64.store 8 +; MULTIVALUE-NEXT: i32.const 0 +; MULTIVALUE-NEXT: local.get 3 +; MULTIVALUE-NEXT: i64.store c +; MULTIVALUE-NEXT: # fallthrough-return +; +; NO_MULTIVALUE-LABEL: multivalue_sdiv: +; NO_MULTIVALUE: .functype multivalue_sdiv (i64, i64, i64, i64) -> () +; NO_MULTIVALUE-NEXT: .local i32 +; NO_MULTIVALUE-NEXT: # %bb.0: +; NO_MULTIVALUE-NEXT: global.get __stack_pointer +; NO_MULTIVALUE-NEXT: i32.const 16 +; NO_MULTIVALUE-NEXT: i32.sub +; NO_MULTIVALUE-NEXT: local.tee 4 +; NO_MULTIVALUE-NEXT: global.set __stack_pointer +; NO_MULTIVALUE-NEXT: local.get 4 +; NO_MULTIVALUE-NEXT: local.get 0 +; NO_MULTIVALUE-NEXT: local.get 1 +; NO_MULTIVALUE-NEXT: local.get 2 +; NO_MULTIVALUE-NEXT: local.get 3 +; 
NO_MULTIVALUE-NEXT: call __divti3 +; NO_MULTIVALUE-NEXT: i32.const c +; NO_MULTIVALUE-NEXT: local.get 4 +; NO_MULTIVALUE-NEXT: i32.const 8 +; NO_MULTIVALUE-NEXT: i32.add +; NO_MULTIVALUE-NEXT: i64.load 0 +; NO_MULTIVALUE-NEXT: i64.store 8 +; NO_MULTIVALUE-NEXT: i32.const 0 +; NO_MULTIVALUE-NEXT: local.get 4 +; NO_MULTIVALUE-NEXT: i64.load 0 +; NO_MULTIVALUE-NEXT: i64.store c +; NO_MULTIVALUE-NEXT: local.get 4 +; NO_MULTIVALUE-NEXT: i32.const 16 +; NO_MULTIVALUE-NEXT: i32.add +; NO_MULTIVALUE-NEXT: global.set __stack_pointer +; NO_MULTIVALUE-NEXT: # fallthrough-return + %div = sdiv i128 %a, %b + store i128 %div, ptr @c, align 16 + ret void +} From 71d97df6afc4a8b65c145f835f51dccd624772c7 Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Tue, 21 Mar 2023 10:22:19 +0530 Subject: [PATCH 127/691] [MLIR][Affine] Fix addInductionVarOrTerminalSymbol Update affine analysis method `addInductionVarOrTerminalSymbol` for affine.parallel IV. Fixes https://github.com/llvm/llvm-project/issues/61371 Reviewed By: dcaballe Differential Revision: https://reviews.llvm.org/D146493 --- .../Affine/Analysis/AffineStructures.cpp | 10 +++++++- .../test/Dialect/Affine/affine-data-copy.mlir | 24 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp index 9c9d089416c5e..03b8b1d72a5fa 100644 --- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp @@ -537,7 +537,7 @@ void FlatAffineValueConstraints::addInductionVarOrTerminalSymbol(Value val) { return; // Caller is expected to fully compose map/operands if necessary. - assert((isTopLevelValue(val) || isAffineForInductionVar(val)) && + assert((isTopLevelValue(val) || isAffineInductionVar(val)) && "non-terminal symbol / loop IV expected"); // Outer loop IVs could be used in forOp's bounds. if (auto loop = getForInductionVarOwner(val)) { @@ -547,6 +547,14 @@ void FlatAffineValueConstraints::addInductionVarOrTerminalSymbol(Value val) { loop.emitWarning("failed to add domain info to constraint system")); return; } + if (auto parallel = getAffineParallelInductionVarOwner(val)) { + appendDimVar(parallel.getIVs()); + if (failed(this->addAffineParallelOpDomain(parallel))) + LLVM_DEBUG(parallel.emitWarning( + "failed to add domain info to constraint system")); + return; + } + // Add top level symbol. appendSymbolVar(val); // Check if the symbol is a constant. 
diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir index a5fa22a4b28cc..22fbd7306d253 100644 --- a/mlir/test/Dialect/Affine/affine-data-copy.mlir +++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir @@ -286,3 +286,27 @@ func.func @empty_loops(%arg0: memref<1024x1024xf64>) { // CHECK-NOT: memref.alloc // CHECK: return } + +#map16 = affine_map<(d0, d1, d2) -> (d0 * 40 + d1 * 8 + d2 * 2)> +#map17 = affine_map<(d0, d1, d2) -> (d0 * 40 + d1 * 8 + d2 * 2 + 2)> +// CHECK-LABEL: func @affine_parallel +func.func @affine_parallel(%85:memref<2x5x4x2xi64>) { + affine.for %arg0 = 0 to 2 { + affine.parallel (%arg1) = (0) to (5) { + affine.parallel (%arg2) = (0) to (4) { + affine.for %arg3 = #map16(%arg0, %arg1, %arg2) to #map17(%arg0, %arg1, %arg2) { + %105 = affine.load %85[((%arg3 floordiv 2) floordiv 4) floordiv 5, ((%arg3 floordiv 2) floordiv 4) mod 5, (%arg3 floordiv 2) mod 4, %arg3 mod 2] : memref<2x5x4x2xi64> + } + } + } + } + // CHECK: affine.for + // CHECK-NEXT: affine.for %{{.*}} = 0 to 5 + // CHECK-NEXT: affine.for %{{.*}} = 0 to 4 + // CHECK-NEXT: affine.for %{{.*}} = 0 to 2 + + // CHECK: affine.for + // CHECK-NEXT: affine.parallel + // CHECK-NEXT: affine.parallel + return +} From ff937a82f175937faebcea924941551898e7f794 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Tue, 21 Mar 2023 13:19:35 +0800 Subject: [PATCH 128/691] [Webassembly][NFC] Fix typo in comment --- .../WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index c48a13ef7eb59..9bf2596bf3ff8 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -893,7 +893,7 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, } } -// TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unsed +// TODO: If the RTLIB::Libcall-taking flavor of GetSignature remains unused // other than here, just roll its logic into this version. void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, StringRef Name, @@ -904,9 +904,9 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, auto Val = Map.find(Name); #ifndef NDEBUG if (Val == Map.end()) { - auto message = + auto Message = std::string("unexpected runtime library name: ") + std::string(Name); - llvm_unreachable(message.c_str()); + llvm_unreachable(Message.c_str()); } #endif return getLibcallSignature(Subtarget, Val->second, Rets, Params); From 37f3e53c5be9f61c75394ce0558f54e67647de00 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 20 Mar 2023 21:58:42 -0700 Subject: [PATCH 129/691] [RISCV] Simplify RISCVISAInfo::compareExtension. NFCI Instead of having a separate single letter and multiletter ranking use a unified rank that assigns multiletter a larger value than single letter. Once we've ranked the extensions, then we compare using these ranks. 
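As an illustrative aside, the scheme reads naturally as a single rank function plus one comparison; the rank constants and the canonical-order string below are placeholders, not the exact values in RISCVISAInfo.cpp:

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Single letters rank lowest (canonical order), then 's' extensions, then
// 'z' extensions keyed by their second letter, then 'x' extensions.
unsigned rank(const std::string &Ext) {
  static const std::string Canonical = "mafdqlcbkjtpvnh"; // placeholder order
  if (Ext.size() == 1) {
    if (Ext == "i") return 0;
    if (Ext == "e") return 1;
    auto Pos = Canonical.find(Ext[0]);
    if (Pos != std::string::npos)
      return static_cast<unsigned>(2 + Pos);
    return 100u + static_cast<unsigned>(Ext[0] - 'a'); // unknown letters last
  }
  switch (Ext[0]) {
  case 's': return 1u << 6;                            // s-extensions
  case 'z': return (1u << 7) | rank(Ext.substr(1, 1)); // by second letter
  default:  return 1u << 8;                            // x-extensions
  }
}

int main() {
  std::vector<std::string> Exts = {"xfoo", "zicsr", "m", "i", "sscofpmf", "c"};
  std::sort(Exts.begin(), Exts.end(),
            [](const std::string &A, const std::string &B) {
              unsigned RA = rank(A), RB = rank(B);
              return RA != RB ? RA < RB : A < B; // alphabetical tie-break
            });
  for (const std::string &E : Exts)
    std::cout << E << ' '; // i m c sscofpmf zicsr xfoo
  std::cout << '\n';
}

Sorting then needs only one rank computation per name, with the alphabetical comparison breaking ties inside each category.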
Reviewed By: kito-cheng Differential Revision: https://reviews.llvm.org/D146273 --- llvm/lib/Support/RISCVISAInfo.cpp | 82 ++++++++++++++----------------- 1 file changed, 37 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 2ef3ffa58079e..603b1f3d64737 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -266,78 +266,70 @@ bool RISCVISAInfo::hasExtension(StringRef Ext) const { return Exts.count(Ext.str()) != 0; } +// We rank extensions in the following order: +// -Single letter extensions in canonical order. +// -Unknown single letter extensions in alphabetical order. +// -Multi-letter extensions starting with 's' in alphabetical order. +// -Multi-letter extensions starting with 'z' sorted by canonical order of +// the second letter then sorted alphabetically. +// -X extensions in alphabetical order. +// These flags are used to indicate the category. The first 6 bits store the +// single letter extension rank for single letter and multi-letter extensions +// starting with 'z'. +enum RankFlags { + RF_S_EXTENSION = 1 << 6, + RF_Z_EXTENSION = 1 << 7, + RF_X_EXTENSION = 1 << 8, +}; + // Get the rank for single-letter extension, lower value meaning higher // priority. -static int singleLetterExtensionRank(char Ext) { +static unsigned singleLetterExtensionRank(char Ext) { + assert(Ext >= 'a' && Ext <= 'z'); switch (Ext) { case 'i': - return -2; + return 0; case 'e': - return -1; - default: - break; + return 1; } size_t Pos = AllStdExts.find(Ext); - int Rank; - if (Pos == StringRef::npos) - // If we got an unknown extension letter, then give it an alphabetical - // order, but after all known standard extensions. - Rank = AllStdExts.size() + (Ext - 'a'); - else - Rank = Pos; + if (Pos != StringRef::npos) + return Pos + 2; // Skip 'e' and 'i' from above. - return Rank; + // If we got an unknown extension letter, then give it an alphabetical + // order, but after all known standard extensions. + return 2 + AllStdExts.size() + (Ext - 'a'); } // Get the rank for multi-letter extension, lower value meaning higher // priority/order in canonical order. -static int multiLetterExtensionRank(const std::string &ExtName) { - assert(ExtName.length() >= 2); - int HighOrder; - int LowOrder = 0; - // The order between multi-char extensions: s -> h -> z -> x. - char ExtClass = ExtName[0]; - switch (ExtClass) { +static unsigned getExtensionRank(const std::string &ExtName) { + assert(ExtName.size() >= 1); + switch (ExtName[0]) { case 's': - HighOrder = 0; - break; + return RF_S_EXTENSION; case 'z': - HighOrder = 1; + assert(ExtName.size() >= 2); // `z` extension must be sorted by canonical order of second letter. // e.g. zmx has higher rank than zax. - LowOrder = singleLetterExtensionRank(ExtName[1]); - break; + return RF_Z_EXTENSION | singleLetterExtensionRank(ExtName[1]); case 'x': - HighOrder = 2; - break; + return RF_X_EXTENSION; default: - llvm_unreachable("Unknown prefix for multi-char extension"); - return -1; + assert(ExtName.size() == 1); + return singleLetterExtensionRank(ExtName[0]); } - - return (HighOrder << 8) + LowOrder; } // Compare function for extension. // Only compare the extension name, ignore version comparison. 
bool RISCVISAInfo::compareExtension(const std::string &LHS, const std::string &RHS) { - size_t LHSLen = LHS.length(); - size_t RHSLen = RHS.length(); - if (LHSLen == 1 && RHSLen != 1) - return true; - - if (LHSLen != 1 && RHSLen == 1) - return false; - - if (LHSLen == 1 && RHSLen == 1) - return singleLetterExtensionRank(LHS[0]) < - singleLetterExtensionRank(RHS[0]); + unsigned LHSRank = getExtensionRank(LHS); + unsigned RHSRank = getExtensionRank(RHS); - // Both are multi-char ext here. - int LHSRank = multiLetterExtensionRank(LHS); - int RHSRank = multiLetterExtensionRank(RHS); + // If the ranks differ, pick the lower rank. if (LHSRank != RHSRank) return LHSRank < RHSRank; From aead502b111fd2473b526fd2690315a110617d97 Mon Sep 17 00:00:00 2001 From: Ishaan Gandhi Date: Mon, 20 Mar 2023 22:31:39 -0700 Subject: [PATCH 130/691] [Attributor] Add convergent abstract attribute This patch adds the AANonConvergent abstract attribute. It removes the convergent attribute from functions that only call non-convergent functions. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D143228 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 31 ++ llvm/lib/Transforms/IPO/Attributor.cpp | 4 + .../Transforms/IPO/AttributorAttributes.cpp | 57 +++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2 + llvm/test/Transforms/Attributor/convergent.ll | 122 ++++++ .../Transforms/OpenMP/always_inline_device.ll | 4 +- .../OpenMP/custom_state_machines.ll | 352 +++++++++--------- .../OpenMP/custom_state_machines_pre_lto.ll | 176 ++++----- llvm/test/Transforms/OpenMP/spmdization.ll | 332 +++++++++-------- ...mdization_guarding_two_reaching_kernels.ll | 12 +- ...zation_no_guarding_two_reaching_kernels.ll | 16 +- 11 files changed, 664 insertions(+), 444 deletions(-) create mode 100644 llvm/test/Transforms/Attributor/convergent.ll diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index e8159d2c25fe2..5fb35439def58 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -5144,6 +5144,37 @@ struct AAInterFnReachability static const char ID; }; +/// An abstract Attribute for determining the necessity of the convergent +/// attribute. +struct AANonConvergent : public StateWrapper { + using Base = StateWrapper; + + AANonConvergent(const IRPosition &IRP, Attributor &A) : Base(IRP) {} + + /// Create an abstract attribute view for the position \p IRP. + static AANonConvergent &createForPosition(const IRPosition &IRP, Attributor &A); + + /// Return true if "non-convergent" is assumed. + bool isAssumedNotConvergent() const { return getAssumed(); } + + /// Return true if "non-convergent" is known. + bool isKnownNotConvergent() const { return getKnown(); } + + /// See AbstractAttribute::getName() + const std::string getName() const override { return "AANonConvergent"; } + + /// See AbstractAttribute::getIdAddr() + const char *getIdAddr() const override { return &ID; } + + /// This function should return true if the type of the \p AA is AANonConvergent. + static bool classof(const AbstractAttribute *AA) { + return (AA->getIdAddr() == &ID); + } + + /// Unique ID (due to the unique address) + static const char ID; +}; + /// An abstract interface for struct information. 
struct AAPointerInfo : public AbstractAttribute { AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {} diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 3e6ca6fb2ff19..4c62a79834721 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -3193,6 +3193,10 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // Every function might be "no-recurse". getOrCreateAAFor(FPos); + // Every function can be "non-convergent". + if (F.hasFnAttribute(Attribute::Convergent)) + getOrCreateAAFor(FPos); + // Every function might be "readnone/readonly/writeonly/...". getOrCreateAAFor(FPos); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 6118404172aa6..db4ad5343810e 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -156,6 +156,7 @@ PIPE_OPERATOR(AAIsDead) PIPE_OPERATOR(AANoUnwind) PIPE_OPERATOR(AANoSync) PIPE_OPERATOR(AANoRecurse) +PIPE_OPERATOR(AANonConvergent) PIPE_OPERATOR(AAWillReturn) PIPE_OPERATOR(AANoReturn) PIPE_OPERATOR(AAReturnedValues) @@ -2932,6 +2933,60 @@ struct AANoRecurseCallSite final : AANoRecurseImpl { }; } // namespace +/// ------------------------ No-Convergent Attribute -------------------------- + +namespace { +struct AANonConvergentImpl : public AANonConvergent { + AANonConvergentImpl(const IRPosition &IRP, Attributor &A) + : AANonConvergent(IRP, A) {} + + /// See AbstractAttribute::getAsStr() + const std::string getAsStr() const override { + return getAssumed() ? "non-convergent" : "may-be-convergent"; + } +}; + +struct AANonConvergentFunction final : AANonConvergentImpl { + AANonConvergentFunction(const IRPosition &IRP, Attributor &A) + : AANonConvergentImpl(IRP, A) {} + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override { + // If all function calls are known to not be convergent, we are not convergent. 
+ auto CalleeIsNotConvergent = [&](Instruction &Inst) { + CallBase &CB = cast(Inst); + Function *Callee = CB.getCalledFunction(); + if (!Callee || Callee->isIntrinsic()) { + return false; + } + if (Callee->isDeclaration()) { + return !Callee->hasFnAttribute(Attribute::Convergent); + } + const auto &ConvergentAA = A.getAAFor( + *this, IRPosition::function(*Callee), DepClassTy::REQUIRED); + return ConvergentAA.isAssumedNotConvergent(); + }; + + bool UsedAssumedInformation = false; + if (!A.checkForAllCallLikeInstructions(CalleeIsNotConvergent, *this, + UsedAssumedInformation)) { + return indicatePessimisticFixpoint(); + } + return ChangeStatus::UNCHANGED; + } + + ChangeStatus manifest(Attributor &A) override { + if (isKnownNotConvergent() && hasAttr(Attribute::Convergent)) { + removeAttrs({Attribute::Convergent}); + return ChangeStatus::CHANGED; + } + return ChangeStatus::UNCHANGED; + } + + void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(convergent) } +}; +} // namespace + /// -------------------- Undefined-Behavior Attributes ------------------------ namespace { @@ -11811,6 +11866,7 @@ const char AANoSync::ID = 0; const char AANoFree::ID = 0; const char AANonNull::ID = 0; const char AANoRecurse::ID = 0; +const char AANonConvergent::ID = 0; const char AAWillReturn::ID = 0; const char AAUndefinedBehavior::ID = 0; const char AANoAlias::ID = 0; @@ -11961,6 +12017,7 @@ CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUnderlyingObjects) CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAHeapToStack) CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAUndefinedBehavior) +CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonConvergent) CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIntraFnReachability) CREATE_FUNCTION_ONLY_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAInterFnReachability) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 3c915086db6f0..bf525a7dcc48f 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -5201,6 +5201,8 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) { A.getOrCreateAAFor(IRPosition::function(F)); if (!DisableOpenMPOptDeglobalization) A.getOrCreateAAFor(IRPosition::function(F)); + if (F.hasFnAttribute(Attribute::Convergent)) + A.getOrCreateAAFor(IRPosition::function(F)); for (auto &I : instructions(F)) { if (auto *LI = dyn_cast(&I)) { diff --git a/llvm/test/Transforms/Attributor/convergent.ll b/llvm/test/Transforms/Attributor/convergent.ll new file mode 100644 index 0000000000000..e6b0e965d8e2f --- /dev/null +++ b/llvm/test/Transforms/Attributor/convergent.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT +; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC + +define i32 @defined() convergent { +; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn memory(none) +; CHECK-LABEL: define {{[^@]+}}@defined +; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret i32 1 +; + ret i32 1 +} + +define i32 @calls_defined() convergent { +; TUNIT: Function Attrs: nofree norecurse 
nosync nounwind willreturn memory(none) +; TUNIT-LABEL: define {{[^@]+}}@calls_defined +; TUNIT-SAME: () #[[ATTR0]] { +; TUNIT-NEXT: ret i32 1 +; +; CGSCC: Function Attrs: convergent nofree nosync nounwind willreturn memory(none) +; CGSCC-LABEL: define {{[^@]+}}@calls_defined +; CGSCC-SAME: () #[[ATTR1:[0-9]+]] { +; CGSCC-NEXT: [[A:%.*]] = call noundef i32 @defined() #[[ATTR5:[0-9]+]] +; CGSCC-NEXT: ret i32 [[A]] +; + %a = call i32 @defined() + ret i32 %a +} + +declare void @declared_non_convergent() + +define void @calls_declared_non_convergent() convergent { +; CHECK-LABEL: define {{[^@]+}}@calls_declared_non_convergent() { +; CHECK-NEXT: call void @declared_non_convergent() +; CHECK-NEXT: ret void +; + call void @declared_non_convergent() + ret void +} + +; CHECK: Function Attrs: convergent +; CHECK-NEXT: declare i32 @declared_convergent() +declare i32 @declared_convergent() convergent + +define i32 @calls_declared_convergent() convergent { +; TUNIT: Function Attrs: convergent +; TUNIT-LABEL: define {{[^@]+}}@calls_declared_convergent +; TUNIT-SAME: () #[[ATTR1:[0-9]+]] { +; TUNIT-NEXT: [[A:%.*]] = call i32 @declared_convergent() +; TUNIT-NEXT: ret i32 [[A]] +; +; CGSCC: Function Attrs: convergent +; CGSCC-LABEL: define {{[^@]+}}@calls_declared_convergent +; CGSCC-SAME: () #[[ATTR2:[0-9]+]] { +; CGSCC-NEXT: [[A:%.*]] = call i32 @declared_convergent() +; CGSCC-NEXT: ret i32 [[A]] +; + %a = call i32 @declared_convergent() + ret i32 %a +} + +define i32 @defined_with_asm(i32 %a, i32 %b) { +; CHECK-LABEL: define {{[^@]+}}@defined_with_asm +; CHECK-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: [[RESULT:%.*]] = add i32 [[A]], [[B]] +; CHECK-NEXT: [[ASM_RESULT:%.*]] = call i32 asm sideeffect "addl $1, $0", "=r,r"(i32 [[RESULT]]) +; CHECK-NEXT: ret i32 [[ASM_RESULT]] +; + %result = add i32 %a, %b + %asm_result = call i32 asm sideeffect "addl $1, $0", "=r,r"(i32 %result) + ret i32 %asm_result +} + +define i32 @calls_defined_with_asm(i32 %a, i32 %b) convergent { +; TUNIT: Function Attrs: convergent +; TUNIT-LABEL: define {{[^@]+}}@calls_defined_with_asm +; TUNIT-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR1]] { +; TUNIT-NEXT: [[C:%.*]] = call i32 @defined_with_asm(i32 [[A]], i32 [[B]]) +; TUNIT-NEXT: ret i32 [[C]] +; +; CGSCC: Function Attrs: convergent +; CGSCC-LABEL: define {{[^@]+}}@calls_defined_with_asm +; CGSCC-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: [[C:%.*]] = call i32 @defined_with_asm(i32 [[A]], i32 [[B]]) +; CGSCC-NEXT: ret i32 [[C]] +; + %c = call i32 @defined_with_asm(i32 %a, i32 %b) + ret i32 %c +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %size, i1 %isVolatile) convergent + +define void @calls_intrinsic(i8* %dest, i8* %src, i64 %size) convergent { +; TUNIT: Function Attrs: convergent nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; TUNIT-LABEL: define {{[^@]+}}@calls_intrinsic +; TUNIT-SAME: (ptr nocapture nofree writeonly [[DEST:%.*]], ptr nocapture nofree readonly [[SRC:%.*]], i64 [[SIZE:%.*]]) #[[ATTR2:[0-9]+]] { +; TUNIT-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture nofree writeonly [[DEST]], ptr noalias nocapture nofree readonly [[SRC]], i64 [[SIZE]], i1 noundef false) #[[ATTR4:[0-9]+]] +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: convergent nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) +; CGSCC-LABEL: define {{[^@]+}}@calls_intrinsic +; CGSCC-SAME: (ptr nocapture nofree writeonly [[DEST:%.*]], ptr nocapture nofree readonly [[SRC:%.*]], i64 
[[SIZE:%.*]]) #[[ATTR3:[0-9]+]] { +; CGSCC-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture nofree writeonly [[DEST]], ptr noalias nocapture nofree readonly [[SRC]], i64 [[SIZE]], i1 noundef false) #[[ATTR5]] +; CGSCC-NEXT: ret void +; + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %size, i1 false) + ret void +} +;. +; TUNIT: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; TUNIT: attributes #[[ATTR1]] = { convergent } +; TUNIT: attributes #[[ATTR2]] = { convergent nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; TUNIT: attributes #[[ATTR4]] = { willreturn } +;. +; CGSCC: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR1]] = { convergent nofree nosync nounwind willreturn memory(none) } +; CGSCC: attributes #[[ATTR2]] = { convergent } +; CGSCC: attributes #[[ATTR3]] = { convergent nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CGSCC: attributes #[[ATTR5]] = { willreturn } +;. diff --git a/llvm/test/Transforms/OpenMP/always_inline_device.ll b/llvm/test/Transforms/OpenMP/always_inline_device.ll index a4c749f78e4c2..612c18725d3ba 100644 --- a/llvm/test/Transforms/OpenMP/always_inline_device.ll +++ b/llvm/test/Transforms/OpenMP/always_inline_device.ll @@ -8,9 +8,9 @@ @llvm.compiler.used = appending global [1 x ptr] [ptr @__omp_offloading_fd02_c0934fc2_foo_l4_exec_mode], section "llvm.metadata" @G = external global i8 -; Function Attrs: convergent norecurse nounwind +; Function Attrs: norecurse nounwind define weak void @__omp_offloading_fd02_c0934fc2_foo_l4() #0 { -; CHECK: Function Attrs: convergent norecurse nounwind +; CHECK: Function Attrs: norecurse nounwind ; CHECK-LABEL: @__omp_offloading_fd02_c0934fc2_foo_l4( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @[[GLOB1:[0-9]+]], i8 2, i1 false) diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll index 29387947a4f5c..fe20653af18bf 100644 --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -861,8 +861,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]] -; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]] +; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]] +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]] ; AMDGPU-NEXT: ret void ; ; @@ -969,11 +969,11 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 ; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]] ; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[TMP0]], align 4 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-NEXT: ret void ; @@ -984,7 +984,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p0() #[[ATTR10:[0-9]+]] +; AMDGPU-NEXT: call void @p0() #[[ATTR12:[0-9]+]] ; AMDGPU-NEXT: ret void ; ; @@ -1007,7 +1007,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p1() #[[ATTR10]] +; AMDGPU-NEXT: call void @p1() #[[ATTR12]] ; AMDGPU-NEXT: ret void ; ; @@ -1098,19 +1098,19 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] -; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]] +; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]] ; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]] ; AMDGPU-NEXT: ret void ; ; -; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU: Function Attrs: noinline nounwind ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-SAME: () #[[ATTR6:[0-9]+]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] @@ -1134,7 +1134,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p1() #[[ATTR10]] +; AMDGPU-NEXT: call void @p1() #[[ATTR12]] ; AMDGPU-NEXT: ret void ; ; @@ -1151,9 +1151,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: ret void ; ; -; AMDGPU: Function 
Attrs: convergent noinline nounwind +; AMDGPU: Function Attrs: noinline nounwind ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; AMDGPU-SAME: () #[[ATTR1]] { +; AMDGPU-SAME: () #[[ATTR6]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] @@ -1245,7 +1245,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] +; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-NEXT: ret void ; @@ -1256,7 +1256,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p0() #[[ATTR10]] +; AMDGPU-NEXT: call void @p0() #[[ATTR12]] ; AMDGPU-NEXT: ret void ; ; @@ -1279,7 +1279,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p1() #[[ATTR10]] +; AMDGPU-NEXT: call void @p1() #[[ATTR12]] ; AMDGPU-NEXT: ret void ; ; @@ -1368,7 +1368,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; AMDGPU-NEXT: ret void ; @@ -1379,7 +1379,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p0() #[[ATTR10]] +; AMDGPU-NEXT: call void @p0() #[[ATTR12]] ; AMDGPU-NEXT: ret void ; ; @@ -1402,7 +1402,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p1() #[[ATTR10]] +; AMDGPU-NEXT: call void @p1() #[[ATTR12]] ; AMDGPU-NEXT: ret void ; ; @@ -1488,7 +1488,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 
; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]] ; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) @@ -1502,7 +1502,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p0() #[[ATTR10]] +; AMDGPU-NEXT: call void @p0() #[[ATTR12]] ; AMDGPU-NEXT: ret void ; ; @@ -1525,7 +1525,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: call void @p1() #[[ATTR10]] +; AMDGPU-NEXT: call void @p1() #[[ATTR12]] ; AMDGPU-NEXT: ret void ; ; @@ -1603,14 +1603,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]] -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] +; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]] ; AMDGPU-NEXT: ret void ; ; -; AMDGPU: Function Attrs: convergent noinline nounwind +; AMDGPU: Function Attrs: noinline nounwind ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized -; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -1622,8 +1622,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU: if.end: ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]] +; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]] ; AMDGPU-NEXT: br label [[RETURN]] ; AMDGPU: return: ; AMDGPU-NEXT: ret void @@ -1643,16 +1643,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; AMDGPU: if.end: ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] -; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] +; 
AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
 ; AMDGPU-NEXT: br label [[RETURN]]
 ; AMDGPU: return:
 ; AMDGPU-NEXT: ret void
 ;
 ;
-; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU: Function Attrs: noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
-; AMDGPU-SAME: () #[[ATTR0]] {
+; AMDGPU-SAME: () #[[ATTR9:[0-9]+]] {
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
@@ -1699,19 +1699,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: ret void
 ;
 ;
-; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU: Function Attrs: noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] {
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR8]]
+; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR10]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
-; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU: Function Attrs: noinline nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@weak_callee_empty
-; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-SAME: () #[[ATTR6]] {
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: ret void
 ;
@@ -1722,7 +1722,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1745,7 +1745,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1762,9 +1762,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: ret void
 ;
 ;
-; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU: Function Attrs: noinline nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-SAME: () #[[ATTR6]] {
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -1788,7 +1788,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1834,8 +1834,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]]
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
+; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -1941,11 +1941,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
+; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT: ret void
 ;
@@ -1956,7 +1956,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR10:[0-9]+]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -1979,7 +1979,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p1() #[[ATTR10]]
+; NVPTX-NEXT: call void @p1() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -2069,19 +2069,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
-; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
 ; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
 ; NVPTX-NEXT: ret void
 ;
 ;
-; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX: Function Attrs: noinline nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-SAME: () #[[ATTR6:[0-9]+]] {
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -2105,7 +2105,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p1() #[[ATTR10]]
+; NVPTX-NEXT: call void @p1() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -2122,9 +2122,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: ret void
 ;
 ;
-; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX: Function Attrs: noinline nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-SAME: () #[[ATTR6]] {
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -2215,7 +2215,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
+; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT: ret void
 ;
@@ -2226,7 +2226,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -2249,7 +2249,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p1() #[[ATTR10]]
+; NVPTX-NEXT: call void @p1() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -2337,7 +2337,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT: ret void
 ;
@@ -2348,7 +2348,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -2371,7 +2371,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p1() #[[ATTR10]]
+; NVPTX-NEXT: call void @p1() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -2456,7 +2456,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
@@ -2470,7 +2470,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -2493,7 +2493,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p1() #[[ATTR10]]
+; NVPTX-NEXT: call void @p1() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -2570,14 +2570,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]]
+; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
 ; NVPTX-NEXT: ret void
 ;
 ;
-; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX: Function Attrs: noinline nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -2589,8 +2589,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX: if.end:
 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
 ; NVPTX-NEXT: br label [[RETURN]]
 ; NVPTX: return:
 ; NVPTX-NEXT: ret void
@@ -2610,16 +2610,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX: if.end:
 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]]
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
 ; NVPTX-NEXT: br label [[RETURN]]
 ; NVPTX: return:
 ; NVPTX-NEXT: ret void
 ;
 ;
-; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX: Function Attrs: noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
-; NVPTX-SAME: () #[[ATTR0]] {
+; NVPTX-SAME: () #[[ATTR9:[0-9]+]] {
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
@@ -2665,19 +2665,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: ret void
 ;
 ;
-; NVPTX: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX: Function Attrs: noinline norecurse nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] {
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR8]]
+; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR10]]
 ; NVPTX-NEXT: ret void
 ;
 ;
-; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX: Function Attrs: noinline nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@weak_callee_empty
-; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-SAME: () #[[ATTR6]] {
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: ret void
 ;
@@ -2688,7 +2688,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -2711,7 +2711,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -2728,9 +2728,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: ret void
 ;
 ;
-; NVPTX: Function Attrs: convergent noinline nounwind
+; NVPTX: Function Attrs: noinline nounwind
 ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; NVPTX-SAME: () #[[ATTR1]] {
+; NVPTX-SAME: () #[[ATTR6]] {
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -2754,7 +2754,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -2800,8 +2800,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]]
-; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
+; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -2867,11 +2867,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
+; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
@@ -2882,7 +2882,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10:[0-9]+]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -2905,7 +2905,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -2949,19 +2949,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
-; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
+; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
-; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
+; AMDGPU-DISABLED: Function Attrs: noinline nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR6:[0-9]+]] {
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -2985,7 +2985,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -3002,9 +3002,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
-; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
+; AMDGPU-DISABLED: Function Attrs: noinline nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR6]] {
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -3053,7 +3053,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
@@ -3064,7 +3064,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -3087,7 +3087,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -3135,7 +3135,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
@@ -3146,7 +3146,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -3169,7 +3169,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -3214,7 +3214,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
@@ -3228,7 +3228,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -3251,7 +3251,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @p1() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -3292,14 +3292,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]]
+; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
-; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
+; AMDGPU-DISABLED: Function Attrs: noinline nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; AMDGPU-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -3311,8 +3311,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED: if.end:
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT: br label [[RETURN]]
 ; AMDGPU-DISABLED: return:
 ; AMDGPU-DISABLED-NEXT: ret void
@@ -3332,16 +3332,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED: if.end:
 ; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]]
-; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: br label [[RETURN]]
 ; AMDGPU-DISABLED: return:
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
-; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-DISABLED: Function Attrs: noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
-; AMDGPU-DISABLED-SAME: () #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR9:[0-9]+]] {
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
@@ -3357,19 +3357,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
-; AMDGPU-DISABLED: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU-DISABLED: Function Attrs: noinline norecurse nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] {
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR8]]
+; AMDGPU-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR10]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
-; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
+; AMDGPU-DISABLED: Function Attrs: noinline nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty
-; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR6]] {
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
@@ -3380,7 +3380,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -3403,7 +3403,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -3420,9 +3420,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
-; AMDGPU-DISABLED: Function Attrs: convergent noinline nounwind
+; AMDGPU-DISABLED: Function Attrs: noinline nounwind
 ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; AMDGPU-DISABLED-SAME: () #[[ATTR1]] {
+; AMDGPU-DISABLED-SAME: () #[[ATTR6]] {
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -3446,7 +3446,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-DISABLED-NEXT: entry:
 ; AMDGPU-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-DISABLED-NEXT: ret void
 ;
 ;
@@ -3492,8 +3492,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]]
-; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
+; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -3559,11 +3559,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
+; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-DISABLED-NEXT: ret void
 ;
@@ -3574,7 +3574,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10:[0-9]+]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -3597,7 +3597,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -3641,19 +3641,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
-; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
+; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
-; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
+; NVPTX-DISABLED: Function Attrs: noinline nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR6:[0-9]+]] {
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -3677,7 +3677,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -3694,9 +3694,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
-; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
+; NVPTX-DISABLED: Function Attrs: noinline nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR6]] {
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -3745,7 +3745,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-DISABLED-NEXT: ret void
 ;
@@ -3756,7 +3756,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -3779,7 +3779,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -3827,7 +3827,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-DISABLED-NEXT: ret void
 ;
@@ -3838,7 +3838,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -3861,7 +3861,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -3906,7 +3906,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-DISABLED-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
@@ -3920,7 +3920,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -3943,7 +3943,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @p1() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -3984,14 +3984,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]]
+; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
-; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
+; NVPTX-DISABLED: Function Attrs: noinline nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; NVPTX-DISABLED-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -4003,8 +4003,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED: if.end:
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT: br label [[RETURN]]
 ; NVPTX-DISABLED: return:
 ; NVPTX-DISABLED-NEXT: ret void
@@ -4024,16 +4024,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED: if.end:
 ; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; NVPTX-DISABLED-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]]
-; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: br label [[RETURN]]
 ; NVPTX-DISABLED: return:
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
-; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-DISABLED: Function Attrs: noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
-; NVPTX-DISABLED-SAME: () #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR9:[0-9]+]] {
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; NVPTX-DISABLED-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
@@ -4049,19 +4049,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
-; NVPTX-DISABLED: Function Attrs: convergent noinline norecurse nounwind
+; NVPTX-DISABLED: Function Attrs: noinline norecurse nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__16
-; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] {
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR8]]
+; NVPTX-DISABLED-NEXT: call void @weak_callee_empty() #[[ATTR10]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
-; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
+; NVPTX-DISABLED: Function Attrs: noinline nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@weak_callee_empty
-; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR6]] {
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: ret void
 ;
@@ -4072,7 +4072,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -4095,7 +4095,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
@@ -4112,9 +4112,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
-; NVPTX-DISABLED: Function Attrs: convergent noinline nounwind
+; NVPTX-DISABLED: Function Attrs: noinline nounwind
 ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; NVPTX-DISABLED-SAME: () #[[ATTR1]] {
+; NVPTX-DISABLED-SAME: () #[[ATTR6]] {
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -4138,7 +4138,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-DISABLED-NEXT: entry:
 ; NVPTX-DISABLED-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-DISABLED-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR10]]
+; NVPTX-DISABLED-NEXT: call void @p0() #[[ATTR12]]
 ; NVPTX-DISABLED-NEXT: ret void
 ;
 ;
diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
index eb83c596e2caa..d7633434c6615 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines_pre_lto.ll
@@ -856,8 +856,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]]
-; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
+; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -923,11 +923,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
+; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-NEXT: ret void
 ;
@@ -938,7 +938,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR10:[0-9]+]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -961,7 +961,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p1() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p1() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1005,19 +1005,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
-; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
 ; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
-; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU: Function Attrs: noinline nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized
-; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-SAME: () #[[ATTR6:[0-9]+]] {
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -1041,7 +1041,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p1() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p1() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1058,9 +1058,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: ret void
 ;
 ;
-; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU: Function Attrs: noinline nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized
-; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-SAME: () #[[ATTR6]] {
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -1109,7 +1109,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]]
+; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]]
 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-NEXT: ret void
 ;
@@ -1120,7 +1120,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1143,7 +1143,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p1() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p1() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1191,7 +1191,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; AMDGPU-NEXT: ret void
 ;
@@ -1202,7 +1202,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1225,7 +1225,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p1() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p1() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1270,7 +1270,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; AMDGPU-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
@@ -1284,7 +1284,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1307,7 +1307,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p1() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p1() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1348,14 +1348,14 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]]
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]]
+; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
-; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU: Function Attrs: noinline nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized
-; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR1]] {
+; AMDGPU-SAME: (i32 [[A:%.*]]) #[[ATTR6]] {
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
@@ -1367,8 +1367,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU: if.end:
 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]]
 ; AMDGPU-NEXT: br label [[RETURN]]
 ; AMDGPU: return:
 ; AMDGPU-NEXT: ret void
@@ -1388,16 +1388,16 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU: if.end:
 ; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
 ; AMDGPU-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]]
-; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]]
+; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]]
 ; AMDGPU-NEXT: br label [[RETURN]]
 ; AMDGPU: return:
 ; AMDGPU-NEXT: ret void
 ;
 ;
-; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU: Function Attrs: noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112
-; AMDGPU-SAME: () #[[ATTR0]] {
+; AMDGPU-SAME: () #[[ATTR9:[0-9]+]] {
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 ; AMDGPU-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
@@ -1413,19 +1413,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: ret void
 ;
 ;
-; AMDGPU: Function Attrs: convergent noinline norecurse nounwind
+; AMDGPU: Function Attrs: noinline norecurse nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__16
-; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
+; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] {
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR8]]
+; AMDGPU-NEXT: call void @weak_callee_empty() #[[ATTR10]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
-; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU: Function Attrs: noinline nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@weak_callee_empty
-; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-SAME: () #[[ATTR6]] {
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: ret void
 ;
@@ -1436,7 +1436,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1459,7 +1459,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1476,9 +1476,9 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: ret void
 ;
 ;
-; AMDGPU: Function Attrs: convergent noinline nounwind
+; AMDGPU: Function Attrs: noinline nounwind
 ; AMDGPU-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized
-; AMDGPU-SAME: () #[[ATTR1]] {
+; AMDGPU-SAME: () #[[ATTR6]] {
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; AMDGPU-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]]
@@ -1502,7 +1502,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; AMDGPU-NEXT: entry:
 ; AMDGPU-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; AMDGPU-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; AMDGPU-NEXT: call void @p0() #[[ATTR10]]
+; AMDGPU-NEXT: call void @p0() #[[ATTR12]]
 ; AMDGPU-NEXT: ret void
 ;
 ;
@@ -1543,8 +1543,8 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8:[0-9]+]]
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
+; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10:[0-9]+]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11:[0-9]+]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -1610,11 +1610,11 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
 ; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8
 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4
 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__2, ptr @__omp_outlined__2_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
-; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
+; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR10]]
 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__3, ptr @__omp_outlined__3_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0)
 ; NVPTX-NEXT: ret void
 ;
@@ -1625,7 +1625,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p0() #[[ATTR10:[0-9]+]]
+; NVPTX-NEXT: call void @p0() #[[ATTR12:[0-9]+]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -1648,7 +1648,7 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: entry:
 ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
-; NVPTX-NEXT: call void @p1() #[[ATTR10]]
+; NVPTX-NEXT: call void @p1() #[[ATTR12]]
 ; NVPTX-NEXT: ret void
 ;
 ;
@@ -1692,19 +1692,19 @@ attributes #9 = { convergent nounwind readonly willreturn }
 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 ; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
-; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]]
-; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]]
-; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]]
+; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]]
+; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR10]]
+; NVPTX-NEXT: call void
@no_parallel_region_in_here.internalized() #[[ATTR10]] ; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__5, ptr @__omp_outlined__5_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR10]] ; NVPTX-NEXT: ret void ; ; -; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX: Function Attrs: noinline nounwind ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_before.internalized -; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-SAME: () #[[ATTR6:[0-9]+]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] @@ -1728,7 +1728,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p1() #[[ATTR10]] +; NVPTX-NEXT: call void @p1() #[[ATTR12]] ; NVPTX-NEXT: ret void ; ; @@ -1745,9 +1745,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: ret void ; ; -; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX: Function Attrs: noinline nounwind ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_after.internalized -; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-SAME: () #[[ATTR6]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] @@ -1796,7 +1796,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__7, ptr @__omp_outlined__7_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] +; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR12]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__8, ptr @__omp_outlined__8_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-NEXT: ret void ; @@ -1807,7 +1807,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: call void @p0() #[[ATTR12]] ; NVPTX-NEXT: ret void ; ; @@ -1830,7 +1830,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p1() #[[ATTR10]] +; NVPTX-NEXT: call void @p1() #[[ATTR12]] ; NVPTX-NEXT: ret void ; ; @@ -1878,7 +1878,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr 
[[TMP0]], align 4 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__10, ptr @__omp_outlined__10_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) -; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__11, ptr @__omp_outlined__11_wrapper, ptr [[CAPTURED_VARS_ADDRS1]], i64 0) ; NVPTX-NEXT: ret void ; @@ -1889,7 +1889,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: call void @p0() #[[ATTR12]] ; NVPTX-NEXT: ret void ; ; @@ -1912,7 +1912,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p1() #[[ATTR10]] +; NVPTX-NEXT: call void @p1() #[[ATTR12]] ; NVPTX-NEXT: ret void ; ; @@ -1957,7 +1957,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x ptr], align 8 ; NVPTX-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] +; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR11]] ; NVPTX-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 4 ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__13, ptr @__omp_outlined__13_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0) @@ -1971,7 +1971,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: call void @p0() #[[ATTR12]] ; NVPTX-NEXT: ret void ; ; @@ -1994,7 +1994,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p1() #[[ATTR10]] +; NVPTX-NEXT: call void @p1() #[[ATTR12]] ; NVPTX-NEXT: ret void ; ; @@ -2035,14 +2035,14 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR10]] -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR8]] +; NVPTX-NEXT: [[CALL:%.*]] = call i32 @omp_get_thread_num() #[[ATTR12]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[CALL]]) #[[ATTR10]] ; NVPTX-NEXT: ret void ; ; -; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX: Function Attrs: noinline nounwind ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after.internalized -; NVPTX-SAME: (i32 [[A:%.*]]) 
#[[ATTR1]] { +; NVPTX-SAME: (i32 [[A:%.*]]) #[[ATTR6]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 @@ -2054,8 +2054,8 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX: if.end: ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]] -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR10]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR10]] ; NVPTX-NEXT: br label [[RETURN]] ; NVPTX: return: ; NVPTX-NEXT: ret void @@ -2075,16 +2075,16 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX: if.end: ; NVPTX-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 ; NVPTX-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1 -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR10]] -; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR10]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after(i32 [[SUB]]) #[[ATTR12]] +; NVPTX-NEXT: call void @simple_state_machine_interprocedural_nested_recursive_after_after() #[[ATTR12]] ; NVPTX-NEXT: br label [[RETURN]] ; NVPTX: return: ; NVPTX-NEXT: ret void ; ; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX: Function Attrs: noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_14_a36502b_no_state_machine_weak_callee_l112 -; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-SAME: () #[[ATTR9:[0-9]+]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 @@ -2100,19 +2100,19 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: ret void ; ; -; NVPTX: Function Attrs: convergent noinline norecurse nounwind +; NVPTX: Function Attrs: noinline norecurse nounwind ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__16 -; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR9]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR8]] +; NVPTX-NEXT: call void @weak_callee_empty() #[[ATTR10]] ; NVPTX-NEXT: ret void ; ; -; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX: Function Attrs: noinline nounwind ; NVPTX-LABEL: define {{[^@]+}}@weak_callee_empty -; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-SAME: () #[[ATTR6]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: ret void ; @@ -2123,7 +2123,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: call void @p0() #[[ATTR12]] ; NVPTX-NEXT: ret void ; ; @@ -2146,7 +2146,7 @@ attributes #9 = { convergent nounwind readonly 
willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: call void @p0() #[[ATTR12]] ; NVPTX-NEXT: ret void ; ; @@ -2163,9 +2163,9 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: ret void ; ; -; NVPTX: Function Attrs: convergent noinline nounwind +; NVPTX: Function Attrs: noinline nounwind ; NVPTX-LABEL: define {{[^@]+}}@simple_state_machine_interprocedural_nested_recursive_after_after.internalized -; NVPTX-SAME: () #[[ATTR1]] { +; NVPTX-SAME: () #[[ATTR6]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; NVPTX-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) #[[ATTR3]] @@ -2189,7 +2189,7 @@ attributes #9 = { convergent nounwind readonly willreturn } ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -; NVPTX-NEXT: call void @p0() #[[ATTR10]] +; NVPTX-NEXT: call void @p0() #[[ATTR12]] ; NVPTX-NEXT: ret void ; ; diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll index 47438e607248b..363dc2fa59292 100644 --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -229,9 +229,9 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; @@ -246,9 +246,9 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; @@ -300,9 +300,9 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; AMDGPU-DISABLED-NEXT: call void 
@__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -353,9 +353,9 @@ define internal void @__omp_offloading_fd02_2044372e_sequential_loop_l5__debug() ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4:[0-9]+]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5:[0-9]+]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18:![0-9]+]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -390,7 +390,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU: for.cond.cleanup: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR8:[0-9]+]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] @@ -408,7 +408,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX: for.cond.cleanup: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR8:[0-9]+]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] @@ -426,7 +426,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] @@ -444,7 +444,7 @@ define internal void @__omp_outlined__(ptr noalias %.global_tid., ptr noalias %. 
; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7:[0-9]+]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] @@ -477,25 +477,25 @@ define internal void @__omp_outlined__1(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__1 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] +; NVPTX-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8:[0-9]+]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR9:[0-9]+]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -512,7 +512,7 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper @@ -522,7 +522,7 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper @@ -532,7 +532,7 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr 
[[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper @@ -542,7 +542,7 @@ define internal void @__omp_outlined__1_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__1(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -569,9 +569,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; @@ -586,9 +586,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; @@ -640,9 +640,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -693,9 +693,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l2 ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, 
!tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__2(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -726,14 +726,14 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; AMDGPU-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr -; AMDGPU-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]] +; AMDGPU-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR8]] ; AMDGPU-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU: for.cond: ; AMDGPU-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU: for.cond.cleanup: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] @@ -746,14 +746,14 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]] +; NVPTX-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR8]] ; NVPTX-NEXT: br label [[FOR_COND:%.*]] ; NVPTX: for.cond: ; NVPTX-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX: for.cond.cleanup: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR8]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] @@ -767,14 +767,14 @@ define internal void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 ; AMDGPU-DISABLED-NEXT: [[MALLOC_CAST:%.*]] = addrspacecast ptr addrspace(5) [[X_H2S]] to ptr -; AMDGPU-DISABLED-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR7]] +; AMDGPU-DISABLED-NEXT: call void @use(ptr nocapture [[MALLOC_CAST]]) #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; AMDGPU-DISABLED: for.cond: ; AMDGPU-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] @@ -787,14 +787,14 @@ define internal 
void @__omp_outlined__2(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[X_H2S:%.*]] = alloca i8, i64 4, align 4 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 -; NVPTX-DISABLED-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR7]] +; NVPTX-DISABLED-NEXT: call void @use(ptr nocapture [[X_H2S]]) #[[ATTR8]] ; NVPTX-DISABLED-NEXT: br label [[FOR_COND:%.*]] ; NVPTX-DISABLED: for.cond: ; NVPTX-DISABLED-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ] ; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]] @@ -829,25 +829,25 @@ define internal void @__omp_outlined__3(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__3 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: call void @unknown() #[[ATTR8]] +; NVPTX-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -864,7 +864,7 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper @@ -874,7 +874,7 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 
@@ -884,7 +884,7 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper @@ -894,7 +894,7 @@ define internal void @__omp_outlined__3_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__3(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -922,9 +922,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; @@ -939,9 +939,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; @@ -993,9 +993,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label 
[[COMMON_RET]] ; @@ -1046,9 +1046,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__4(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -1083,7 +1083,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU: for.cond.cleanup: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: ; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] @@ -1102,7 +1102,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX: for.cond.cleanup: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR8]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: ; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] @@ -1121,7 +1121,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: ; AMDGPU-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] @@ -1140,7 +1140,7 @@ define internal void @__omp_outlined__4(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: ; NVPTX-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26:![0-9]+]] @@ -1180,7 +1180,7 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call 
void @unknown() #[[ATTR8]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5 @@ -1189,7 +1189,7 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @unknown() #[[ATTR8]] +; NVPTX-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 @@ -1198,7 +1198,7 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5 @@ -1207,7 +1207,7 @@ define internal void @__omp_outlined__5(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1229,7 +1229,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper @@ -1241,7 +1241,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper @@ -1253,7 +1253,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: ret void ; ; 
NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper @@ -1265,7 +1265,7 @@ define internal void @__omp_outlined__5_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__5(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1294,9 +1294,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; @@ -1311,9 +1311,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 2) ; NVPTX-NEXT: br label [[COMMON_RET]] ; @@ -1365,9 +1365,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -1418,9 +1418,9 @@ define weak void @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_g ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], 
align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__6(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -1469,7 +1469,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU: for.cond.cleanup: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; AMDGPU: for.body: ; AMDGPU-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] @@ -1502,7 +1502,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX: for.cond.cleanup: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR8]] ; NVPTX-NEXT: ret void ; NVPTX: for.body: ; NVPTX-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] @@ -1522,7 +1522,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; AMDGPU-DISABLED: for.cond.cleanup: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: for.body: ; AMDGPU-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared.1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] @@ -1542,7 +1542,7 @@ define internal void @__omp_outlined__6(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], 100 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] ; NVPTX-DISABLED: for.cond.cleanup: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: for.body: ; NVPTX-DISABLED-NEXT: store ptr addrspacecast (ptr addrspace(3) @x_shared1 to ptr), ptr [[CAPTURED_VARS_ADDRS]], align 8, !tbaa [[TBAA26]] @@ -1583,7 +1583,7 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] +; AMDGPU-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR9]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7 @@ -1592,7 +1592,7 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias % ; NVPTX-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-NEXT: call void 
@unknowni32p(ptr [[X]]) #[[ATTR8]] +; NVPTX-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR9]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 @@ -1601,7 +1601,7 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; AMDGPU-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; AMDGPU-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; AMDGPU-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] +; AMDGPU-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7 @@ -1610,7 +1610,7 @@ define internal void @__omp_outlined__7(ptr noalias %.global_tid., ptr noalias % ; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA18]] ; NVPTX-DISABLED-NEXT: [[INC:%.*]] = add nsw i32 [[TMP0]], 1 ; NVPTX-DISABLED-NEXT: store i32 [[INC]], ptr [[X]], align 4, !tbaa [[TBAA18]] -; NVPTX-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR8]] +; NVPTX-DISABLED-NEXT: call void @unknowni32p(ptr [[X]]) #[[ATTR9]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1632,7 +1632,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; AMDGPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper @@ -1644,7 +1644,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; NVPTX-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper @@ -1656,7 +1656,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__7_wrapper @@ -1668,7 +1668,7 @@ define internal void @__omp_outlined__7_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) ; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ARGS]], align 8 ; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8, !tbaa [[TBAA26]] -; 
NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__7(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]], ptr [[TMP3]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1728,8 +1728,8 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] +; AMDGPU-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; @@ -1774,8 +1774,8 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] +; NVPTX-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: br label [[COMMON_RET]] ; @@ -1821,8 +1821,8 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -1867,8 +1867,8 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65() #0 ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__8(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] ; @@ -1896,25 +1896,25 @@ define internal void @__omp_outlined__8(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__8 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-NEXT: ret void ; ; 
NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__8 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: call void @unknown() #[[ATTR8]] +; NVPTX-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__8 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -1925,7 +1925,7 @@ entry: ; Function Attrs: alwaysinline convergent norecurse nounwind define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; AMDGPU-SAME: () #[[ATTR0]] { +; AMDGPU-SAME: () #[[ATTR3:[0-9]+]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 @@ -1971,15 +1971,15 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU: common.ret: ; AMDGPU-NEXT: ret void ; AMDGPU: user_code.entry: -; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] -; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] +; AMDGPU-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] +; AMDGPU-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] +; AMDGPU-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] ; AMDGPU-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-NEXT: br label [[COMMON_RET]] ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; NVPTX-SAME: () #[[ATTR0]] { +; NVPTX-SAME: () #[[ATTR3:[0-9]+]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 @@ -2024,15 +2024,15 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX: common.ret: ; NVPTX-NEXT: ret void ; NVPTX: user_code.entry: -; NVPTX-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] -; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] +; NVPTX-NEXT: [[TMP1:%.*]] = call 
i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] +; NVPTX-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] +; NVPTX-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] ; NVPTX-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-NEXT: br label [[COMMON_RET]] ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; AMDGPU-DISABLED-SAME: () #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: () #[[ATTR3:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 @@ -2078,15 +2078,15 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; AMDGPU-DISABLED: common.ret: ; AMDGPU-DISABLED-NEXT: ret void ; AMDGPU-DISABLED: user_code.entry: -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] -; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] +; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] +; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; AMDGPU-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; AMDGPU-DISABLED-NEXT: br label [[COMMON_RET]] ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74 -; NVPTX-DISABLED-SAME: () #[[ATTR0]] { +; NVPTX-DISABLED-SAME: () #[[ATTR3:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[WORKER_WORK_FN_ADDR:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8 @@ -2131,9 +2131,9 @@ define weak void @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74() #0 { ; NVPTX-DISABLED: common.ret: ; NVPTX-DISABLED-NEXT: ret void ; NVPTX-DISABLED: user_code.entry: -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR4]] -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR4]] -; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) #[[ATTR5]] +; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i64 40, i64 0, ptr @"_omp_task_entry$") #[[ATTR5]] +; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = call i32 
@__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP1]], ptr [[TMP2]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__9, ptr @__omp_outlined__9_wrapper.ID, ptr [[CAPTURED_VARS_ADDRS]], i64 0) ; NVPTX-DISABLED-NEXT: call void @__kmpc_target_deinit(ptr @[[GLOB1]], i8 1) ; NVPTX-DISABLED-NEXT: br label [[COMMON_RET]] @@ -2159,27 +2159,27 @@ user_code.entry: ; preds = %entry ; Function Attrs: alwaysinline convergent nounwind define internal void @.omp_outlined.(i32 %.global_tid., ptr noalias %.part_id., ptr noalias %.privates., ptr noalias %.copy_fn., ptr %.task_t., ptr noalias %__context) #9 { ; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR7]] +; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined. -; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR7]] +; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR8]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] +; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. 
-; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] { +; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTPART_ID_:%.*]], ptr noalias [[DOTPRIVATES_:%.*]], ptr noalias [[DOTCOPY_FN_:%.*]], ptr [[DOTTASK_T_:%.*]], ptr noalias [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR7]] +; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -2261,25 +2261,25 @@ define internal void @__omp_outlined__9(ptr noalias %.global_tid., ptr noalias % ; AMDGPU-LABEL: define {{[^@]+}}@__omp_outlined__9 ; AMDGPU-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: -; AMDGPU-NEXT: call void @unknown() #[[ATTR8]] +; AMDGPU-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9 ; NVPTX-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: -; NVPTX-NEXT: call void @unknown() #[[ATTR8]] +; NVPTX-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 ; AMDGPU-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: -; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR8]] +; AMDGPU-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9 ; NVPTX-DISABLED-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: -; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR8]] +; NVPTX-DISABLED-NEXT: call void @unknown() #[[ATTR9]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -2296,7 +2296,7 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper @@ -2306,7 +2306,7 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper @@ -2316,7 +2316,7 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; AMDGPU-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; AMDGPU-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr 
[[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; AMDGPU-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@__omp_outlined__9_wrapper @@ -2326,7 +2326,7 @@ define internal void @__omp_outlined__9_wrapper(i16 zeroext %0, i32 %1) #3 { ; NVPTX-DISABLED-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8 ; NVPTX-DISABLED-NEXT: call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]]) -; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR4]] +; NVPTX-DISABLED-NEXT: call void @__omp_outlined__9(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5]] ; NVPTX-DISABLED-NEXT: ret void ; entry: @@ -2397,54 +2397,58 @@ attributes #11 = { convergent } ; AMDGPU: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } ; AMDGPU: attributes #[[ATTR1]] = { norecurse } ; AMDGPU: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; AMDGPU: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } -; AMDGPU: attributes #[[ATTR4]] = { nounwind } -; AMDGPU: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } -; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; AMDGPU: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; AMDGPU: attributes #[[ATTR8]] = { convergent } -; AMDGPU: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } -; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } +; AMDGPU: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } +; AMDGPU: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } +; AMDGPU: attributes #[[ATTR5]] = { nounwind } +; AMDGPU: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } +; AMDGPU: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; AMDGPU: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; AMDGPU: attributes #[[ATTR9]] = { convergent } +; AMDGPU: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; AMDGPU: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } +; AMDGPU: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } ;. 
; NVPTX: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } ; NVPTX: attributes #[[ATTR1]] = { norecurse } ; NVPTX: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; NVPTX: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } -; NVPTX: attributes #[[ATTR4]] = { nounwind } -; NVPTX: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } -; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; NVPTX: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; NVPTX: attributes #[[ATTR8]] = { convergent } -; NVPTX: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; NVPTX: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } -; NVPTX: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } +; NVPTX: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } +; NVPTX: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } +; NVPTX: attributes #[[ATTR5]] = { nounwind } +; NVPTX: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } +; NVPTX: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; NVPTX: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; NVPTX: attributes #[[ATTR9]] = { convergent } +; NVPTX: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; NVPTX: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } +; NVPTX: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } ;. ; AMDGPU-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } ; AMDGPU-DISABLED: attributes #[[ATTR1]] = { norecurse } ; AMDGPU-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR4]] = { nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } -; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; AMDGPU-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent } -; AMDGPU-DISABLED: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } -; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR5]] = { nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } +; AMDGPU-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; AMDGPU-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; AMDGPU-DISABLED: attributes #[[ATTR9]] = { convergent } +; AMDGPU-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; AMDGPU-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } +; AMDGPU-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } ;. 
; NVPTX-DISABLED: attributes #[[ATTR0]] = { alwaysinline convergent norecurse nounwind } ; NVPTX-DISABLED: attributes #[[ATTR1]] = { norecurse } ; NVPTX-DISABLED: attributes #[[ATTR2]] = { convergent norecurse nounwind } -; NVPTX-DISABLED: attributes #[[ATTR3]] = { alwaysinline convergent nounwind } -; NVPTX-DISABLED: attributes #[[ATTR4]] = { nounwind } -; NVPTX-DISABLED: attributes #[[ATTR5:[0-9]+]] = { nosync nounwind } -; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nofree nosync nounwind allocsize(0) } -; NVPTX-DISABLED: attributes #[[ATTR7]] = { convergent "llvm.assume"="ompx_spmd_amenable" } -; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent } -; NVPTX-DISABLED: attributes #[[ATTR9:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } -; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { alwaysinline } -; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { convergent nounwind } +; NVPTX-DISABLED: attributes #[[ATTR3]] = { alwaysinline norecurse nounwind } +; NVPTX-DISABLED: attributes #[[ATTR4]] = { alwaysinline convergent nounwind } +; NVPTX-DISABLED: attributes #[[ATTR5]] = { nounwind } +; NVPTX-DISABLED: attributes #[[ATTR6:[0-9]+]] = { nosync nounwind } +; NVPTX-DISABLED: attributes #[[ATTR7:[0-9]+]] = { nofree nosync nounwind allocsize(0) } +; NVPTX-DISABLED: attributes #[[ATTR8]] = { convergent "llvm.assume"="ompx_spmd_amenable" } +; NVPTX-DISABLED: attributes #[[ATTR9]] = { convergent } +; NVPTX-DISABLED: attributes #[[ATTR10:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +; NVPTX-DISABLED: attributes #[[ATTR11:[0-9]+]] = { alwaysinline } +; NVPTX-DISABLED: attributes #[[ATTR12:[0-9]+]] = { convergent nounwind } ;. ; AMDGPU: [[META0:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"do_not_spmdize_task", i32 74, i32 5} ; AMDGPU: [[META1:![0-9]+]] = !{i32 0, i32 64770, i32 541341486, !"sequential_loop_to_stack_var", i32 20, i32 1} diff --git a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll index c57f61e27c7b7..17f4cf4dcdd85 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_guarding_two_reaching_kernels.ll @@ -376,20 +376,20 @@ attributes #5 = { convergent } !8 = !{i32 7, !"frame-pointer", i32 2} !9 = !{!"clang version 14.0.0"} ;. 
-; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR0]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR2]] = { nounwind } ; CHECK: attributes #[[ATTR3:[0-9]+]] = { alwaysinline } -; CHECK: attributes #[[ATTR4]] = { convergent noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR4]] = { noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR5]] = { convergent nounwind } ; CHECK: attributes #[[ATTR6]] = { convergent nosync nounwind } ; CHECK: attributes #[[ATTR7]] = { convergent } ;. -; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR2]] = { nounwind } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR3:[0-9]+]] = { alwaysinline } -; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { convergent noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4]] = { noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR5]] = { convergent nounwind } ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { convergent nosync nounwind } ; 
CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR7]] = { convergent } diff --git a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll index 2234b695307d4..291024780790a 100644 --- a/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll +++ b/llvm/test/Transforms/OpenMP/spmdization_no_guarding_two_reaching_kernels.ll @@ -420,21 +420,21 @@ attributes #5 = { convergent } !8 = !{i32 7, !"frame-pointer", i32 2} !9 = !{!"clang version 14.0.0"} ;. -; CHECK: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } -; CHECK: attributes #[[ATTR2]] = { convergent noinline norecurse nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR0]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR2]] = { noinline norecurse nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR3]] = { nounwind } ; CHECK: attributes #[[ATTR4:[0-9]+]] = { alwaysinline } -; CHECK: attributes #[[ATTR5]] = { convergent noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } +; CHECK: attributes #[[ATTR5]] = { noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" } ; CHECK: attributes #[[ATTR6]] = { convergent nounwind } ; CHECK: attributes #[[ATTR7]] = { nosync nounwind } ;. 
-; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { convergent noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { convergent noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
-; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR2]] = { convergent noinline norecurse nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR0]] = { noinline norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR1]] = { noinline nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR2]] = { noinline norecurse nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
 ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR3]] = { nounwind }
 ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR4:[0-9]+]] = { alwaysinline }
-; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR5]] = { convergent noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
+; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR5]] = { noinline nosync nounwind memory(write) "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
 ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR6]] = { convergent nounwind }
 ; CHECK-DISABLE-SPMDIZATION: attributes #[[ATTR7]] = { nosync nounwind }
 ;.

From 614c63bec6d67cbfdc17b50e443ff769a28c18d0 Mon Sep 17 00:00:00 2001
From: "Luo, Yuanke"
Date: Tue, 21 Mar 2023 09:06:44 +0800
Subject: [PATCH 131/691] [X86] Create extra prolog/epilog for stack
 realignment [part 2]

This patch extends D145650 to ELF targets as well.
Differential Revision: https://reviews.llvm.org/D146489 --- llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp | 2 +- llvm/test/CodeGen/X86/i386-baseptr.ll | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp index ef3751e40f17f..bf60791234315 100644 --- a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp +++ b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp @@ -114,7 +114,7 @@ bool X86ArgumentStackSlotPass::runOnMachineFunction(MachineFunction &MF) { if (F.hasFnAttribute(Attribute::Naked)) return false; // Only support Linux - if (!STI.isTargetLinux()) + if (!STI.isTargetLinux() && !STI.isTargetELF()) return false; if (!TRI->hasBasePointer(MF)) return false; diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll index cb8d849a86841..df921f2738ff0 100644 --- a/llvm/test/CodeGen/X86/i386-baseptr.ll +++ b/llvm/test/CodeGen/X86/i386-baseptr.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i386-pc-linux -stackrealign < %s | FileCheck %s +; RUN: llc -mtriple=i386-pc-none-elf -stackrealign < %s | FileCheck %s declare i32 @helper() nounwind define void @base() #0 { From 35c2aac6e3957c2e82bf92269039fa02bab0e1d9 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Mon, 20 Mar 2023 19:06:09 +0100 Subject: [PATCH 132/691] Revert "Revert "[clangd] Fix AddUsing in the face of typo-correction"" This reverts commit fb3f6a95393f33bc8d8550a5ac62c18e488a9b6f. --- .../clangd/refactor/tweaks/AddUsing.cpp | 87 +++++++++++-------- .../clangd/unittests/tweaks/AddUsingTests.cpp | 42 +++++++-- 2 files changed, 87 insertions(+), 42 deletions(-) diff --git a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp index 103e13f44d060..1e51d8fb9a518 100644 --- a/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp +++ b/clang-tools-extra/clangd/refactor/tweaks/AddUsing.cpp @@ -8,10 +8,25 @@ #include "AST.h" #include "Config.h" +#include "SourceCode.h" #include "refactor/Tweak.h" #include "support/Logger.h" #include "clang/AST/Decl.h" +#include "clang/AST/Expr.h" +#include "clang/AST/NestedNameSpecifier.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/Type.h" +#include "clang/AST/TypeLoc.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include namespace clang { namespace clangd { @@ -45,8 +60,12 @@ class AddUsing : public Tweak { // All of the following are set by prepare(). // The qualifier to remove. NestedNameSpecifierLoc QualifierToRemove; - // The name following QualifierToRemove. - llvm::StringRef Name; + // Qualified name to use when spelling the using declaration. This might be + // different than SpelledQualifier in presence of error correction. + std::string QualifierToSpell; + // The name and qualifier as spelled in the code. + llvm::StringRef SpelledQualifier; + llvm::StringRef SpelledName; // If valid, the insertion point for "using" statement must come after this. // This is relevant when the type is defined in the main file, to make sure // the type/function is already defined at the point where "using" is added. 
@@ -56,7 +75,7 @@ REGISTER_TWEAK(AddUsing) std::string AddUsing::title() const { return std::string(llvm::formatv( - "Add using-declaration for {0} and remove qualifier", Name)); + "Add using-declaration for {0} and remove qualifier", SpelledName)); } // Locates all "using" statements relevant to SelectionDeclContext. @@ -269,36 +288,23 @@ bool AddUsing::prepare(const Selection &Inputs) { if (Node == nullptr) return false; + SourceRange SpelledNameRange; if (auto *D = Node->ASTNode.get()) { if (auto *II = D->getDecl()->getIdentifier()) { QualifierToRemove = D->getQualifierLoc(); - Name = II->getName(); + SpelledNameRange = D->getSourceRange(); MustInsertAfterLoc = D->getDecl()->getBeginLoc(); } } else if (auto *T = Node->ASTNode.get()) { if (auto E = T->getAs()) { QualifierToRemove = E.getQualifierLoc(); - if (!QualifierToRemove) - return false; - auto NameRange = E.getSourceRange(); + SpelledNameRange = E.getSourceRange(); if (auto T = E.getNamedTypeLoc().getAs()) { // Remove the template arguments from the name. - NameRange.setEnd(T.getLAngleLoc().getLocWithOffset(-1)); + SpelledNameRange.setEnd(T.getLAngleLoc().getLocWithOffset(-1)); } - auto SpelledTokens = TB.spelledForExpanded(TB.expandedTokens(NameRange)); - if (!SpelledTokens) - return false; - auto SpelledRange = syntax::Token::range(SM, SpelledTokens->front(), - SpelledTokens->back()); - Name = SpelledRange.text(SM); - - std::string QualifierToRemoveStr = getNNSLAsString( - QualifierToRemove, Inputs.AST->getASTContext().getPrintingPolicy()); - if (!Name.consume_front(QualifierToRemoveStr)) - return false; // What's spelled doesn't match the qualifier. - if (const auto *ET = E.getTypePtr()) { if (const auto *TDT = dyn_cast(ET->getNamedType().getTypePtr())) { @@ -309,19 +315,14 @@ bool AddUsing::prepare(const Selection &Inputs) { } } } - - // FIXME: This only supports removing qualifiers that are made up of just - // namespace names. If qualifier contains a type, we could take the longest - // namespace prefix and remove that. - if (!QualifierToRemove.hasQualifier() || + if (!QualifierToRemove || + // FIXME: This only supports removing qualifiers that are made up of just + // namespace names. If qualifier contains a type, we could take the + // longest namespace prefix and remove that. !QualifierToRemove.getNestedNameSpecifier()->getAsNamespace() || - Name.empty()) { - return false; - } - - if (isNamespaceForbidden(Inputs, *QualifierToRemove.getNestedNameSpecifier())) + // Respect user config. + isNamespaceForbidden(Inputs, *QualifierToRemove.getNestedNameSpecifier())) return false; - // Macros are difficult. We only want to offer code action when what's spelled // under the cursor is a namespace qualifier. If it's a macro that expands to // a qualifier, user would not know what code action will actually change. @@ -333,23 +334,35 @@ bool AddUsing::prepare(const Selection &Inputs) { return false; } + auto SpelledTokens = + TB.spelledForExpanded(TB.expandedTokens(SpelledNameRange)); + if (!SpelledTokens) + return false; + auto SpelledRange = + syntax::Token::range(SM, SpelledTokens->front(), SpelledTokens->back()); + // We only drop qualifiers that're namespaces, so this is safe. + std::tie(SpelledQualifier, SpelledName) = + splitQualifiedName(SpelledRange.text(SM)); + QualifierToSpell = getNNSLAsString( + QualifierToRemove, Inputs.AST->getASTContext().getPrintingPolicy()); + if (!llvm::StringRef(QualifierToSpell).endswith(SpelledQualifier) || + SpelledName.empty()) + return false; // What's spelled doesn't match the qualifier. 
return true; } Expected AddUsing::apply(const Selection &Inputs) { auto &SM = Inputs.AST->getSourceManager(); - std::string QualifierToRemoveStr = getNNSLAsString( - QualifierToRemove, Inputs.AST->getASTContext().getPrintingPolicy()); tooling::Replacements R; if (auto Err = R.add(tooling::Replacement( SM, SM.getSpellingLoc(QualifierToRemove.getBeginLoc()), - QualifierToRemoveStr.length(), ""))) { + SpelledQualifier.size(), ""))) { return std::move(Err); } - auto InsertionPoint = - findInsertionPoint(Inputs, QualifierToRemove, Name, MustInsertAfterLoc); + auto InsertionPoint = findInsertionPoint(Inputs, QualifierToRemove, + SpelledName, MustInsertAfterLoc); if (!InsertionPoint) { return InsertionPoint.takeError(); } @@ -362,7 +375,7 @@ Expected AddUsing::apply(const Selection &Inputs) { if (InsertionPoint->AlwaysFullyQualify && !isFullyQualified(QualifierToRemove.getNestedNameSpecifier())) UsingTextStream << "::"; - UsingTextStream << QualifierToRemoveStr << Name << ";" + UsingTextStream << QualifierToSpell << SpelledName << ";" << InsertionPoint->Suffix; assert(SM.getFileID(InsertionPoint->Loc) == SM.getMainFileID()); diff --git a/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp index adfd018f56d27..d466dd5349d44 100644 --- a/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp +++ b/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp @@ -8,8 +8,11 @@ #include "Config.h" #include "TweakTesting.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include namespace clang { namespace clangd { @@ -30,7 +33,7 @@ namespace one { void oo() {} template class tt {}; namespace two { -enum ee {}; +enum ee { ee_enum_value }; void ff() {} class cc { public: @@ -64,9 +67,6 @@ class cc { EXPECT_UNAVAILABLE(Header + "void fun() { ::ban::fo^o(); }"); EXPECT_AVAILABLE(Header + "void fun() { banana::fo^o(); }"); - // Do not offer code action on typo-corrections. - EXPECT_UNAVAILABLE(Header + "/*error-ok*/c^c C;"); - // NestedNameSpecifier, but no namespace. EXPECT_UNAVAILABLE(Header + "class Foo {}; class F^oo foo;"); @@ -466,7 +466,37 @@ one::v^ec foo; using one::vec; vec foo; -)cpp"}}; +)cpp"}, + // Typo correction. + {R"cpp( +// error-ok +#include "test.hpp" +c^c C; +)cpp", + R"cpp( +// error-ok +#include "test.hpp" +using one::two::cc; + +cc C; +)cpp"}, + {R"cpp( +// error-ok +#include "test.hpp" +void foo() { + switch(one::two::ee{}) { case two::ee_^one:break; } +} +)cpp", + R"cpp( +// error-ok +#include "test.hpp" +using one::two::ee_one; + +void foo() { + switch(one::two::ee{}) { case ee_one:break; } +} +)cpp"}, + }; llvm::StringMap EditedFiles; for (const auto &Case : Cases) { ExtraFiles["test.hpp"] = R"cpp( @@ -484,6 +514,8 @@ class cc { using uu = two::cc; template struct vec {}; })cpp"; + // Typo correction is disabled in msvc-compatibility mode. 
+ ExtraArgs.push_back("-fno-ms-compatibility"); EXPECT_EQ(apply(Case.TestSource, &EditedFiles), Case.ExpectedSource); } } From 772658a9d0243e7ae215c80c48fef00d2cbfe388 Mon Sep 17 00:00:00 2001 From: luxufan Date: Fri, 3 Mar 2023 15:21:28 +0800 Subject: [PATCH 133/691] [InstCombine][NFC] Precommit test case of PR61137 --- llvm/test/Transforms/InstCombine/phi.ll | 150 ++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll index bd0e649254adf..75eece9b626be 100644 --- a/llvm/test/Transforms/InstCombine/phi.ll +++ b/llvm/test/Transforms/InstCombine/phi.ll @@ -1500,3 +1500,153 @@ latch: exit: ret void } + +define i32 @add_two_phi_node_can_fold(i1 %c, i32 %i, i32 %j) { +; CHECK-LABEL: @add_two_phi_node_can_fold( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[Y:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[Y]], [[X]] +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + br i1 %c, label %if.then, label %if.end + +if.then: + br label %if.end + +if.end: + %x = phi i32 [ 0, %if.then ], [ %j, %entry ] + %y = phi i32 [ %i, %if.then ], [ 0, %entry ] + %add = add i32 %y, %x + ret i32 %add +} + +define i32 @add_two_phi_node_cannt_fold(i1 %c, i32 %i, i32 %j) { +; CHECK-LABEL: @add_two_phi_node_cannt_fold( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[Y:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[Y]], [[X]] +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + br i1 %c, label %if.then, label %if.end + +if.then: + br label %if.end + +if.end: + %x = phi i32 [ 0, %if.then ], [ %j, %entry ] + %y = phi i32 [ %i, %if.then ], [ 1, %entry ] + %add = add i32 %y, %x + ret i32 %add +} + +define i32 @or_two_phi_node_can_fold(i1 %c, i32 %i, i32 %j) { +; CHECK-LABEL: @or_two_phi_node_can_fold( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[Y:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[ADD:%.*]] = or i32 [[Y]], [[X]] +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + br i1 %c, label %if.then, label %if.end + +if.then: + br label %if.end + +if.end: + %x = phi i32 [ 0, %if.then ], [ %j, %entry ] + %y = phi i32 [ %i, %if.then ], [ 0, %entry ] + %add = or i32 %y, %x + ret i32 %add +} + +define i32 @and_two_phi_node_can_fold(i1 %c, i32 %i, i32 %j) { +; CHECK-LABEL: @and_two_phi_node_can_fold( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ -1, [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[Y:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ -1, [[ENTRY]] ] +; CHECK-NEXT: [[ADD:%.*]] = and i32 [[Y]], [[X]] +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + br i1 %c, label %if.then, label %if.end + 
+if.then: + br label %if.end + +if.end: + %x = phi i32 [ -1, %if.then ], [ %j, %entry ] + %y = phi i32 [ %i, %if.then ], [ -1, %entry ] + %add = and i32 %y, %x + ret i32 %add +} + +define i32 @mul_two_phi_node_can_fold(i1 %c, i32 %i, i32 %j) { +; CHECK-LABEL: @mul_two_phi_node_can_fold( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[Y:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ 1, [[ENTRY]] ] +; CHECK-NEXT: [[ADD:%.*]] = mul i32 [[Y]], [[X]] +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + br i1 %c, label %if.then, label %if.end + +if.then: + br label %if.end + +if.end: + %x = phi i32 [ 1, %if.then ], [ %j, %entry ] + %y = phi i32 [ %i, %if.then ], [ 1, %entry ] + %add = mul i32 %y, %x + ret i32 %add +} + +define i32 @xor_two_phi_node_can_fold(i1 %c, i32 %i, i32 %j) { +; CHECK-LABEL: @xor_two_phi_node_can_fold( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[Y:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[ADD:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + br i1 %c, label %if.then, label %if.end + +if.then: + br label %if.end + +if.end: + %x = phi i32 [ 0, %if.then ], [ %j, %entry ] + %y = phi i32 [ %i, %if.then ], [ 0, %entry ] + %add = xor i32 %y, %x + ret i32 %add +} From 029ec03a096901a83d650ac08375b78f9161ab51 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Tue, 21 Mar 2023 07:51:03 +0100 Subject: [PATCH 134/691] [clangd][NFC] Format & include cleanup for AddUsingTests.cpp --- .../clangd/unittests/tweaks/AddUsingTests.cpp | 142 +++++++++--------- 1 file changed, 72 insertions(+), 70 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp index d466dd5349d44..f3a479a9a240f 100644 --- a/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp +++ b/clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp @@ -8,11 +8,12 @@ #include "Config.h" #include "TweakTesting.h" +#include "support/Context.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "gmock/gmock.h" #include "gtest/gtest.h" #include +#include namespace clang { namespace clangd { @@ -96,16 +97,17 @@ TEST_F(AddUsingTest, Apply) { struct { llvm::StringRef TestSource; llvm::StringRef ExpectedSource; - } Cases[]{{ - // Function, no other using, namespace. - R"cpp( + } Cases[]{ + { + // Function, no other using, namespace. + R"cpp( #include "test.hpp" namespace { void fun() { ^one::two::ff(); } })cpp", - R"cpp( + R"cpp( #include "test.hpp" namespace {using one::two::ff; @@ -113,17 +115,17 @@ void fun() { ff(); } })cpp", - }, - // Type, no other using, namespace. - { - R"cpp( + }, + // Type, no other using, namespace. + { + R"cpp( #include "test.hpp" namespace { void fun() { ::one::t^wo::cc inst; } })cpp", - R"cpp( + R"cpp( #include "test.hpp" namespace {using ::one::two::cc; @@ -131,16 +133,16 @@ void fun() { cc inst; } })cpp", - }, - // Type, no other using, no namespace. - { - R"cpp( + }, + // Type, no other using, no namespace. 
+ { + R"cpp( #include "test.hpp" void fun() { one::two::e^e inst; })cpp", - R"cpp( + R"cpp( #include "test.hpp" using one::two::ee; @@ -148,9 +150,9 @@ using one::two::ee; void fun() { ee inst; })cpp"}, - // Function, other usings. - { - R"cpp( + // Function, other usings. + { + R"cpp( #include "test.hpp" using one::two::cc; @@ -161,7 +163,7 @@ void fun() { one::two::f^f(); } })cpp", - R"cpp( + R"cpp( #include "test.hpp" using one::two::cc; @@ -172,10 +174,10 @@ void fun() { ff(); } })cpp", - }, - // Function, other usings inside namespace. - { - R"cpp( + }, + // Function, other usings inside namespace. + { + R"cpp( #include "test.hpp" using one::two::cc; @@ -188,7 +190,7 @@ void fun() { o^ne::oo(); } })cpp", - R"cpp( + R"cpp( #include "test.hpp" using one::two::cc; @@ -201,9 +203,9 @@ void fun() { oo(); } })cpp"}, - // Using comes after cursor. - { - R"cpp( + // Using comes after cursor. + { + R"cpp( #include "test.hpp" namespace { @@ -215,7 +217,7 @@ void fun() { using one::two::cc; })cpp", - R"cpp( + R"cpp( #include "test.hpp" namespace {using one::two::ff; @@ -228,14 +230,14 @@ void fun() { using one::two::cc; })cpp"}, - // Pointer type. - {R"cpp( + // Pointer type. + {R"cpp( #include "test.hpp" void fun() { one::two::c^c *p; })cpp", - R"cpp( + R"cpp( #include "test.hpp" using one::two::cc; @@ -243,8 +245,8 @@ using one::two::cc; void fun() { cc *p; })cpp"}, - // Namespace declared via macro. - {R"cpp( + // Namespace declared via macro. + {R"cpp( #include "test.hpp" #define NS_BEGIN(name) namespace name { @@ -254,7 +256,7 @@ void fun() { one::two::f^f(); } })cpp", - R"cpp( + R"cpp( #include "test.hpp" #define NS_BEGIN(name) namespace name { @@ -266,15 +268,15 @@ void fun() { ff(); } })cpp"}, - // Inside macro argument. - {R"cpp( + // Inside macro argument. 
+ {R"cpp( #include "test.hpp" #define CALL(name) name() void fun() { CALL(one::t^wo::ff); })cpp", - R"cpp( + R"cpp( #include "test.hpp" #define CALL(name) name() @@ -283,15 +285,15 @@ using one::two::ff; void fun() { CALL(ff); })cpp"}, - // Parent namespace != lexical parent namespace - {R"cpp( + // Parent namespace != lexical parent namespace + {R"cpp( #include "test.hpp" namespace foo { void fun(); } void foo::fun() { one::two::f^f(); })cpp", - R"cpp( + R"cpp( #include "test.hpp" using one::two::ff; @@ -300,8 +302,8 @@ namespace foo { void fun(); } void foo::fun() { ff(); })cpp"}, - // If all other using are fully qualified, add :: - {R"cpp( + // If all other using are fully qualified, add :: + {R"cpp( #include "test.hpp" using ::one::two::cc; @@ -310,7 +312,7 @@ using ::one::two::ee; void fun() { one::two::f^f(); })cpp", - R"cpp( + R"cpp( #include "test.hpp" using ::one::two::cc; @@ -319,8 +321,8 @@ using ::one::two::ff;using ::one::two::ee; void fun() { ff(); })cpp"}, - // Make sure we don't add :: if it's already there - {R"cpp( + // Make sure we don't add :: if it's already there + {R"cpp( #include "test.hpp" using ::one::two::cc; @@ -329,7 +331,7 @@ using ::one::two::ee; void fun() { ::one::two::f^f(); })cpp", - R"cpp( + R"cpp( #include "test.hpp" using ::one::two::cc; @@ -338,8 +340,8 @@ using ::one::two::ff;using ::one::two::ee; void fun() { ff(); })cpp"}, - // If even one using doesn't start with ::, do not add it - {R"cpp( + // If even one using doesn't start with ::, do not add it + {R"cpp( #include "test.hpp" using ::one::two::cc; @@ -348,7 +350,7 @@ using one::two::ee; void fun() { one::two::f^f(); })cpp", - R"cpp( + R"cpp( #include "test.hpp" using ::one::two::cc; @@ -357,14 +359,14 @@ using one::two::ff;using one::two::ee; void fun() { ff(); })cpp"}, - // using alias; insert using for the spelled name. - {R"cpp( + // using alias; insert using for the spelled name. + {R"cpp( #include "test.hpp" void fun() { one::u^u u; })cpp", - R"cpp( + R"cpp( #include "test.hpp" using one::uu; @@ -372,29 +374,29 @@ using one::uu; void fun() { uu u; })cpp"}, - // using namespace. - {R"cpp( + // using namespace. + {R"cpp( #include "test.hpp" using namespace one; namespace { two::c^c C; })cpp", - R"cpp( + R"cpp( #include "test.hpp" using namespace one; namespace {using two::cc; cc C; })cpp"}, - // Type defined in main file, make sure using is after that. - {R"cpp( + // Type defined in main file, make sure using is after that. + {R"cpp( namespace xx { struct yy {}; } x^x::yy X; )cpp", - R"cpp( + R"cpp( namespace xx { struct yy {}; } @@ -403,8 +405,8 @@ using xx::yy; yy X; )cpp"}, - // Type defined in main file via "using", insert after that. - {R"cpp( + // Type defined in main file via "using", insert after that. + {R"cpp( #include "test.hpp" namespace xx { @@ -413,7 +415,7 @@ namespace xx { x^x::yy X; )cpp", - R"cpp( + R"cpp( #include "test.hpp" namespace xx { @@ -424,8 +426,8 @@ using xx::yy; yy X; )cpp"}, - // Using must come after function definition. - {R"cpp( + // Using must come after function definition. + {R"cpp( namespace xx { void yy(); } @@ -434,7 +436,7 @@ void fun() { x^x::yy(); } )cpp", - R"cpp( + R"cpp( namespace xx { void yy(); } @@ -445,23 +447,23 @@ void fun() { yy(); } )cpp"}, - // Existing using with non-namespace part. - {R"cpp( + // Existing using with non-namespace part. 
+ {R"cpp( #include "test.hpp" using one::two::ee::ee_one; one::t^wo::cc c; )cpp", - R"cpp( + R"cpp( #include "test.hpp" using one::two::cc;using one::two::ee::ee_one; cc c; )cpp"}, - // Template (like std::vector). - {R"cpp( + // Template (like std::vector). + {R"cpp( #include "test.hpp" one::v^ec foo; )cpp", - R"cpp( + R"cpp( #include "test.hpp" using one::vec; From bd5d0a20d44456b2ee53feb229ed97bb8937b2d3 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Tue, 21 Mar 2023 13:09:34 +0900 Subject: [PATCH 135/691] Reformat --- llvm/utils/TableGen/TableGen.cpp | 11 +++++------ llvm/utils/TableGen/TableGenBackends.h | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp index fd8612cd87d28..2ec3deb8ae258 100644 --- a/llvm/utils/TableGen/TableGen.cpp +++ b/llvm/utils/TableGen/TableGen.cpp @@ -157,12 +157,12 @@ cl::opt Class("class", cl::desc("Print Enum list for this class"), bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { switch (Action) { case PrintRecords: - OS << Records; // No argument, dump all contents + OS << Records; // No argument, dump all contents break; case PrintDetailedRecords: EmitDetailedRecords(Records, OS); break; - case NullBackend: // No backend at all. + case NullBackend: // No backend at all. break; case DumpJSON: EmitJSON(Records, OS); @@ -227,13 +227,12 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { OS << "\n"; break; } - case PrintSets: - { + case PrintSets: { SetTheory Sets; Sets.addFieldExpander("Set", "Elements"); for (Record *Rec : Records.getAllDerivedDefinitions("Set")) { OS << Rec->getName() << " = ["; - const std::vector *Elts = Sets.expand(Rec); + const std::vector *Elts = Sets.expand(Rec); assert(Elts && "Couldn't expand Set instance"); for (Record *Elt : *Elts) OS << ' ' << Elt->getName(); @@ -290,7 +289,7 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { return false; } -} +} // namespace int main(int argc, char **argv) { InitLLVM X(argc, argv); diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h index ac44babb12610..12ea753cf9531 100644 --- a/llvm/utils/TableGen/TableGenBackends.h +++ b/llvm/utils/TableGen/TableGenBackends.h @@ -96,6 +96,6 @@ void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS); void EmitDXILOperation(RecordKeeper &RK, raw_ostream &OS); void EmitRISCVTargetDef(const RecordKeeper &RK, raw_ostream &OS); -} // End llvm namespace +} // namespace llvm #endif From f675ec6165ab6add5e57cd43a2e9fa1a9bc21d81 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Tue, 21 Mar 2023 12:58:57 +0900 Subject: [PATCH 136/691] TableGen: Make 2nd arg `MainFn` of `TableGenMain(argv0, MainFn)` optional. --- llvm/include/llvm/TableGen/Main.h | 5 ++++- llvm/lib/TableGen/Main.cpp | 9 +++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/TableGen/Main.h b/llvm/include/llvm/TableGen/Main.h index 4e05da36168f7..4639ec756e9b1 100644 --- a/llvm/include/llvm/TableGen/Main.h +++ b/llvm/include/llvm/TableGen/Main.h @@ -13,6 +13,8 @@ #ifndef LLVM_TABLEGEN_MAIN_H #define LLVM_TABLEGEN_MAIN_H +#include + namespace llvm { class raw_ostream; @@ -22,7 +24,8 @@ class RecordKeeper; /// Returns true on error, false otherwise. 
 using TableGenMainFn = bool (raw_ostream &OS, RecordKeeper &Records);
 
-int TableGenMain(const char *argv0, TableGenMainFn *MainFn);
+int TableGenMain(const char *argv0,
+                 std::function<TableGenMainFn> MainFn = nullptr);
 
 } // end namespace llvm
 
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index 2f9ac86e1f07b..ee72b4b2e9e74 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -95,7 +95,8 @@ static int createDependencyFile(const TGParser &Parser, const char *argv0) {
   return 0;
 }
 
-int llvm::TableGenMain(const char *argv0, TableGenMainFn *MainFn) {
+int llvm::TableGenMain(const char *argv0,
+                       std::function<TableGenMainFn> MainFn) {
   RecordKeeper Records;
 
   if (TimePhases)
@@ -129,7 +130,11 @@ int llvm::TableGenMain(const char *argv0, TableGenMainFn *MainFn) {
     Records.startBackendTimer("Backend overall");
     std::string OutString;
     raw_string_ostream Out(OutString);
-    unsigned status = MainFn(Out, Records);
+    unsigned status = 0;
+    if (MainFn)
+      status = MainFn(Out, Records);
+    else
+      return 1;
     Records.stopBackendTimer();
     if (status)
       return 1;

From 4178ef43b2f12171639e3ef2bf02e5b7efd34688 Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi
Date: Fri, 17 Feb 2023 08:24:07 +0900
Subject: [PATCH 137/691] TableGen: Introduce `llvm::TableGen::Emitter` to
 register backends

`Opt(flag, func, desc)` registers an option into `Action`.
`OptClass<EmitterC>` is also available when the emitter can be invoked as
`EmitterC(RK).run(OS)`.

`Action` is defined as a `ManagedStatic` to guarantee that it has been
created by the time each emitter registration runs.

`llvm::TableGenMain(argv0, MainFn)` invokes `Action` instead of `MainFn`.

Differential Revision: https://reviews.llvm.org/D144351
---
 llvm/docs/TableGen/BackGuide.rst              | 27 +++++-----------
 llvm/include/llvm/TableGen/TableGenBackend.h  | 32 ++++++++++++++++++-
 llvm/lib/TableGen/Main.cpp                    |  6 +++-
 llvm/lib/TableGen/TableGenBackend.cpp         |  9 ++++++
 llvm/lib/TableGen/TableGenBackendSkeleton.cpp | 16 +++++++---
 5 files changed, 64 insertions(+), 26 deletions(-)

diff --git a/llvm/docs/TableGen/BackGuide.rst b/llvm/docs/TableGen/BackGuide.rst
index 7da39bf0d5713..e1413c1c73a79 100644
--- a/llvm/docs/TableGen/BackGuide.rst
+++ b/llvm/docs/TableGen/BackGuide.rst
@@ -452,20 +452,6 @@ The following steps are required to create a new backend for TableGen.
    one instance for Clang and another for LLVM. Or you may be building
    your own instance.
 
-#. Modify the selected ``tablegen.cpp`` to include your new backend.
-
-   a. Add the name to the enumerated type ``ActionType``.
-
-   #. Add a keyword to the ``ActionType`` command option using the
-      ``clEnumValN()`` function.
-
-   #. Add a case to the ``switch`` statement in the *xxx*\ ``TableGenMain()``
-      function. It should invoke the "main function" of your backend, which
-      in this case, according to convention, is named ``EmitAddressModes``.
-
-5. Add a declaration of your "main function" to the corresponding
-   ``TableGenBackends.h`` header file.
-
 #. Add your backend C++ file to the appropriate ``CMakeLists.txt`` file so
    that it will be built.
 
@@ -498,11 +484,14 @@ unit for writing a new TableGen backend. Here are a few notes on the file.
 * The ``run`` function should use the ``emitSourceFileHeader`` helper function
   to include a standard header in the emitted file.
 
-* The only function in the ``llvm`` namespace is the backend "main function."
-  In this example, it is named ``EmitAddressModes``. It creates an instance
-  of the ``AddressModesEmitter`` class, passing the ``RecordKeeper``
-  instance, then invokes the ``run`` function, passing the ``raw_ostream``
-  instance.
+* Register the class or the function as the command line option
+  with ``llvm/TableGen/TableGenBackend.h``.
+
+  * Use ``llvm::TableGen::Emitter::OptClass<EmitterC>``
+    if the class has the constructor ``(RK)`` and
+    the method ``run(OS)``.
+
+  * Otherwise, use ``llvm::TableGen::Emitter::Opt``.
 
 All the examples in the remainder of this document will assume the naming
 conventions used in the skeleton file.
 
diff --git a/llvm/include/llvm/TableGen/TableGenBackend.h b/llvm/include/llvm/TableGen/TableGenBackend.h
index a426e42175784..09e60cb3c203d 100644
--- a/llvm/include/llvm/TableGen/TableGenBackend.h
+++ b/llvm/include/llvm/TableGen/TableGenBackend.h
@@ -13,11 +13,41 @@
 #ifndef LLVM_TABLEGEN_TABLEGENBACKEND_H
 #define LLVM_TABLEGEN_TABLEGENBACKEND_H
 
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
+
 namespace llvm {
 
-class StringRef;
+class RecordKeeper;
 class raw_ostream;
 
+namespace TableGen::Emitter {
+using FnT = void (*)(RecordKeeper &Records, raw_ostream &OS);
+
+struct OptCreatorT {
+  static void *call();
+};
+
+extern ManagedStatic<cl::opt<FnT>, OptCreatorT> Action;
+
+struct Opt {
+  Opt(StringRef Name, FnT CB, StringRef Desc, bool ByDefault = false) {
+    if (ByDefault)
+      Action->setInitialValue(CB);
+    Action->getParser().addLiteralOption(Name, CB, Desc);
+  }
+};
+
+template <class EmitterC> class OptClass : Opt {
+  static void run(RecordKeeper &RK, raw_ostream &OS) { EmitterC(RK).run(OS); }
+
+public:
+  OptClass(StringRef Name, StringRef Desc) : Opt(Name, run, Desc) {}
+};
+
+} // namespace TableGen::Emitter
+
 /// emitSourceFileHeader - Output an LLVM style file header to the specified
 /// raw_ostream.
 void emitSourceFileHeader(StringRef Desc, raw_ostream &OS);
diff --git a/llvm/lib/TableGen/Main.cpp b/llvm/lib/TableGen/Main.cpp
index ee72b4b2e9e74..9aee1f8fecd2f 100644
--- a/llvm/lib/TableGen/Main.cpp
+++ b/llvm/lib/TableGen/Main.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
 #include 
 #include 
 #include 
@@ -131,7 +132,10 @@ int llvm::TableGenMain(const char *argv0,
   std::string OutString;
   raw_string_ostream Out(OutString);
   unsigned status = 0;
-  if (MainFn)
+  TableGen::Emitter::FnT ActionFn = TableGen::Emitter::Action->getValue();
+  if (ActionFn)
+    ActionFn(Records, Out);
+  else if (MainFn)
     status = MainFn(Out, Records);
   else
     return 1;
diff --git a/llvm/lib/TableGen/TableGenBackend.cpp b/llvm/lib/TableGen/TableGenBackend.cpp
index 252f126d2d00f..135ec643bc3a7 100644
--- a/llvm/lib/TableGen/TableGenBackend.cpp
+++ b/llvm/lib/TableGen/TableGenBackend.cpp
@@ -13,12 +13,21 @@
 #include "llvm/TableGen/TableGenBackend.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/raw_ostream.h"
+#include 
 #include 
+#include 
 
 using namespace llvm;
 
 const size_t MAX_LINE_LEN = 80U;
 
+namespace llvm::TableGen::Emitter {
+ManagedStatic<cl::opt<FnT>, OptCreatorT> Action;
+void *OptCreatorT::call() {
+  return new cl::opt<FnT>(cl::desc("Action to perform:"));
+}
+} // namespace llvm::TableGen::Emitter
+
 static void printLine(raw_ostream &OS, const Twine &Prefix, char Fill,
                       StringRef Suffix) {
   size_t Pos = (size_t)OS.tell();
diff --git a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
index 0ba00c8d8ab1c..2fde4a66727b9 100644
--- a/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
+++ b/llvm/lib/TableGen/TableGenBackendSkeleton.cpp
@@ -46,14 +46,20 @@ void SkeletonEmitter::run(raw_ostream &OS) {
   (void)Records; // To suppress unused variable warning; remove on use.
 }
 
-namespace llvm {
+// Choose either option A or B.
 
-// The only thing that should be in the llvm namespace is the
-// emitter entry point function.
+//===----------------------------------------------------------------------===//
+// Option A: Register the backend as a class
+static TableGen::Emitter::OptClass<SkeletonEmitter>
+    X("gen-skeleton-class", "Generate example skeleton class");
 
-void EmitSkeleton(RecordKeeper &RK, raw_ostream &OS) {
+//===----------------------------------------------------------------------===//
+// Option B: Register "EmitSkeleton" directly
+// The emitter entry may have private scope.
+static void EmitSkeleton(RecordKeeper &RK, raw_ostream &OS) {
   // Instantiate the emitter class and invoke run().
   SkeletonEmitter(RK).run(OS);
 }
 
-} // namespace llvm
+static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton,
+                                "Generate example skeleton entry");

From f35064dbe993708cad39234ced0b7c4b01c98ae2 Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi
Date: Mon, 20 Feb 2023 23:02:00 +0900
Subject: [PATCH 138/691] llvm-tblgen: Move decl of `EmitDecoder()` to
 TableGenBackends.h

Differential Revision: https://reviews.llvm.org/D144351
---
 llvm/utils/TableGen/DecoderEmitter.cpp      | 1 +
 llvm/utils/TableGen/DisassemblerEmitter.cpp | 3 ---
 llvm/utils/TableGen/TableGenBackends.h      | 6 ++++++
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index b3f696654fc5f..e1cdbb8aa2548 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -15,6 +15,7 @@
 #include "CodeGenInstruction.h"
 #include "CodeGenTarget.h"
 #include "InfoByHwMode.h"
+#include "TableGenBackends.h"
 #include "VarLenCodeEmitterGen.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
diff --git a/llvm/utils/TableGen/DisassemblerEmitter.cpp b/llvm/utils/TableGen/DisassemblerEmitter.cpp
index 6280df37db6a7..eba6c54bd3858 100644
--- a/llvm/utils/TableGen/DisassemblerEmitter.cpp
+++ b/llvm/utils/TableGen/DisassemblerEmitter.cpp
@@ -96,9 +96,6 @@ using namespace llvm::X86Disassembler;
 
 namespace llvm {
 
-extern void EmitDecoder(RecordKeeper &RK, raw_ostream &OS,
-                        const std::string &PredicateNamespace);
-
 void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
   CodeGenTarget Target(Records);
   emitSourceFileHeader(" * " + Target.getName().str() + " Disassembler", OS);
diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h
index 12ea753cf9531..7be83af3734f5 100644
--- a/llvm/utils/TableGen/TableGenBackends.h
+++ b/llvm/utils/TableGen/TableGenBackends.h
@@ -15,6 +15,8 @@
 #ifndef LLVM_UTILS_TABLEGEN_TABLEGENBACKENDS_H
 #define LLVM_UTILS_TABLEGEN_TABLEGENBACKENDS_H
 
+#include <string>
+
 // A TableGen backend is a function that looks like
 //
 //    EmitFoo(RecordKeeper &RK, raw_ostream &OS /*, anything else you need */ )
 
@@ -96,6 +98,10 @@ void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS);
 void EmitDXILOperation(RecordKeeper &RK, raw_ostream &OS);
 void EmitRISCVTargetDef(const RecordKeeper &RK, raw_ostream &OS);
 
+// Defined in DecoderEmitter.cpp
+void EmitDecoder(RecordKeeper &RK, raw_ostream &OS,
+                 const std::string &PredicateNamespace);
+
 } // namespace llvm
 
 #endif

From a7e2b749b551dc39fe2840af21759796ccaeb78f Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi
Date: Sun, 19 Feb 2023 01:22:38 +0900
Subject: [PATCH 139/691] llvm-tblgen: Clean up each EmitterClass to be
 invoked with a uniform signature.
Differential Revision: https://reviews.llvm.org/D144351 --- llvm/utils/TableGen/AsmMatcherEmitter.cpp | 3 ++- llvm/utils/TableGen/AsmWriterEmitter.cpp | 2 +- llvm/utils/TableGen/Attributes.cpp | 6 +++--- llvm/utils/TableGen/CallingConvEmitter.cpp | 3 ++- llvm/utils/TableGen/CodeEmitterGen.cpp | 3 ++- llvm/utils/TableGen/DAGISelEmitter.cpp | 2 +- llvm/utils/TableGen/DFAPacketizerEmitter.cpp | 2 +- llvm/utils/TableGen/SubtargetEmitter.cpp | 9 ++++----- llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 8 ++++---- 9 files changed, 20 insertions(+), 18 deletions(-) diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 57b25b237b3ee..6bffd545f3c46 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -3205,6 +3205,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { Record *AsmParser = Target.getAsmParser(); StringRef ClassName = AsmParser->getValueAsString("AsmParserClassName"); + emitSourceFileHeader("Assembly Matcher Source Fragment", OS); + // Compute the information on the instructions to match. AsmMatcherInfo Info(AsmParser, Target, Records); Info.buildInfo(); @@ -4005,7 +4007,6 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { namespace llvm { void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS) { - emitSourceFileHeader("Assembly Matcher Source Fragment", OS); AsmMatcherEmitter(RK).run(OS); } diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp index f905993ac78b6..0d5109f9b3c9f 100644 --- a/llvm/utils/TableGen/AsmWriterEmitter.cpp +++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp @@ -1303,6 +1303,7 @@ void AsmWriterEmitter::run(raw_ostream &O) { std::vector> TableDrivenOperandPrinters; unsigned BitsLeft = 0; unsigned AsmStrBits = 0; + emitSourceFileHeader("Assembly Writer Source Fragment", O); EmitGetMnemonic(O, TableDrivenOperandPrinters, BitsLeft, AsmStrBits); EmitPrintInstruction(O, TableDrivenOperandPrinters, BitsLeft, AsmStrBits); EmitGetRegisterName(O); @@ -1312,7 +1313,6 @@ void AsmWriterEmitter::run(raw_ostream &O) { namespace llvm { void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS) { - emitSourceFileHeader("Assembly Writer Source Fragment", OS); AsmWriterEmitter(RK).run(OS); } diff --git a/llvm/utils/TableGen/Attributes.cpp b/llvm/utils/TableGen/Attributes.cpp index aa0d2b780fff3..705fb00415850 100644 --- a/llvm/utils/TableGen/Attributes.cpp +++ b/llvm/utils/TableGen/Attributes.cpp @@ -18,7 +18,7 @@ namespace { class Attributes { public: Attributes(RecordKeeper &R) : Records(R) {} - void emit(raw_ostream &OS); + void run(raw_ostream &OS); private: void emitTargetIndependentNames(raw_ostream &OS); @@ -124,7 +124,7 @@ void Attributes::emitAttributeProperties(raw_ostream &OS) { OS << "#endif\n"; } -void Attributes::emit(raw_ostream &OS) { +void Attributes::run(raw_ostream &OS) { emitTargetIndependentNames(OS); emitFnAttrCompatCheck(OS, false); emitAttributeProperties(OS); @@ -133,7 +133,7 @@ void Attributes::emit(raw_ostream &OS) { namespace llvm { void EmitAttributes(RecordKeeper &RK, raw_ostream &OS) { - Attributes(RK).emit(OS); + Attributes(RK).run(OS); } } // namespace llvm diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp index 41e779daa5397..048cccc07c70e 100644 --- a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -42,6 +42,8 @@ class CallingConvEmitter { } // End anonymous namespace void CallingConvEmitter::run(raw_ostream &O) { + 
emitSourceFileHeader("Calling Convention Implementation Fragment", O); + std::vector CCs = Records.getAllDerivedDefinitions("CallingConv"); // Emit prototypes for all of the non-custom CC's so that they can forward ref @@ -430,7 +432,6 @@ void CallingConvEmitter::EmitArgRegisterLists(raw_ostream &O) { namespace llvm { void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS) { - emitSourceFileHeader("Calling Convention Implementation Fragment", OS); CallingConvEmitter(RK).run(OS); } diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp index 11a57bb1bc310..4bbc260186175 100644 --- a/llvm/utils/TableGen/CodeEmitterGen.cpp +++ b/llvm/utils/TableGen/CodeEmitterGen.cpp @@ -358,6 +358,8 @@ void CodeEmitterGen::emitInstructionBaseValues( } void CodeEmitterGen::run(raw_ostream &o) { + emitSourceFileHeader("Machine Code Emitter", o); + CodeGenTarget Target(Records); std::vector Insts = Records.getAllDerivedDefinitions("Instruction"); @@ -505,7 +507,6 @@ void CodeEmitterGen::run(raw_ostream &o) { namespace llvm { void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) { - emitSourceFileHeader("Machine Code Emitter", OS); CodeEmitterGen(RK).run(OS); } diff --git a/llvm/utils/TableGen/DAGISelEmitter.cpp b/llvm/utils/TableGen/DAGISelEmitter.cpp index 07febba6bc14e..9ed9f5e88cea4 100644 --- a/llvm/utils/TableGen/DAGISelEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelEmitter.cpp @@ -125,6 +125,7 @@ struct PatternSortingPredicate { void DAGISelEmitter::run(raw_ostream &OS) { + Records.startTimer("Parse patterns"); emitSourceFileHeader("DAG Instruction Selector for the " + CGP.getTargetInfo().getName().str() + " target", OS); @@ -190,7 +191,6 @@ void DAGISelEmitter::run(raw_ostream &OS) { namespace llvm { void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS) { - RK.startTimer("Parse patterns"); DAGISelEmitter(RK).run(OS); } diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp index e9fca57c3ef3d..941846eca0fa0 100644 --- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp @@ -206,6 +206,7 @@ void DFAPacketizerEmitter::createScheduleClasses(unsigned ItineraryIdx, // Run the worklist algorithm to generate the DFA. 
// void DFAPacketizerEmitter::run(raw_ostream &OS) { + emitSourceFileHeader("Target DFA Packetizer Tables", OS); OS << "\n" << "#include \"llvm/CodeGen/DFAPacketizer.h\"\n"; OS << "namespace llvm {\n"; @@ -356,7 +357,6 @@ void DFAPacketizerEmitter::emitForItineraries( namespace llvm { void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS) { - emitSourceFileHeader("Target DFA Packetizer Tables", OS); DFAPacketizerEmitter(RK).run(OS); } diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index c98048626c167..3ff25413b873e 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -70,7 +70,7 @@ class SubtargetEmitter { } }; - const CodeGenTarget &TGT; + CodeGenTarget TGT; RecordKeeper &Records; CodeGenSchedModels &SchedModels; std::string Target; @@ -128,8 +128,8 @@ class SubtargetEmitter { void ParseFeaturesFunction(raw_ostream &OS); public: - SubtargetEmitter(RecordKeeper &R, CodeGenTarget &TGT) - : TGT(TGT), Records(R), SchedModels(TGT.getSchedModels()), + SubtargetEmitter(RecordKeeper &R) + : TGT(R), Records(R), SchedModels(TGT.getSchedModels()), Target(TGT.getName()) {} void run(raw_ostream &o); @@ -1988,8 +1988,7 @@ void SubtargetEmitter::run(raw_ostream &OS) { namespace llvm { void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS) { - CodeGenTarget CGTarget(RK); - SubtargetEmitter(RK, CGTarget).run(OS); + SubtargetEmitter(RK).run(OS); } } // end namespace llvm diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index 052151dfb007e..e8555df7ba021 100644 --- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -141,7 +141,7 @@ class X86FoldTablesEmitter { X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {} // run - Generate the 6 X86 memory fold tables. - void run(formatted_raw_ostream &OS); + void run(raw_ostream &OS); private: // Decides to which table to add the entry with the given instructions. @@ -522,7 +522,8 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr, } } -void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) { +void X86FoldTablesEmitter::run(raw_ostream &o) { + formatted_raw_ostream OS(o); emitSourceFileHeader("X86 fold tables", OS); // Holds all memory instructions @@ -615,8 +616,7 @@ void X86FoldTablesEmitter::run(formatted_raw_ostream &OS) { namespace llvm { -void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &o) { - formatted_raw_ostream OS(o); +void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS) { X86FoldTablesEmitter(RK).run(OS); } } // namespace llvm From 9c93e728bfb8079c1de51e5481168c4083038c2a Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sun, 19 Feb 2023 14:30:14 +0900 Subject: [PATCH 140/691] llvm-tblgen: Rewrite emitters to use `TableGen::Emitter` Each emitter became self-contained since it has the registration of option. 
Differential Revision: https://reviews.llvm.org/D144351 --- llvm/utils/TableGen/AsmMatcherEmitter.cpp | 11 +- llvm/utils/TableGen/AsmWriterEmitter.cpp | 10 +- llvm/utils/TableGen/Attributes.cpp | 11 +- llvm/utils/TableGen/CTagsEmitter.cpp | 9 +- llvm/utils/TableGen/CallingConvEmitter.cpp | 10 +- llvm/utils/TableGen/CodeEmitterGen.cpp | 10 +- llvm/utils/TableGen/CompressInstEmitter.cpp | 10 +- llvm/utils/TableGen/DAGISelEmitter.cpp | 10 +- llvm/utils/TableGen/DFAEmitter.cpp | 11 +- llvm/utils/TableGen/DFAPacketizerEmitter.cpp | 10 +- llvm/utils/TableGen/DXILEmitter.cpp | 9 +- llvm/utils/TableGen/DirectiveEmitter.cpp | 20 +- llvm/utils/TableGen/DisassemblerEmitter.cpp | 7 +- llvm/utils/TableGen/ExegesisEmitter.cpp | 10 +- llvm/utils/TableGen/FastISelEmitter.cpp | 8 +- llvm/utils/TableGen/GICombinerEmitter.cpp | 7 +- llvm/utils/TableGen/GlobalISelEmitter.cpp | 8 +- llvm/utils/TableGen/InstrDocsEmitter.cpp | 9 +- llvm/utils/TableGen/InstrInfoEmitter.cpp | 7 +- llvm/utils/TableGen/IntrinsicEmitter.cpp | 11 +- llvm/utils/TableGen/OptParserEmitter.cpp | 8 +- llvm/utils/TableGen/OptRSTEmitter.cpp | 9 +- llvm/utils/TableGen/PseudoLoweringEmitter.cpp | 10 +- llvm/utils/TableGen/RISCVTargetDefEmitter.cpp | 7 +- llvm/utils/TableGen/RegisterBankEmitter.cpp | 10 +- llvm/utils/TableGen/RegisterInfoEmitter.cpp | 10 +- .../utils/TableGen/SearchableTableEmitter.cpp | 11 +- llvm/utils/TableGen/SubtargetEmitter.cpp | 10 +- llvm/utils/TableGen/TableGen.cpp | 288 +++--------------- llvm/utils/TableGen/TableGenBackends.h | 33 -- .../TableGen/X86EVEX2VEXTablesEmitter.cpp | 8 +- llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 9 +- llvm/utils/TableGen/X86MnemonicTables.cpp | 8 +- 33 files changed, 133 insertions(+), 486 deletions(-) diff --git a/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/llvm/utils/TableGen/AsmMatcherEmitter.cpp index 6bffd545f3c46..a1082674ddf15 100644 --- a/llvm/utils/TableGen/AsmMatcherEmitter.cpp +++ b/llvm/utils/TableGen/AsmMatcherEmitter.cpp @@ -100,7 +100,6 @@ #include "CodeGenRegisters.h" #include "CodeGenTarget.h" #include "SubtargetFeatureInfo.h" -#include "TableGenBackends.h" #include "Types.h" #include "llvm/ADT/CachedHashString.h" #include "llvm/ADT/PointerUnion.h" @@ -108,7 +107,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/Config/llvm-config.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -4004,10 +4002,5 @@ void AsmMatcherEmitter::run(raw_ostream &OS) { OS << "#endif // GET_MNEMONIC_CHECKER\n\n"; } -namespace llvm { - -void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS) { - AsmMatcherEmitter(RK).run(OS); -} - -} // end namespace llvm +static TableGen::Emitter::OptClass + X("gen-asm-matcher", "Generate assembly instruction matcher"); diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp index 0d5109f9b3c9f..505fdd8c25fb9 100644 --- a/llvm/utils/TableGen/AsmWriterEmitter.cpp +++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp @@ -17,7 +17,6 @@ #include "CodeGenRegisters.h" #include "CodeGenTarget.h" #include "SequenceToOffsetTable.h" -#include "TableGenBackends.h" #include "Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -1310,10 +1309,5 @@ void AsmWriterEmitter::run(raw_ostream &O) { EmitPrintAliasInstruction(O); } -namespace llvm { - -void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS) { - AsmWriterEmitter(RK).run(OS); -} - -} // end namespace llvm +static 
TableGen::Emitter::OptClass + X("gen-asm-writer", "Generate assembly writer"); diff --git a/llvm/utils/TableGen/Attributes.cpp b/llvm/utils/TableGen/Attributes.cpp index 705fb00415850..5f8dd1594a1f2 100644 --- a/llvm/utils/TableGen/Attributes.cpp +++ b/llvm/utils/TableGen/Attributes.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "TableGenBackends.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" #include using namespace llvm; @@ -130,10 +130,5 @@ void Attributes::run(raw_ostream &OS) { emitAttributeProperties(OS); } -namespace llvm { - -void EmitAttributes(RecordKeeper &RK, raw_ostream &OS) { - Attributes(RK).run(OS); -} - -} // namespace llvm +static TableGen::Emitter::OptClass X("gen-attrs", + "Generate attributes"); diff --git a/llvm/utils/TableGen/CTagsEmitter.cpp b/llvm/utils/TableGen/CTagsEmitter.cpp index 11ec81790877c..b8e27d057d957 100644 --- a/llvm/utils/TableGen/CTagsEmitter.cpp +++ b/llvm/utils/TableGen/CTagsEmitter.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "TableGenBackends.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" #include #include using namespace llvm; @@ -86,8 +86,5 @@ void CTagsEmitter::run(raw_ostream &OS) { T.emit(OS); } -namespace llvm { - -void EmitCTags(RecordKeeper &RK, raw_ostream &OS) { CTagsEmitter(RK).run(OS); } - -} // namespace llvm +static TableGen::Emitter::OptClass + X("gen-ctags", "Generate ctags-compatible index"); diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp index 048cccc07c70e..49d4f3196e664 100644 --- a/llvm/utils/TableGen/CallingConvEmitter.cpp +++ b/llvm/utils/TableGen/CallingConvEmitter.cpp @@ -12,7 +12,6 @@ //===----------------------------------------------------------------------===// #include "CodeGenTarget.h" -#include "TableGenBackends.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" @@ -429,10 +428,5 @@ void CallingConvEmitter::EmitArgRegisterLists(raw_ostream &O) { } } -namespace llvm { - -void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS) { - CallingConvEmitter(RK).run(OS); -} - -} // namespace llvm +static TableGen::Emitter::OptClass + X("gen-callingconv", "Generate calling convention descriptions"); diff --git a/llvm/utils/TableGen/CodeEmitterGen.cpp b/llvm/utils/TableGen/CodeEmitterGen.cpp index 4bbc260186175..66ef71a90a93d 100644 --- a/llvm/utils/TableGen/CodeEmitterGen.cpp +++ b/llvm/utils/TableGen/CodeEmitterGen.cpp @@ -16,7 +16,6 @@ #include "CodeGenInstruction.h" #include "CodeGenTarget.h" #include "InfoByHwMode.h" -#include "TableGenBackends.h" #include "VarLenCodeEmitterGen.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" @@ -504,10 +503,5 @@ void CodeEmitterGen::run(raw_ostream &o) { } // end anonymous namespace -namespace llvm { - -void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) { - CodeEmitterGen(RK).run(OS); -} - -} // end namespace llvm +static TableGen::Emitter::OptClass + X("gen-emitter", "Generate machine code emitter"); diff --git a/llvm/utils/TableGen/CompressInstEmitter.cpp b/llvm/utils/TableGen/CompressInstEmitter.cpp index 89e50ea29cb15..a7816c24f8717 100644 --- a/llvm/utils/TableGen/CompressInstEmitter.cpp +++ b/llvm/utils/TableGen/CompressInstEmitter.cpp @@ 
-67,7 +67,6 @@ #include "CodeGenInstruction.h" #include "CodeGenRegisters.h" #include "CodeGenTarget.h" -#include "TableGenBackends.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -904,10 +903,5 @@ void CompressInstEmitter::run(raw_ostream &o) { emitCompressInstEmitter(o, EmitterType::CheckCompress); } -namespace llvm { - -void EmitCompressInst(RecordKeeper &RK, raw_ostream &OS) { - CompressInstEmitter(RK).run(OS); -} - -} // namespace llvm +static TableGen::Emitter::OptClass + X("gen-compress-inst-emitter", "Generate RISCV compressed instructions."); diff --git a/llvm/utils/TableGen/DAGISelEmitter.cpp b/llvm/utils/TableGen/DAGISelEmitter.cpp index 9ed9f5e88cea4..cf8e3f2675716 100644 --- a/llvm/utils/TableGen/DAGISelEmitter.cpp +++ b/llvm/utils/TableGen/DAGISelEmitter.cpp @@ -14,7 +14,6 @@ #include "CodeGenInstruction.h" #include "CodeGenTarget.h" #include "DAGISelMatcher.h" -#include "TableGenBackends.h" #include "llvm/Support/Debug.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" @@ -188,10 +187,5 @@ void DAGISelEmitter::run(raw_ostream &OS) { EmitMatcherTable(TheMatcher.get(), CGP, OS); } -namespace llvm { - -void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS) { - DAGISelEmitter(RK).run(OS); -} - -} // namespace llvm +static TableGen::Emitter::OptClass + X("gen-dag-isel", "Generate a DAG instruction selector"); diff --git a/llvm/utils/TableGen/DFAEmitter.cpp b/llvm/utils/TableGen/DFAEmitter.cpp index 705908226fa18..54ad81cbebe81 100644 --- a/llvm/utils/TableGen/DFAEmitter.cpp +++ b/llvm/utils/TableGen/DFAEmitter.cpp @@ -22,13 +22,13 @@ #include "DFAEmitter.h" #include "SequenceToOffsetTable.h" -#include "TableGenBackends.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/UniqueVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" #include #include #include @@ -370,10 +370,5 @@ void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << ")"; } -namespace llvm { - -void EmitAutomata(RecordKeeper &RK, raw_ostream &OS) { - AutomatonEmitter(RK).run(OS); -} - -} // namespace llvm +static TableGen::Emitter::OptClass + X("gen-automata", "Generate generic automata"); diff --git a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp index 941846eca0fa0..da8538fc801a9 100644 --- a/llvm/utils/TableGen/DFAPacketizerEmitter.cpp +++ b/llvm/utils/TableGen/DFAPacketizerEmitter.cpp @@ -17,7 +17,6 @@ #include "CodeGenSchedule.h" #include "CodeGenTarget.h" #include "DFAEmitter.h" -#include "TableGenBackends.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -354,10 +353,5 @@ void DFAPacketizerEmitter::emitForItineraries( << "\n}\n\n"; } -namespace llvm { - -void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS) { - DFAPacketizerEmitter(RK).run(OS); -} - -} // end namespace llvm +static TableGen::Emitter::OptClass + X("gen-dfa-packetizer", "Generate DFA Packetizer for VLIW targets"); diff --git a/llvm/utils/TableGen/DXILEmitter.cpp b/llvm/utils/TableGen/DXILEmitter.cpp index 5239cb74d2aaa..51924ff76524b 100644 --- a/llvm/utils/TableGen/DXILEmitter.cpp +++ b/llvm/utils/TableGen/DXILEmitter.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "SequenceToOffsetTable.h" -#include "TableGenBackends.h" 
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/DXILOperationCommon.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" using namespace llvm; using namespace llvm::dxil; @@ -411,9 +411,7 @@ static void emitDXILOperationTable(std::vector &DXILOps, OS << "}\n "; } -namespace llvm { - -void EmitDXILOperation(RecordKeeper &Records, raw_ostream &OS) { +static void EmitDXILOperation(RecordKeeper &Records, raw_ostream &OS) { std::vector Ops = Records.getAllDerivedDefinitions("dxil_op"); OS << "// Generated code, do not edit.\n"; OS << "\n"; @@ -439,4 +437,5 @@ void EmitDXILOperation(RecordKeeper &Records, raw_ostream &OS) { OS << "\n"; } -} // namespace llvm +static TableGen::Emitter::Opt X("gen-dxil-operation", EmitDXILOperation, + "Generate DXIL operation information"); diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp index a65c71690b097..9d2860743308f 100644 --- a/llvm/utils/TableGen/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/DirectiveEmitter.cpp @@ -12,13 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/TableGen/DirectiveEmitter.h" -#include "TableGenBackends.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" using namespace llvm; @@ -174,11 +174,9 @@ bool DirectiveLanguage::HasValidityErrors() const { return HasDuplicateClausesInDirectives(getDirectives()); } -namespace llvm { - // Generate the declaration section for the enumeration in the directive // language -void EmitDirectivesDecl(RecordKeeper &Records, raw_ostream &OS) { +static void EmitDirectivesDecl(RecordKeeper &Records, raw_ostream &OS) { const auto DirLang = DirectiveLanguage{Records}; if (DirLang.HasValidityErrors()) return; @@ -247,8 +245,6 @@ void EmitDirectivesDecl(RecordKeeper &Records, raw_ostream &OS) { OS << "#endif // LLVM_" << DirLang.getName() << "_INC\n"; } -} // namespace llvm - // Generate function implementation for getName(StringRef Str) static void GenerateGetName(const std::vector &Records, raw_ostream &OS, StringRef Enum, @@ -877,11 +873,9 @@ void EmitDirectivesBasicImpl(const DirectiveLanguage &DirLang, GenerateIsAllowedClause(DirLang, OS); } -namespace llvm { - // Generate the implemenation section for the enumeration in the directive // language. 
-void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) { +static void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) { const auto DirLang = DirectiveLanguage{Records}; if (DirLang.HasValidityErrors()) return; @@ -893,4 +887,10 @@ void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) { EmitDirectivesBasicImpl(DirLang, OS); } -} // namespace llvm +static TableGen::Emitter::Opt + X("gen-directive-decl", EmitDirectivesDecl, + "Generate directive related declaration code (header file)"); + +static TableGen::Emitter::Opt + Y("gen-directive-impl", EmitDirectivesImpl, + "Generate directive related implementation code"); diff --git a/llvm/utils/TableGen/DisassemblerEmitter.cpp b/llvm/utils/TableGen/DisassemblerEmitter.cpp index eba6c54bd3858..92f3721507e5b 100644 --- a/llvm/utils/TableGen/DisassemblerEmitter.cpp +++ b/llvm/utils/TableGen/DisassemblerEmitter.cpp @@ -94,9 +94,7 @@ using namespace llvm::X86Disassembler; /// X86RecognizableInstr.cpp contains the implementation for a single /// instruction. -namespace llvm { - -void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) { +static void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) { CodeGenTarget Target(Records); emitSourceFileHeader(" * " + Target.getName().str() + " Disassembler", OS); @@ -133,4 +131,5 @@ void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) { EmitDecoder(Records, OS, PredicateNamespace); } -} // end namespace llvm +static TableGen::Emitter::Opt X("gen-disassembler", EmitDisassembler, + "Generate disassembler"); diff --git a/llvm/utils/TableGen/ExegesisEmitter.cpp b/llvm/utils/TableGen/ExegesisEmitter.cpp index 751000f726f2b..736f1220be14d 100644 --- a/llvm/utils/TableGen/ExegesisEmitter.cpp +++ b/llvm/utils/TableGen/ExegesisEmitter.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "TableGenBackends.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" @@ -203,10 +202,5 @@ void ExegesisEmitter::run(raw_ostream &OS) const { } // end anonymous namespace -namespace llvm { - -void EmitExegesis(RecordKeeper &RK, raw_ostream &OS) { - ExegesisEmitter(RK).run(OS); -} - -} // end namespace llvm +static TableGen::Emitter::OptClass + X("gen-exegesis", "Generate llvm-exegesis tables"); diff --git a/llvm/utils/TableGen/FastISelEmitter.cpp b/llvm/utils/TableGen/FastISelEmitter.cpp index a1e45be609cd5..3f3a63de0c0c7 100644 --- a/llvm/utils/TableGen/FastISelEmitter.cpp +++ b/llvm/utils/TableGen/FastISelEmitter.cpp @@ -21,7 +21,6 @@ #include "CodeGenRegisters.h" #include "CodeGenTarget.h" #include "InfoByHwMode.h" -#include "TableGenBackends.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/TableGen/Error.h" @@ -858,9 +857,7 @@ void FastISelMap::printFunctionDefinitions(raw_ostream &OS) { // TODO: SignaturesWithConstantForms should be empty here. 
} -namespace llvm { - -void EmitFastISel(RecordKeeper &RK, raw_ostream &OS) { +static void EmitFastISel(RecordKeeper &RK, raw_ostream &OS) { CodeGenDAGPatterns CGP(RK); const CodeGenTarget &Target = CGP.getTargetInfo(); emitSourceFileHeader("\"Fast\" Instruction Selector for the " + @@ -876,4 +873,5 @@ void EmitFastISel(RecordKeeper &RK, raw_ostream &OS) { F.printFunctionDefinitions(OS); } -} // namespace llvm +static TableGen::Emitter::Opt X("gen-fast-isel", EmitFastISel, + "Generate a \"fast\" instruction selector"); diff --git a/llvm/utils/TableGen/GICombinerEmitter.cpp b/llvm/utils/TableGen/GICombinerEmitter.cpp index 68a7296de13ab..be8425e304e65 100644 --- a/llvm/utils/TableGen/GICombinerEmitter.cpp +++ b/llvm/utils/TableGen/GICombinerEmitter.cpp @@ -20,7 +20,6 @@ #include "GlobalISel/GIMatchDagOperands.h" #include "GlobalISel/GIMatchDagPredicate.h" #include "GlobalISel/GIMatchTree.h" -#include "TableGenBackends.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" @@ -1067,8 +1066,7 @@ void GICombinerEmitter::run(raw_ostream &OS) { //===----------------------------------------------------------------------===// -namespace llvm { -void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS) { +static void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS) { CodeGenTarget Target(RK); emitSourceFileHeader("Global Combiner", OS); @@ -1083,4 +1081,5 @@ void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS) { NumPatternTotalStatistic = NumPatternTotal; } -} // namespace llvm +static TableGen::Emitter::Opt X("gen-global-isel-combiner", EmitGICombiner, + "Generate GlobalISel combiner"); diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index dfeb7aaeeda9e..666253ac43b3d 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -36,7 +36,6 @@ #include "CodeGenTarget.h" #include "InfoByHwMode.h" #include "SubtargetFeatureInfo.h" -#include "TableGenBackends.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CodeGenCoverage.h" #include "llvm/Support/CommandLine.h" @@ -6363,8 +6362,5 @@ unsigned OperandMatcher::getInsnVarID() const { return Insn.getInsnVarID(); } //===----------------------------------------------------------------------===// -namespace llvm { -void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS) { - GlobalISelEmitter(RK).run(OS); -} -} // namespace llvm +static TableGen::Emitter::OptClass + X("gen-global-isel", "Generate GlobalISel selector"); diff --git a/llvm/utils/TableGen/InstrDocsEmitter.cpp b/llvm/utils/TableGen/InstrDocsEmitter.cpp index 7476422025fd9..616e7b589288a 100644 --- a/llvm/utils/TableGen/InstrDocsEmitter.cpp +++ b/llvm/utils/TableGen/InstrDocsEmitter.cpp @@ -21,8 +21,8 @@ #include "CodeGenDAGPatterns.h" #include "CodeGenInstruction.h" #include "CodeGenTarget.h" -#include "TableGenBackends.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" #include #include @@ -54,9 +54,7 @@ static std::string escapeForRST(StringRef Str) { return Result; } -namespace llvm { - -void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) { +static void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) { CodeGenDAGPatterns CDP(RK); CodeGenTarget &Target = CDP.getTargetInfo(); unsigned VariantCount = Target.getAsmParserVariantCount(); @@ -217,4 +215,5 @@ void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS) { } } -} // end namespace llvm +static TableGen::Emitter::Opt X("gen-instr-docs", EmitInstrDocs, + "Generate 
instruction documentation"); diff --git a/llvm/utils/TableGen/InstrInfoEmitter.cpp b/llvm/utils/TableGen/InstrInfoEmitter.cpp index 4eef1fef2a91e..c051775890244 100644 --- a/llvm/utils/TableGen/InstrInfoEmitter.cpp +++ b/llvm/utils/TableGen/InstrInfoEmitter.cpp @@ -1245,13 +1245,12 @@ void InstrInfoEmitter::emitEnums(raw_ostream &OS) { OS << "#endif // GET_INSTRINFO_SCHED_ENUM\n\n"; } -namespace llvm { - -void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) { +static void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS) { RK.startTimer("Analyze DAG patterns"); InstrInfoEmitter(RK).run(OS); RK.startTimer("Emit map table"); EmitMapTable(RK, OS); } -} // end namespace llvm +static TableGen::Emitter::Opt X("gen-instr-info", EmitInstrInfo, + "Generate instruction descriptions"); diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp index b759c71811124..704c12611fde6 100644 --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -12,7 +12,6 @@ #include "CodeGenIntrinsics.h" #include "SequenceToOffsetTable.h" -#include "TableGenBackends.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" @@ -967,10 +966,16 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap( OS << "#endif\n\n"; } -void llvm::EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS) { +static void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS) { IntrinsicEmitter(RK).run(OS, /*Enums=*/true); } -void llvm::EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS) { +static TableGen::Emitter::Opt X("gen-intrinsic-enums", EmitIntrinsicEnums, + "Generate intrinsic enums"); + +static void EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS) { IntrinsicEmitter(RK).run(OS, /*Enums=*/false); } + +static TableGen::Emitter::Opt Y("gen-intrinsic-impl", EmitIntrinsicImpl, + "Generate intrinsic information"); diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp index 01da32e5e7ea6..514346c843d1a 100644 --- a/llvm/utils/TableGen/OptParserEmitter.cpp +++ b/llvm/utils/TableGen/OptParserEmitter.cpp @@ -7,7 +7,6 @@ //===----------------------------------------------------------------------===// #include "OptEmitter.h" -#include "TableGenBackends.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" @@ -213,8 +212,7 @@ static MarshallingInfo createMarshallingInfo(const Record &R) { /// OptParserEmitter - This tablegen backend takes an input .td file /// describing a list of options and emits a data structure for parsing and /// working with those options when given an input command line. -namespace llvm { -void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { +static void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { // Get the option groups and options. 
const std::vector &Groups = Records.getAllDerivedDefinitions("OptionGroup"); @@ -500,4 +498,6 @@ void EmitOptParser(RecordKeeper &Records, raw_ostream &OS) { OS << "\n"; } -} // end namespace llvm + +static TableGen::Emitter::Opt X("gen-opt-parser-defs", EmitOptParser, + "Generate option definitions"); diff --git a/llvm/utils/TableGen/OptRSTEmitter.cpp b/llvm/utils/TableGen/OptRSTEmitter.cpp index d32522c00f451..87e755d943a14 100644 --- a/llvm/utils/TableGen/OptRSTEmitter.cpp +++ b/llvm/utils/TableGen/OptRSTEmitter.cpp @@ -7,17 +7,16 @@ //===----------------------------------------------------------------------===// #include "OptEmitter.h" -#include "TableGenBackends.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" using namespace llvm; /// OptParserEmitter - This tablegen backend takes an input .td file /// describing a list of options and emits a RST man page. -namespace llvm { -void EmitOptRST(RecordKeeper &Records, raw_ostream &OS) { +static void EmitOptRST(RecordKeeper &Records, raw_ostream &OS) { llvm::StringMap> OptionsByGroup; std::vector OptionsWithoutGroup; @@ -103,4 +102,6 @@ void EmitOptRST(RecordKeeper &Records, raw_ostream &OS) { } } } -} // end namespace llvm + +static TableGen::Emitter::Opt X("gen-opt-rst", EmitOptRST, + "Generate option RST"); diff --git a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp index 75a64f2430050..e07fb91880980 100644 --- a/llvm/utils/TableGen/PseudoLoweringEmitter.cpp +++ b/llvm/utils/TableGen/PseudoLoweringEmitter.cpp @@ -8,7 +8,6 @@ #include "CodeGenInstruction.h" #include "CodeGenTarget.h" -#include "TableGenBackends.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" @@ -314,10 +313,5 @@ void PseudoLoweringEmitter::run(raw_ostream &o) { emitLoweringEmitter(o); } -namespace llvm { - -void EmitPseudoLowering(RecordKeeper &RK, raw_ostream &OS) { - PseudoLoweringEmitter(RK).run(OS); -} - -} // namespace llvm +static TableGen::Emitter::OptClass + X("gen-pseudo-lowering", "Generate pseudo instruction lowering"); diff --git a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp index b534ce4b3b785..b02f7bd4198e9 100644 --- a/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp +++ b/llvm/utils/TableGen/RISCVTargetDefEmitter.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "TableGenBackends.h" #include "llvm/Support/RISCVISAInfo.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" using namespace llvm; @@ -47,7 +47,7 @@ static std::string getMArch(const Record &Rec) { return (*ISAInfo)->toString(); } -void llvm::EmitRISCVTargetDef(const RecordKeeper &RK, raw_ostream &OS) { +static void EmitRISCVTargetDef(RecordKeeper &RK, raw_ostream &OS) { OS << "#ifndef PROC\n" << "#define PROC(ENUM, NAME, DEFAULT_MARCH)\n" << "#endif\n\n"; @@ -80,3 +80,6 @@ void llvm::EmitRISCVTargetDef(const RecordKeeper &RK, raw_ostream &OS) { OS << "\n#undef TUNE_PROC\n"; } + +static TableGen::Emitter::Opt X("gen-riscv-target-def", EmitRISCVTargetDef, + "Generate the list of CPU for RISCV"); diff --git a/llvm/utils/TableGen/RegisterBankEmitter.cpp b/llvm/utils/TableGen/RegisterBankEmitter.cpp index a5acb50b5d6d5..c5ba6a897c77e 100644 --- a/llvm/utils/TableGen/RegisterBankEmitter.cpp +++ b/llvm/utils/TableGen/RegisterBankEmitter.cpp @@ -14,7 +14,6 @@ #include 
"CodeGenRegisters.h" #include "CodeGenTarget.h" #include "InfoByHwMode.h" -#include "TableGenBackends.h" #include "llvm/ADT/BitVector.h" #include "llvm/Support/Debug.h" #include "llvm/TableGen/Error.h" @@ -328,10 +327,5 @@ void RegisterBankEmitter::run(raw_ostream &OS) { OS << "#endif // GET_TARGET_REGBANK_IMPL\n"; } -namespace llvm { - -void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS) { - RegisterBankEmitter(RK).run(OS); -} - -} // end namespace llvm +static TableGen::Emitter::OptClass + X("gen-register-bank", "Generate registers bank descriptions"); diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp index aa9a979c63448..0c32a6aa51cc6 100644 --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -17,7 +17,6 @@ #include "CodeGenTarget.h" #include "InfoByHwMode.h" #include "SequenceToOffsetTable.h" -#include "TableGenBackends.h" #include "Types.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" @@ -1909,10 +1908,5 @@ void RegisterInfoEmitter::debugDump(raw_ostream &OS) { } } -namespace llvm { - -void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS) { - RegisterInfoEmitter(RK).run(OS); -} - -} // end namespace llvm +static TableGen::Emitter::OptClass + X("gen-register-info", "Generate registers and register classes info"); diff --git a/llvm/utils/TableGen/SearchableTableEmitter.cpp b/llvm/utils/TableGen/SearchableTableEmitter.cpp index fd6508ceec6ff..2fc291708db7e 100644 --- a/llvm/utils/TableGen/SearchableTableEmitter.cpp +++ b/llvm/utils/TableGen/SearchableTableEmitter.cpp @@ -13,13 +13,13 @@ //===----------------------------------------------------------------------===// #include "CodeGenIntrinsics.h" -#include "TableGenBackends.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" +#include "llvm/TableGen/TableGenBackend.h" #include #include #include @@ -823,10 +823,5 @@ void SearchableTableEmitter::run(raw_ostream &OS) { OS << "#undef " << Guard << "\n"; } -namespace llvm { - -void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS) { - SearchableTableEmitter(RK).run(OS); -} - -} // namespace llvm +static TableGen::Emitter::OptClass + X("gen-searchable-tables", "Generate generic binary-searchable table"); diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index 3ff25413b873e..9bbf7cd673a09 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -14,7 +14,6 @@ #include "CodeGenSchedule.h" #include "CodeGenTarget.h" #include "PredicateExpander.h" -#include "TableGenBackends.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" @@ -1985,10 +1984,5 @@ void SubtargetEmitter::run(raw_ostream &OS) { EmitMCInstrAnalysisPredicateFunctions(OS); } -namespace llvm { - -void EmitSubtarget(RecordKeeper &RK, raw_ostream &OS) { - SubtargetEmitter(RK).run(OS); -} - -} // end namespace llvm +static TableGen::Emitter::OptClass + X("gen-subtarget", "Generate subtarget enumerations"); diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp index 2ec3deb8ae258..b2ed48cffe6be 100644 --- a/llvm/utils/TableGen/TableGen.cpp +++ b/llvm/utils/TableGen/TableGen.cpp @@ -10,61 +10,20 @@ // //===----------------------------------------------------------------------===// -#include 
"TableGenBackends.h" // Declares all backends. +#include "llvm/ADT/StringRef.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/raw_ostream.h" #include "llvm/TableGen/Main.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/SetTheory.h" +#include "llvm/TableGen/TableGenBackend.h" #include #include #include using namespace llvm; -enum ActionType { - PrintRecords, - PrintDetailedRecords, - NullBackend, - DumpJSON, - GenEmitter, - GenRegisterInfo, - GenInstrInfo, - GenInstrDocs, - GenAsmWriter, - GenAsmMatcher, - GenDisassembler, - GenPseudoLowering, - GenCompressInst, - GenCallingConv, - GenDAGISel, - GenDFAPacketizer, - GenFastISel, - GenSubtarget, - GenIntrinsicEnums, - GenIntrinsicImpl, - PrintEnums, - PrintSets, - GenOptParserDefs, - GenOptRST, - GenCTags, - GenAttributes, - GenSearchableTables, - GenGlobalISel, - GenGICombiner, - GenX86EVEX2VEXTables, - GenX86FoldTables, - GenX86MnemonicTables, - GenRegisterBank, - GenExegesis, - GenAutomata, - GenDirectivesEnumDecl, - GenDirectivesEnumImpl, - GenDXILOperation, - GenRISCVTargetDef, -}; - namespace llvm { cl::opt EmitLongStrLiterals( "long-string-literals", @@ -75,227 +34,54 @@ cl::opt EmitLongStrLiterals( } // end namespace llvm namespace { -cl::opt Action( - cl::desc("Action to perform:"), - cl::values( - clEnumValN(PrintRecords, "print-records", - "Print all records to stdout (default)"), - clEnumValN(PrintDetailedRecords, "print-detailed-records", - "Print full details of all records to stdout"), - clEnumValN(NullBackend, "null-backend", - "Do nothing after parsing (useful for timing)"), - clEnumValN(DumpJSON, "dump-json", - "Dump all records as machine-readable JSON"), - clEnumValN(GenEmitter, "gen-emitter", "Generate machine code emitter"), - clEnumValN(GenRegisterInfo, "gen-register-info", - "Generate registers and register classes info"), - clEnumValN(GenInstrInfo, "gen-instr-info", - "Generate instruction descriptions"), - clEnumValN(GenInstrDocs, "gen-instr-docs", - "Generate instruction documentation"), - clEnumValN(GenCallingConv, "gen-callingconv", - "Generate calling convention descriptions"), - clEnumValN(GenAsmWriter, "gen-asm-writer", "Generate assembly writer"), - clEnumValN(GenDisassembler, "gen-disassembler", - "Generate disassembler"), - clEnumValN(GenPseudoLowering, "gen-pseudo-lowering", - "Generate pseudo instruction lowering"), - clEnumValN(GenCompressInst, "gen-compress-inst-emitter", - "Generate RISCV compressed instructions."), - clEnumValN(GenAsmMatcher, "gen-asm-matcher", - "Generate assembly instruction matcher"), - clEnumValN(GenDAGISel, "gen-dag-isel", - "Generate a DAG instruction selector"), - clEnumValN(GenDFAPacketizer, "gen-dfa-packetizer", - "Generate DFA Packetizer for VLIW targets"), - clEnumValN(GenFastISel, "gen-fast-isel", - "Generate a \"fast\" instruction selector"), - clEnumValN(GenSubtarget, "gen-subtarget", - "Generate subtarget enumerations"), - clEnumValN(GenIntrinsicEnums, "gen-intrinsic-enums", - "Generate intrinsic enums"), - clEnumValN(GenIntrinsicImpl, "gen-intrinsic-impl", - "Generate intrinsic information"), - clEnumValN(PrintEnums, "print-enums", "Print enum values for a class"), - clEnumValN(PrintSets, "print-sets", - "Print expanded sets for testing DAG exprs"), - clEnumValN(GenOptParserDefs, "gen-opt-parser-defs", - "Generate option definitions"), - clEnumValN(GenOptRST, "gen-opt-rst", "Generate option RST"), - clEnumValN(GenCTags, "gen-ctags", "Generate ctags-compatible index"), - clEnumValN(GenAttributes, "gen-attrs", 
"Generate attributes"), - clEnumValN(GenSearchableTables, "gen-searchable-tables", - "Generate generic binary-searchable table"), - clEnumValN(GenGlobalISel, "gen-global-isel", - "Generate GlobalISel selector"), - clEnumValN(GenGICombiner, "gen-global-isel-combiner", - "Generate GlobalISel combiner"), - clEnumValN(GenX86EVEX2VEXTables, "gen-x86-EVEX2VEX-tables", - "Generate X86 EVEX to VEX compress tables"), - clEnumValN(GenX86FoldTables, "gen-x86-fold-tables", - "Generate X86 fold tables"), - clEnumValN(GenX86MnemonicTables, "gen-x86-mnemonic-tables", - "Generate X86 mnemonic tables"), - clEnumValN(GenRegisterBank, "gen-register-bank", - "Generate registers bank descriptions"), - clEnumValN(GenExegesis, "gen-exegesis", - "Generate llvm-exegesis tables"), - clEnumValN(GenAutomata, "gen-automata", "Generate generic automata"), - clEnumValN(GenDirectivesEnumDecl, "gen-directive-decl", - "Generate directive related declaration code (header file)"), - clEnumValN(GenDirectivesEnumImpl, "gen-directive-impl", - "Generate directive related implementation code"), - clEnumValN(GenDXILOperation, "gen-dxil-operation", - "Generate DXIL operation information"), - clEnumValN(GenRISCVTargetDef, "gen-riscv-target-def", - "Generate the list of CPU for RISCV"))); + cl::OptionCategory PrintEnumsCat("Options for -print-enums"); cl::opt Class("class", cl::desc("Print Enum list for this class"), cl::value_desc("class name"), cl::cat(PrintEnumsCat)); -bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { - switch (Action) { - case PrintRecords: - OS << Records; // No argument, dump all contents - break; - case PrintDetailedRecords: - EmitDetailedRecords(Records, OS); - break; - case NullBackend: // No backend at all. - break; - case DumpJSON: - EmitJSON(Records, OS); - break; - case GenEmitter: - EmitCodeEmitter(Records, OS); - break; - case GenRegisterInfo: - EmitRegisterInfo(Records, OS); - break; - case GenInstrInfo: - EmitInstrInfo(Records, OS); - break; - case GenInstrDocs: - EmitInstrDocs(Records, OS); - break; - case GenCallingConv: - EmitCallingConv(Records, OS); - break; - case GenAsmWriter: - EmitAsmWriter(Records, OS); - break; - case GenAsmMatcher: - EmitAsmMatcher(Records, OS); - break; - case GenDisassembler: - EmitDisassembler(Records, OS); - break; - case GenPseudoLowering: - EmitPseudoLowering(Records, OS); - break; - case GenCompressInst: - EmitCompressInst(Records, OS); - break; - case GenDAGISel: - EmitDAGISel(Records, OS); - break; - case GenDFAPacketizer: - EmitDFAPacketizer(Records, OS); - break; - case GenFastISel: - EmitFastISel(Records, OS); - break; - case GenSubtarget: - EmitSubtarget(Records, OS); - break; - case GenIntrinsicEnums: - EmitIntrinsicEnums(Records, OS); - break; - case GenIntrinsicImpl: - EmitIntrinsicImpl(Records, OS); - break; - case GenOptParserDefs: - EmitOptParser(Records, OS); - break; - case GenOptRST: - EmitOptRST(Records, OS); - break; - case PrintEnums: { - for (Record *Rec : Records.getAllDerivedDefinitions(Class)) - OS << Rec->getName() << ", "; - OS << "\n"; - break; - } - case PrintSets: { - SetTheory Sets; - Sets.addFieldExpander("Set", "Elements"); - for (Record *Rec : Records.getAllDerivedDefinitions("Set")) { - OS << Rec->getName() << " = ["; - const std::vector *Elts = Sets.expand(Rec); - assert(Elts && "Couldn't expand Set instance"); - for (Record *Elt : *Elts) - OS << ' ' << Elt->getName(); - OS << " ]\n"; - } - break; - } - case GenCTags: - EmitCTags(Records, OS); - break; - case GenAttributes: - EmitAttributes(Records, OS); - break; - 
case GenSearchableTables: - EmitSearchableTables(Records, OS); - break; - case GenGlobalISel: - EmitGlobalISel(Records, OS); - break; - case GenGICombiner: - EmitGICombiner(Records, OS); - break; - case GenRegisterBank: - EmitRegisterBank(Records, OS); - break; - case GenX86EVEX2VEXTables: - EmitX86EVEX2VEXTables(Records, OS); - break; - case GenX86MnemonicTables: - EmitX86MnemonicTables(Records, OS); - break; - case GenX86FoldTables: - EmitX86FoldTables(Records, OS); - break; - case GenExegesis: - EmitExegesis(Records, OS); - break; - case GenAutomata: - EmitAutomata(Records, OS); - break; - case GenDirectivesEnumDecl: - EmitDirectivesDecl(Records, OS); - break; - case GenDirectivesEnumImpl: - EmitDirectivesImpl(Records, OS); - break; - case GenDXILOperation: - EmitDXILOperation(Records, OS); - break; - case GenRISCVTargetDef: - EmitRISCVTargetDef(Records, OS); - break; - } +void PrintRecords(RecordKeeper &Records, raw_ostream &OS) { + OS << Records; // No argument, dump all contents +} - return false; +void PrintEnums(RecordKeeper &Records, raw_ostream &OS) { + for (Record *Rec : Records.getAllDerivedDefinitions(Class)) + OS << Rec->getName() << ", "; + OS << "\n"; } + +void PrintSets(RecordKeeper &Records, raw_ostream &OS) { + SetTheory Sets; + Sets.addFieldExpander("Set", "Elements"); + for (Record *Rec : Records.getAllDerivedDefinitions("Set")) { + OS << Rec->getName() << " = ["; + const std::vector *Elts = Sets.expand(Rec); + assert(Elts && "Couldn't expand Set instance"); + for (Record *Elt : *Elts) + OS << ' ' << Elt->getName(); + OS << " ]\n"; + } +} + +TableGen::Emitter::Opt X[] = { + {"print-records", PrintRecords, "Print all records to stdout (default)", + true}, + {"print-detailed-records", EmitDetailedRecords, + "Print full details of all records to stdout"}, + {"null-backend", [](RecordKeeper &Records, raw_ostream &OS) {}, + "Do nothing after parsing (useful for timing)"}, + {"dump-json", EmitJSON, "Dump all records as machine-readable JSON"}, + {"print-enums", PrintEnums, "Print enum values for a class"}, + {"print-sets", PrintSets, "Print expanded sets for testing DAG exprs"}, +}; + } // namespace int main(int argc, char **argv) { InitLLVM X(argc, argv); cl::ParseCommandLineOptions(argc, argv); - return TableGenMain(argv[0], &LLVMTableGenMain); + return TableGenMain(argv[0]); } #ifndef __has_feature diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h index 7be83af3734f5..3afe6b01467bb 100644 --- a/llvm/utils/TableGen/TableGenBackends.h +++ b/llvm/utils/TableGen/TableGenBackends.h @@ -63,40 +63,7 @@ namespace llvm { class raw_ostream; class RecordKeeper; -void EmitIntrinsicEnums(RecordKeeper &RK, raw_ostream &OS); -void EmitIntrinsicImpl(RecordKeeper &RK, raw_ostream &OS); -void EmitAsmMatcher(RecordKeeper &RK, raw_ostream &OS); -void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS); -void EmitCallingConv(RecordKeeper &RK, raw_ostream &OS); -void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS); -void EmitDAGISel(RecordKeeper &RK, raw_ostream &OS); -void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS); -void EmitDisassembler(RecordKeeper &RK, raw_ostream &OS); -void EmitFastISel(RecordKeeper &RK, raw_ostream &OS); -void EmitInstrInfo(RecordKeeper &RK, raw_ostream &OS); -void EmitInstrDocs(RecordKeeper &RK, raw_ostream &OS); -void EmitPseudoLowering(RecordKeeper &RK, raw_ostream &OS); -void EmitCompressInst(RecordKeeper &RK, raw_ostream &OS); -void EmitRegisterInfo(RecordKeeper &RK, raw_ostream &OS); -void 
EmitSubtarget(RecordKeeper &RK, raw_ostream &OS); void EmitMapTable(RecordKeeper &RK, raw_ostream &OS); -void EmitOptParser(RecordKeeper &RK, raw_ostream &OS); -void EmitOptRST(RecordKeeper &RK, raw_ostream &OS); -void EmitCTags(RecordKeeper &RK, raw_ostream &OS); -void EmitAttributes(RecordKeeper &RK, raw_ostream &OS); -void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS); -void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS); -void EmitGICombiner(RecordKeeper &RK, raw_ostream &OS); -void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS); -void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS); -void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS); -void EmitRegisterBank(RecordKeeper &RK, raw_ostream &OS); -void EmitExegesis(RecordKeeper &RK, raw_ostream &OS); -void EmitAutomata(RecordKeeper &RK, raw_ostream &OS); -void EmitDirectivesDecl(RecordKeeper &RK, raw_ostream &OS); -void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS); -void EmitDXILOperation(RecordKeeper &RK, raw_ostream &OS); -void EmitRISCVTargetDef(const RecordKeeper &RK, raw_ostream &OS); // Defined in DecoderEmitter.cpp void EmitDecoder(RecordKeeper &RK, raw_ostream &OS, diff --git a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp index 9a21ccb937298..c0b019534c662 100644 --- a/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86EVEX2VEXTablesEmitter.cpp @@ -13,7 +13,6 @@ #include "CodeGenInstruction.h" #include "CodeGenTarget.h" -#include "TableGenBackends.h" #include "X86RecognizableInstr.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" @@ -241,8 +240,5 @@ void X86EVEX2VEXTablesEmitter::run(raw_ostream &OS) { } } // namespace -namespace llvm { -void EmitX86EVEX2VEXTables(RecordKeeper &RK, raw_ostream &OS) { - X86EVEX2VEXTablesEmitter(RK).run(OS); -} -} // namespace llvm +static TableGen::Emitter::OptClass + X("gen-x86-EVEX2VEX-tables", "Generate X86 EVEX to VEX compress tables"); diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index e8555df7ba021..864f402fe8390 100644 --- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -13,7 +13,6 @@ #include "CodeGenInstruction.h" #include "CodeGenTarget.h" -#include "TableGenBackends.h" #include "X86RecognizableInstr.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/FormattedStream.h" @@ -614,9 +613,5 @@ void X86FoldTablesEmitter::run(raw_ostream &o) { printTable(Table4, "Table4", OS); } -namespace llvm { - -void EmitX86FoldTables(RecordKeeper &RK, raw_ostream &OS) { - X86FoldTablesEmitter(RK).run(OS); -} -} // namespace llvm +static TableGen::Emitter::OptClass + X("gen-x86-fold-tables", "Generate X86 fold tables"); diff --git a/llvm/utils/TableGen/X86MnemonicTables.cpp b/llvm/utils/TableGen/X86MnemonicTables.cpp index aca311be63574..aeafee1574623 100644 --- a/llvm/utils/TableGen/X86MnemonicTables.cpp +++ b/llvm/utils/TableGen/X86MnemonicTables.cpp @@ -13,7 +13,6 @@ #include "CodeGenInstruction.h" #include "CodeGenTarget.h" -#include "TableGenBackends.h" #include "X86RecognizableInstr.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" @@ -88,8 +87,5 @@ void X86MnemonicTablesEmitter::run(raw_ostream &OS) { } // namespace -namespace llvm { -void EmitX86MnemonicTables(RecordKeeper &RK, raw_ostream &OS) { - X86MnemonicTablesEmitter(RK).run(OS); -} -} // namespace llvm +static TableGen::Emitter::OptClass + 
X("gen-x86-mnemonic-tables", "Generate X86 mnemonic tables"); From 6beb371e4c469801e04711b81d27cde6da11b032 Mon Sep 17 00:00:00 2001 From: luxufan Date: Fri, 3 Mar 2023 16:34:17 +0800 Subject: [PATCH 141/691] [InstCombine] Combine binary operator of two phi node Combine binary operator of two phi node if there is at least one specific constant value in phi0 and phi1's incoming values for each same incoming block and this specific constant value can be used to do optimization for specific binary operator. For example: ``` %phi0 = phi i32 [0, %bb0], [%i, %bb1] %phi1 = phi i32 [%j, %bb0], [0, %bb1] %add = add i32 %phi0, %phi1 ==> %add = phi i32 [%j, %bb0], [%i, %bb1] ``` Fixes: https://github.com/llvm/llvm-project/issues/61137 Differential Revision: https://reviews.llvm.org/D145223 --- .../InstCombine/InstructionCombining.cpp | 47 ++++++++++++++++++- llvm/test/Transforms/InstCombine/phi.ll | 43 +++++++++++------ 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index c3ba452e3341b..8dd752b82541d 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1294,7 +1294,7 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { auto *Phi0 = dyn_cast(BO.getOperand(0)); auto *Phi1 = dyn_cast(BO.getOperand(1)); if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() || - Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2) + Phi0->getNumOperands() != Phi1->getNumOperands()) return nullptr; // TODO: Remove the restriction for binop being in the same block as the phis. @@ -1302,6 +1302,51 @@ Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) { BO.getParent() != Phi1->getParent()) return nullptr; + // Fold if there is at least one specific constant value in phi0 or phi1's + // incoming values that comes from the same block and this specific constant + // value can be used to do optimization for specific binary operator. 
+ // For example: + // %phi0 = phi i32 [0, %bb0], [%i, %bb1] + // %phi1 = phi i32 [%j, %bb0], [0, %bb1] + // %add = add i32 %phi0, %phi1 + // ==> + // %add = phi i32 [%j, %bb0], [%i, %bb1] + Constant *C = ConstantExpr::getBinOpIdentity(BO.getOpcode(), BO.getType(), + /*AllowRHSConstant*/ false); + if (C) { + SmallVector NewIncomingValues; + auto CanFoldIncomingValuePair = [&](std::tuple T) { + auto &Phi0Use = std::get<0>(T); + auto &Phi1Use = std::get<1>(T); + if (Phi0->getIncomingBlock(Phi0Use) != Phi1->getIncomingBlock(Phi1Use)) + return false; + Value *Phi0UseV = Phi0Use.get(); + Value *Phi1UseV = Phi1Use.get(); + if (Phi0UseV == C) + NewIncomingValues.push_back(Phi1UseV); + else if (Phi1UseV == C) + NewIncomingValues.push_back(Phi0UseV); + else + return false; + return true; + }; + + if (all_of(zip(Phi0->operands(), Phi1->operands()), + CanFoldIncomingValuePair)) { + PHINode *NewPhi = + PHINode::Create(Phi0->getType(), Phi0->getNumOperands()); + assert(NewIncomingValues.size() == Phi0->getNumOperands() && + "The number of collected incoming values should equal the number " + "of the original PHINode operands!"); + for (unsigned I = 0; I < Phi0->getNumOperands(); I++) + NewPhi->addIncoming(NewIncomingValues[I], Phi0->getIncomingBlock(I)); + return NewPhi; + } + } + + if (Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2) + return nullptr; + // Match a pair of incoming constants for one of the predecessor blocks. BasicBlock *ConstBB, *OtherBB; Constant *C0, *C1; diff --git a/llvm/test/Transforms/InstCombine/phi.ll b/llvm/test/Transforms/InstCombine/phi.ll index 75eece9b626be..52a5c0b3a2d74 100644 --- a/llvm/test/Transforms/InstCombine/phi.ll +++ b/llvm/test/Transforms/InstCombine/phi.ll @@ -1508,9 +1508,7 @@ define i32 @add_two_phi_node_can_fold(i1 %c, i32 %i, i32 %j) { ; CHECK: if.then: ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[Y:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[Y]], [[X]] +; CHECK-NEXT: [[ADD:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[ADD]] ; entry: @@ -1558,9 +1556,7 @@ define i32 @or_two_phi_node_can_fold(i1 %c, i32 %i, i32 %j) { ; CHECK: if.then: ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[Y:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[ADD:%.*]] = or i32 [[Y]], [[X]] +; CHECK-NEXT: [[ADD:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[ADD]] ; entry: @@ -1583,9 +1579,7 @@ define i32 @and_two_phi_node_can_fold(i1 %c, i32 %i, i32 %j) { ; CHECK: if.then: ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[X:%.*]] = phi i32 [ -1, [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[Y:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ -1, [[ENTRY]] ] -; CHECK-NEXT: [[ADD:%.*]] = and i32 [[Y]], [[X]] +; CHECK-NEXT: [[ADD:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[ADD]] ; entry: @@ -1608,9 +1602,7 @@ define i32 @mul_two_phi_node_can_fold(i1 %c, i32 %i, i32 %j) { ; CHECK: if.then: ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[X:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[Y:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ 1, [[ENTRY]] ] -; 
CHECK-NEXT: [[ADD:%.*]] = mul i32 [[Y]], [[X]] +; CHECK-NEXT: [[ADD:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: ret i32 [[ADD]] ; entry: @@ -1633,9 +1625,32 @@ define i32 @xor_two_phi_node_can_fold(i1 %c, i32 %i, i32 %j) { ; CHECK: if.then: ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: +; CHECK-NEXT: [[ADD:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + br i1 %c, label %if.then, label %if.end + +if.then: + br label %if.end + +if.end: + %x = phi i32 [ 0, %if.then ], [ %j, %entry ] + %y = phi i32 [ %i, %if.then ], [ 0, %entry ] + %add = xor i32 %y, %x + ret i32 %add +} + +define i32 @sub_two_phi_node_cant_fold(i1 %c, i32 %i, i32 %j) { +; CHECK-LABEL: @sub_two_phi_node_cant_fold( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: ; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, [[IF_THEN]] ], [ [[J:%.*]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[Y:%.*]] = phi i32 [ [[I:%.*]], [[IF_THEN]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[ADD:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: [[ADD:%.*]] = sub i32 [[Y]], [[X]] ; CHECK-NEXT: ret i32 [[ADD]] ; entry: @@ -1647,6 +1662,6 @@ if.then: if.end: %x = phi i32 [ 0, %if.then ], [ %j, %entry ] %y = phi i32 [ %i, %if.then ], [ 0, %entry ] - %add = xor i32 %y, %x + %add = sub i32 %y, %x ret i32 %add } From 3e79dfac0ea7b7cf2c9687f798b6f63ba5b07c9d Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Tue, 21 Mar 2023 16:52:14 +0900 Subject: [PATCH 142/691] [Bazel] Fixup for D144351, Add `alwayslink` to `//llvm:tblgen` --- utils/bazel/llvm-project-overlay/llvm/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 9707d1296debc..55064fba0bf88 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -570,6 +570,7 @@ cc_library( cc_library( name = "tblgen", + alwayslink = True, srcs = glob([ "utils/TableGen/*.cpp", "utils/TableGen/*.inc", From 6fc670e5e370e90759b5adecd9980b63609dc2f8 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 20 Mar 2023 15:48:48 +0100 Subject: [PATCH 143/691] [WebAssembly] Add auto-upgrade for renamed intrinsics D138249 renamed a number of wasm intrinsics without implementing auto-upgrade support. 
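For illustration, the auto-upgrade is a mechanical rename performed when a module is parsed; a sketch mirroring the new test file added below (no behavior change is intended):

```
; Old intrinsic name, as found in existing bitcode:
%res = call <4 x float> @llvm.wasm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
; Auto-upgraded form produced on module load:
%res = call <4 x float> @llvm.wasm.relaxed.madd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c)
```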
Differential Revision: https://reviews.llvm.org/D146424 --- llvm/lib/IR/AutoUpgrade.cpp | 35 ++++++++++++ .../Assembler/autoupgrade-wasm-intrinsics.ll | 53 +++++++++++++++++++ 2 files changed, 88 insertions(+) create mode 100644 llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index f68c492ee67b0..5c74acc0fd871 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -27,6 +27,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/IntrinsicsARM.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -1143,6 +1144,40 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { break; } + case 'w': + if (Name.startswith("wasm.fma.")) { + rename(F); + NewFn = Intrinsic::getDeclaration( + F->getParent(), Intrinsic::wasm_relaxed_madd, F->getReturnType()); + return true; + } + if (Name.startswith("wasm.fms.")) { + rename(F); + NewFn = Intrinsic::getDeclaration( + F->getParent(), Intrinsic::wasm_relaxed_nmadd, F->getReturnType()); + return true; + } + if (Name.startswith("wasm.laneselect.")) { + rename(F); + NewFn = Intrinsic::getDeclaration( + F->getParent(), Intrinsic::wasm_relaxed_laneselect, + F->getReturnType()); + return true; + } + if (Name == "wasm.dot.i8x16.i7x16.signed") { + rename(F); + NewFn = Intrinsic::getDeclaration( + F->getParent(), Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed); + return true; + } + if (Name == "wasm.dot.i8x16.i7x16.add.signed") { + rename(F); + NewFn = Intrinsic::getDeclaration( + F->getParent(), Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed); + return true; + } + break; + case 'x': if (UpgradeX86IntrinsicFunction(F, Name, NewFn)) return true; diff --git a/llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll b/llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll new file mode 100644 index 0000000000000..012fa1dfe7e28 --- /dev/null +++ b/llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S < %s | FileCheck %s + +define <16 x i8> @test_laneselect(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CHECK-LABEL: @test_laneselect( +; CHECK-NEXT: [[RES:%.*]] = call <16 x i8> @llvm.wasm.relaxed.laneselect.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) +; CHECK-NEXT: ret <16 x i8> [[RES]] +; + %res = call <16 x i8> @llvm.wasm.laneselect.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) + ret <16 x i8> %res +} + +define <8 x i16> @test_dot(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: @test_dot( +; CHECK-NEXT: [[RES:%.*]] = call <8 x i16> @llvm.wasm.relaxed.dot.i8x16.i7x16.signed(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) +; CHECK-NEXT: ret <8 x i16> [[RES]] +; + %res = call <8 x i16> @llvm.wasm.dot.i8x16.i7x16.signed(<16 x i8> %a, <16 x i8> %b) + ret <8 x i16> %res +} + +define <4 x i32> @test_dot_add(<16 x i8> %a, <16 x i8> %b, <4 x i32> %c) { +; CHECK-LABEL: @test_dot_add( +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.wasm.relaxed.dot.i8x16.i7x16.add.signed(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <4 x i32> [[C:%.*]]) +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %res = call <4 x i32> @llvm.wasm.dot.i8x16.i7x16.add.signed(<16 x i8> %a, <16 x i8> %b, <4 x i32> %c) + ret <4 x i32> %res +} + +define <4 x float> @test_fma(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: @test_fma( +; CHECK-NEXT: 
[[RES:%.*]] = call <4 x float> @llvm.wasm.relaxed.madd.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]]) +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.wasm.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) + ret <4 x float> %res +} + +define <4 x float> @test_fms(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; CHECK-LABEL: @test_fms( +; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.wasm.relaxed.nmadd.v4f32(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]]) +; CHECK-NEXT: ret <4 x float> [[RES]] +; + %res = call <4 x float> @llvm.wasm.fms.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) + ret <4 x float> %res +} + +declare <16 x i8> @llvm.wasm.laneselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +declare <8 x i16> @llvm.wasm.dot.i8x16.i7x16.signed(<16 x i8>, <16 x i8>) +declare <4 x i32> @llvm.wasm.dot.i8x16.i7x16.add.signed(<16 x i8>, <16 x i8>, <4 x i32>) +declare <4 x float> @llvm.wasm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) +declare <4 x float> @llvm.wasm.fms.v4f32(<4 x float>, <4 x float>, <4 x float>) From db8c0b295d27a870d35c7353b2482b20230ee7b1 Mon Sep 17 00:00:00 2001 From: Bing1 Yu Date: Tue, 21 Mar 2023 16:38:05 +0800 Subject: [PATCH 144/691] [X86][MemFold] Stop emitting Header for X86 fold tables Reviewed By: skan Differential Revision: https://reviews.llvm.org/D146502 --- llvm/test/TableGen/x86-auto-memfold.td | 2 +- llvm/utils/TableGen/X86FoldTablesEmitter.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/test/TableGen/x86-auto-memfold.td b/llvm/test/TableGen/x86-auto-memfold.td index 55bb8875ef205..548b63ede7606 100644 --- a/llvm/test/TableGen/x86-auto-memfold.td +++ b/llvm/test/TableGen/x86-auto-memfold.td @@ -1,2 +1,2 @@ // RUN: llvm-tblgen -gen-x86-fold-tables -asmwriternum=1 %p/../../lib/Target/X86/X86.td -I %p/../../include -I %p/../../lib/Target/X86/ -I %p/../../include/ -I %p/../../lib/Target/ --write-if-changed -o %t1 -// RUN: cmp --ignore-initial=0:568 %p/../../lib/Target/X86/X86MemFoldTables.inc %t1 +// RUN: cmp %p/../../lib/Target/X86/X86MemFoldTables.inc %t1 diff --git a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp index 864f402fe8390..f43babdfde758 100644 --- a/llvm/utils/TableGen/X86FoldTablesEmitter.cpp +++ b/llvm/utils/TableGen/X86FoldTablesEmitter.cpp @@ -523,7 +523,6 @@ void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInstr, void X86FoldTablesEmitter::run(raw_ostream &o) { formatted_raw_ostream OS(o); - emitSourceFileHeader("X86 fold tables", OS); // Holds all memory instructions std::vector MemInsts; From d9b3a9442530c5db302c0af62196e6d907453a23 Mon Sep 17 00:00:00 2001 From: Ilyas Mustafazade Date: Tue, 21 Mar 2023 09:43:51 +0100 Subject: [PATCH 145/691] [NFC] Change strcpy to std::copy Differential Revision: https://reviews.llvm.org/D146382 --- llvm/lib/Object/COFFImportFile.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Object/COFFImportFile.cpp b/llvm/lib/Object/COFFImportFile.cpp index 7090d3ca5618f..ce6d84d5c2347 100644 --- a/llvm/lib/Object/COFFImportFile.cpp +++ b/llvm/lib/Object/COFFImportFile.cpp @@ -86,7 +86,8 @@ static void writeStringTable(std::vector &B, for (const auto &S : Strings) { B.resize(Pos + S.length() + 1); - strcpy(reinterpret_cast(&B[Pos]), S.c_str()); + std::copy(S.begin(), S.end(), std::next(B.begin(), Pos)); + B[Pos + S.length()] = 0; Pos += S.length() + 1; } From 8325d46a4ab803c6eeff28bf1bdafb43287ce557 Mon 
Sep 17 00:00:00 2001 From: Ding Xiang Fei Date: Tue, 21 Mar 2023 09:45:51 +0100 Subject: [PATCH 146/691] [MergeFuncs] Compare load instruction metadata MergeFuncs currently merges load instructions with differing semantically-relevant metadata, e.g. a load that has !nonnull with one that does not. Update FunctionComparator to make sure that metadata of both loads is the same. Alternatively, it would be possible to ignore the metadata during comparison, and then drop it during merging. Differential Revision: https://reviews.llvm.org/D144682 --- .../Transforms/Utils/FunctionComparator.h | 3 +- .../Transforms/Utils/FunctionComparator.cpp | 47 +++++++++++++++---- 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h index b6b53d0f10cb9..400b9faa94c1b 100644 --- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h +++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h @@ -332,7 +332,8 @@ class FunctionComparator { int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const; int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const; int cmpAttrs(const AttributeList L, const AttributeList R) const; - int cmpRangeMetadata(const MDNode *L, const MDNode *R) const; + int cmpMetadata(const MDNode *L, const MDNode *R) const; + int cmpInstMetadata(Instruction const *L, Instruction const *R) const; int cmpOperandBundlesSchema(const CallBase &LCS, const CallBase &RCS) const; /// Compare two GEPs for equivalent pointer arithmetic. diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp index 3fa61ec68cd30..af8bc8126160e 100644 --- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp +++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp @@ -157,16 +157,13 @@ int FunctionComparator::cmpAttrs(const AttributeList L, return 0; } -int FunctionComparator::cmpRangeMetadata(const MDNode *L, - const MDNode *R) const { +int FunctionComparator::cmpMetadata(const MDNode *L, const MDNode *R) const { if (L == R) return 0; if (!L) return -1; if (!R) return 1; - // Range metadata is a sequence of numbers. Make sure they are the same - // sequence. // TODO: Note that as this is metadata, it is possible to drop and/or merge // this data when considering functions to merge. Thus this comparison would // return 0 (i.e. equivalent), but merging would become more complicated @@ -176,14 +173,48 @@ int FunctionComparator::cmpRangeMetadata(const MDNode *L, if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands())) return Res; for (size_t I = 0; I < L->getNumOperands(); ++I) { + // TODO: the following routine coerce the metadata contents into numbers + // before comparison. + // It ignores any other cases, so that the metadata nodes are considered + // equal even though this is not correct. + // We should structurally compare the metadata nodes to be perfect here. ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I)); ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I)); + if (LLow == RLow) + continue; + if (!LLow) + return -1; + if (!RLow) + return 1; if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue())) return Res; } return 0; } +int FunctionComparator::cmpInstMetadata(Instruction const *L, + Instruction const *R) const { + /// These metadata affects the other optimization passes by making assertions + /// or constraints. + /// Values that carry different expectations should be considered different. 
+ SmallVector> MDL, MDR; + L->getAllMetadataOtherThanDebugLoc(MDL); + R->getAllMetadataOtherThanDebugLoc(MDR); + if (MDL.size() > MDR.size()) + return 1; + else if (MDL.size() < MDR.size()) + return -1; + for (size_t I = 0, N = MDL.size(); I < N; ++I) { + auto const [KeyL, ML] = MDL[I]; + auto const [KeyR, MR] = MDR[I]; + if (int Res = cmpNumbers(KeyL, KeyR)) + return Res; + if (int Res = cmpMetadata(ML, MR)) + return Res; + } + return 0; +} + int FunctionComparator::cmpOperandBundlesSchema(const CallBase &LCS, const CallBase &RCS) const { assert(LCS.getOpcode() == RCS.getOpcode() && "Can't compare otherwise!"); @@ -586,9 +617,7 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpNumbers(LI->getSyncScopeID(), cast(R)->getSyncScopeID())) return Res; - return cmpRangeMetadata( - LI->getMetadata(LLVMContext::MD_range), - cast(R)->getMetadata(LLVMContext::MD_range)); + return cmpInstMetadata(L, R); } if (const StoreInst *SI = dyn_cast(L)) { if (int Res = @@ -616,8 +645,8 @@ int FunctionComparator::cmpOperations(const Instruction *L, if (int Res = cmpNumbers(CI->getTailCallKind(), cast(R)->getTailCallKind())) return Res; - return cmpRangeMetadata(L->getMetadata(LLVMContext::MD_range), - R->getMetadata(LLVMContext::MD_range)); + return cmpMetadata(L->getMetadata(LLVMContext::MD_range), + R->getMetadata(LLVMContext::MD_range)); } if (const InsertValueInst *IVI = dyn_cast(L)) { ArrayRef LIndices = IVI->getIndices(); From 658595d031f726047f6c1a19efefc5e3d265416a Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 21 Mar 2023 09:50:31 +0100 Subject: [PATCH 147/691] [flang] Handle polymorphic entities with rank > 0 in entry statement Correctly create the temporary for argument absent in the entry statement. Reviewed By: PeteSteinfeld Differential Revision: https://reviews.llvm.org/D146447 --- .../flang/Optimizer/Builder/MutableBox.h | 3 ++- flang/lib/Lower/ConvertVariable.cpp | 10 +++++-- flang/lib/Optimizer/Builder/MutableBox.cpp | 4 +-- flang/test/Lower/polymorphic.f90 | 26 +++++++++++++++++++ 4 files changed, 38 insertions(+), 5 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/MutableBox.h b/flang/include/flang/Optimizer/Builder/MutableBox.h index 3f3354d93530e..f763d29c40a11 100644 --- a/flang/include/flang/Optimizer/Builder/MutableBox.h +++ b/flang/include/flang/Optimizer/Builder/MutableBox.h @@ -52,7 +52,8 @@ mlir::Value createUnallocatedBox(fir::FirOpBuilder &builder, mlir::Location loc, fir::MutableBoxValue createTempMutableBox(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type type, llvm::StringRef name = {}, - mlir::Value sourceBox = {}); + mlir::Value sourceBox = {}, + bool isPolymorphic = false); /// Update a MutableBoxValue to describe entity \p source (that must be in /// memory). If \lbounds is not empty, it is used to defined the MutableBoxValue diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 1d91c86c3d2f5..4d0375bfad4eb 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -1690,12 +1690,18 @@ void Fortran::lower::mapSymbolAttributes( "handled above"); // The box is read right away because lowering code does not expect // a non pointer/allocatable symbol to be mapped to a MutableBox. 
+ mlir::Type ty = converter.genType(var); + bool isPolymorphic = false; + if (auto boxTy = ty.dyn_cast()) { + isPolymorphic = ty.isa(); + ty = boxTy.getEleTy(); + } Fortran::lower::genDeclareSymbol( converter, symMap, sym, fir::factory::genMutableBoxRead( builder, loc, - fir::factory::createTempMutableBox(builder, loc, - converter.genType(var)))); + fir::factory::createTempMutableBox(builder, loc, ty, {}, {}, + isPolymorphic))); return true; } return false; diff --git a/flang/lib/Optimizer/Builder/MutableBox.cpp b/flang/lib/Optimizer/Builder/MutableBox.cpp index 8cd7aeb43f214..d092f3a2876b8 100644 --- a/flang/lib/Optimizer/Builder/MutableBox.cpp +++ b/flang/lib/Optimizer/Builder/MutableBox.cpp @@ -366,9 +366,9 @@ mlir::Value fir::factory::createUnallocatedBox( fir::MutableBoxValue fir::factory::createTempMutableBox( fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type type, - llvm::StringRef name, mlir::Value typeSourceBox) { + llvm::StringRef name, mlir::Value typeSourceBox, bool isPolymorphic) { mlir::Type boxType; - if (typeSourceBox) + if (typeSourceBox || isPolymorphic) boxType = fir::ClassType::get(fir::HeapType::get(type)); else boxType = fir::BoxType::get(fir::HeapType::get(type)); diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90 index dd023b9694ff1..67699bd32495f 100644 --- a/flang/test/Lower/polymorphic.f90 +++ b/flang/test/Lower/polymorphic.f90 @@ -1112,6 +1112,32 @@ subroutine class_with_entry(a) ! CHECK-SAME: %[[B:.*]]: !fir.class> {fir.bindc_name = "b"}) { ! CHECK: %[[A:.*]] = fir.alloca !fir.class> {bindc_name = "a", uniq_name = "_QMpolymorphic_testFclass_with_entryEa"} + subroutine class_array_with_entry(a) + class(p1) :: a(:), b(:) + select type (a) + type is(p2) + print*, a%c + class default + print*, a%a + end select + return + entry g(b) + select type(b) + type is(p2) + print*,b%c + class default + print*,b%a + end select + end subroutine + +! CHECK-LABEL: func.func @_QMpolymorphic_testPclass_array_with_entry( +! CHECK-SAME: %[[A:.*]]: !fir.class>> {fir.bindc_name = "a"}) { +! CHECK: %[[B:.*]] = fir.alloca !fir.class>>> + +! CHECK-LABEL: func.func @_QMpolymorphic_testPg( +! CHECK-SAME: %[[B:.*]]: !fir.class>> {fir.bindc_name = "b"}) { +! 
CHECK: %[[A:.*]] = fir.alloca !fir.class>>> + end module program test From 9f48562ebfe1486f9094459d1aadde25268d7ce4 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 21 Mar 2023 15:59:00 +0700 Subject: [PATCH 148/691] [Test] Add test on freezing of widenable condition --- .../InstCombine/freeze-integer-intrinsics.ll | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/freeze-integer-intrinsics.ll b/llvm/test/Transforms/InstCombine/freeze-integer-intrinsics.ll index fa62cbcd24dc1..1d8d2a8029576 100644 --- a/llvm/test/Transforms/InstCombine/freeze-integer-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/freeze-integer-intrinsics.ll @@ -416,6 +416,17 @@ define @sshl_sat_v2i32_scalable_zeroinitializer( %freeze } +define i1 @widenable_condition() { +; CHECK-LABEL: @widenable_condition( +; CHECK-NEXT: [[WC:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[FREEZE:%.*]] = freeze i1 [[WC]] +; CHECK-NEXT: ret i1 [[FREEZE]] +; + %wc = call i1 @llvm.experimental.widenable.condition() + %freeze = freeze i1 %wc + ret i1 %freeze +} + declare i32 @llvm.ctlz.i32(i32, i1 immarg) declare i32 @llvm.cttz.i32(i32, i1 immarg) declare i32 @llvm.abs.i32(i32, i1 immarg) @@ -440,3 +451,4 @@ declare <2 x i32> @llvm.sshl.sat.v2i32(<2 x i32>, <2 x i32>) declare <2 x i32> @llvm.ushl.sat.v2i32(<2 x i32>, <2 x i32>) declare @llvm.sshl.sat.nxv2i32(, ) declare @llvm.ushl.sat.nxv2i32(, ) +declare i1 @llvm.experimental.widenable.condition() From d0de2c51c9a9fc0fedb97ee98f61ce08cb34972b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 21 Mar 2023 09:59:52 +0100 Subject: [PATCH 149/691] [InstCombine] Simplify foldOperationIntoSelectOperand() (NFCI) Rather than handling all instruction types separately, clone the original instruction and replace the select operand. --- .../InstCombine/InstructionCombining.cpp | 49 +++---------------- 1 file changed, 8 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 8dd752b82541d..a7307525b0a59 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1044,45 +1044,12 @@ static Constant *constantFoldOperationIntoSelectOperand( return ConstantFoldInstOperands(&I, ConstOps, I.getModule()->getDataLayout()); } -static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO, - InstCombiner::BuilderTy &Builder) { - if (auto *Cast = dyn_cast(&I)) - return Builder.CreateCast(Cast->getOpcode(), SO, I.getType()); - - if (auto *II = dyn_cast(&I)) { - assert(canConstantFoldCallTo(II, cast(II->getCalledOperand())) && - "Expected constant-foldable intrinsic"); - Intrinsic::ID IID = II->getIntrinsicID(); - if (II->arg_size() == 1) - return Builder.CreateUnaryIntrinsic(IID, SO); - - // This works for real binary ops like min/max (where we always expect the - // constant operand to be canonicalized as op1) and unary ops with a bonus - // constant argument like ctlz/cttz. - // TODO: Handle non-commutative binary intrinsics as below for binops. 
- assert(II->arg_size() == 2 && "Expected binary intrinsic"); - assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand"); - return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1)); - } - - if (auto *EI = dyn_cast<ExtractElementInst>(&I)) - return Builder.CreateExtractElement(SO, EI->getIndexOperand()); - - assert(I.isBinaryOp() && "Unexpected opcode for select folding"); - - // Figure out if the constant is the left or the right argument. - bool ConstIsRHS = isa<Constant>(I.getOperand(1)); - Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS)); - - Value *Op0 = SO, *Op1 = ConstOperand; - if (!ConstIsRHS) - std::swap(Op0, Op1); - - Value *NewBO = Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), Op0, - Op1, SO->getName() + ".op"); - if (auto *NewBOI = dyn_cast<Instruction>(NewBO)) - NewBOI->copyIRFlags(&I); - return NewBO; +static Value *foldOperationIntoSelectOperand(Instruction &I, SelectInst *SI, + Value *NewOp, InstCombiner &IC) { + Instruction *Clone = I.clone(); + Clone->replaceUsesOfWith(SI, NewOp); + IC.InsertNewInstBefore(Clone, *SI); + return Clone; } Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, @@ -1162,9 +1129,9 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, // Create an instruction for the arm that did not fold. if (!NewTV) - NewTV = foldOperationIntoSelectOperand(Op, TV, Builder); + NewTV = foldOperationIntoSelectOperand(Op, SI, TV, *this); if (!NewFV) - NewFV = foldOperationIntoSelectOperand(Op, FV, Builder); + NewFV = foldOperationIntoSelectOperand(Op, SI, FV, *this); return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI); } From 85bc498826d4dac4b64f7b02659f6ec52f11c223 Mon 
Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D146045 --- lldb/include/lldb/Target/StopInfo.h | 3 +- .../Process/elf-core/ProcessElfCore.cpp | 1 + .../Process/elf-core/ThreadElfCore.cpp | 16 +++++----- .../Plugins/Process/elf-core/ThreadElfCore.h | 2 ++ lldb/source/Target/StopInfo.cpp | 29 ++++++++++++------- .../TestAArch64LinuxMTEMemoryTagCoreFile.py | 11 +++++++ llvm/docs/ReleaseNotes.rst | 3 ++ 7 files changed, 47 insertions(+), 18 deletions(-) diff --git a/lldb/include/lldb/Target/StopInfo.h b/lldb/include/lldb/Target/StopInfo.h index 9527a6ea553e3..8d6284e37dacf 100644 --- a/lldb/include/lldb/Target/StopInfo.h +++ b/lldb/include/lldb/Target/StopInfo.h @@ -115,7 +115,8 @@ class StopInfo : public std::enable_shared_from_this { static lldb::StopInfoSP CreateStopReasonWithSignal(Thread &thread, int signo, - const char *description = nullptr); + const char *description = nullptr, + std::optional code = std::nullopt); static lldb::StopInfoSP CreateStopReasonToTrace(Thread &thread); diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp index 2771d1d20cf0c..a0f391b9b26ca 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp @@ -922,6 +922,7 @@ llvm::Error ProcessElfCore::parseLinuxNotes(llvm::ArrayRef notes) { if (status.Fail()) return status.ToError(); thread_data.signo = siginfo.si_signo; + thread_data.code = siginfo.si_code; break; } case ELF::NT_FILE: { diff --git a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp index bb190104cf2fb..0191562d72230 100644 --- a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp +++ b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.cpp @@ -46,7 +46,8 @@ using namespace lldb_private; // Construct a Thread object with given data ThreadElfCore::ThreadElfCore(Process &process, const ThreadData &td) : Thread(process, td.tid), m_thread_name(td.name), m_thread_reg_ctx_sp(), - m_signo(td.signo), m_gpregset_data(td.gpregset), m_notes(td.notes) {} + m_signo(td.signo), m_code(td.code), m_gpregset_data(td.gpregset), + m_notes(td.notes) {} ThreadElfCore::~ThreadElfCore() { DestroyThread(); } @@ -221,11 +222,12 @@ ThreadElfCore::CreateRegisterContextForFrame(StackFrame *frame) { bool ThreadElfCore::CalculateStopInfo() { ProcessSP process_sp(GetProcess()); - if (process_sp) { - SetStopInfo(StopInfo::CreateStopReasonWithSignal(*this, m_signo)); - return true; - } - return false; + if (!process_sp) + return false; + + SetStopInfo(StopInfo::CreateStopReasonWithSignal( + *this, m_signo, /*description=*/nullptr, m_code)); + return true; } // Parse PRSTATUS from NOTE entry @@ -409,8 +411,8 @@ Status ELFLinuxSigInfo::Parse(const DataExtractor &data, const ArchSpec &arch) { // properly, because the struct is for the 64 bit version offset_t offset = 0; si_signo = data.GetU32(&offset); - si_code = data.GetU32(&offset); si_errno = data.GetU32(&offset); + si_code = data.GetU32(&offset); return error; } diff --git a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h index 8d973bb840d25..2f3ed2a017790 100644 --- a/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h +++ b/lldb/source/Plugins/Process/elf-core/ThreadElfCore.h @@ -128,6 +128,7 @@ struct ThreadData { std::vector notes; lldb::tid_t tid; int signo = 0; + int code = 0; int prstatus_sig = 0; std::string name; }; @@ -166,6 
+167,7 @@ class ThreadElfCore : public lldb_private::Thread { lldb::RegisterContextSP m_thread_reg_ctx_sp; int m_signo; + int m_code; lldb_private::DataExtractor m_gpregset_data; std::vector m_notes; diff --git a/lldb/source/Target/StopInfo.cpp b/lldb/source/Target/StopInfo.cpp index ebc355c90d0ab..a98fc28c7338d 100644 --- a/lldb/source/Target/StopInfo.cpp +++ b/lldb/source/Target/StopInfo.cpp @@ -1044,8 +1044,9 @@ class StopInfoWatchpoint : public StopInfo { class StopInfoUnixSignal : public StopInfo { public: - StopInfoUnixSignal(Thread &thread, int signo, const char *description) - : StopInfo(thread, signo) { + StopInfoUnixSignal(Thread &thread, int signo, const char *description, + std::optional code) + : StopInfo(thread, signo), m_code(code) { SetDescription(description); } @@ -1100,19 +1101,26 @@ class StopInfoUnixSignal : public StopInfo { if (m_description.empty()) { ThreadSP thread_sp(m_thread_wp.lock()); if (thread_sp) { + UnixSignalsSP unix_signals = thread_sp->GetProcess()->GetUnixSignals(); StreamString strm; - const char *signal_name = - thread_sp->GetProcess()->GetUnixSignals()->GetSignalAsCString( - m_value); - if (signal_name) - strm.Printf("signal %s", signal_name); + strm << "signal "; + + std::string signal_name = + unix_signals->GetSignalDescription(m_value, m_code); + if (signal_name.size()) + strm << signal_name; else - strm.Printf("signal %" PRIi64, m_value); + strm.Printf("%" PRIi64, m_value); + m_description = std::string(strm.GetString()); } } return m_description.c_str(); } + +private: + // In siginfo_t terms, if m_value is si_signo, m_code is si_code. + std::optional m_code; }; // StopInfoTrace @@ -1371,9 +1379,10 @@ StopInfo::CreateStopReasonWithWatchpointID(Thread &thread, break_id_t watch_id, } StopInfoSP StopInfo::CreateStopReasonWithSignal(Thread &thread, int signo, - const char *description) { + const char *description, + std::optional code) { thread.GetProcess()->GetUnixSignals()->IncrementSignalHitCount(signo); - return StopInfoSP(new StopInfoUnixSignal(thread, signo, description)); + return StopInfoSP(new StopInfoUnixSignal(thread, signo, description, code)); } StopInfoSP StopInfo::CreateStopReasonToTrace(Thread &thread) { diff --git a/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py b/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py index a174616cd89d8..e742dd06b3f12 100644 --- a/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py +++ b/lldb/test/API/linux/aarch64/mte_core_file/TestAArch64LinuxMTEMemoryTagCoreFile.py @@ -166,3 +166,14 @@ def test_mte_commands_no_mte(self): # the MTE core file which does support it but does not allow writing tags. self.expect("memory tag write 0 1", substrs=["error: Process does not support memory tagging"], error=True) + + @skipIfLLVMTargetMissing("AArch64") + def test_mte_tag_fault_reason(self): + """ Test that we correctly report the fault reason. """ + self.runCmd("target create --core core.mte") + + # There is no fault address shown here because core files do not include + # si_addr. + self.expect("bt", substrs=[ + "* thread #1, name = 'a.out.mte', stop reason = signal SIGSEGV: " + "sync tag check fault"]) diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 9d7b1dbd79ef1..d87d20704f166 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -221,6 +221,9 @@ Changes to LLDB omit defaulted template parameters. 
The full template parameter list can still be viewed with ``expr --raw-output``/``frame var --raw-output``. (`D141828 `_) +* LLDB is now able to show the subtype of signals found in a core file. For example + memory tagging specific segfaults such as ``SIGSEGV: sync tag check fault``. + Changes to Sanitizers --------------------- From ee928e322861085eaf7ea18120eca8a8a4c6201e Mon Sep 17 00:00:00 2001 From: OCHyams Date: Tue, 21 Mar 2023 08:17:24 +0000 Subject: [PATCH 151/691] [NFC] Add iterator traits to BitVector set_bits_iterator Reviewed By: scott.linder Differential Revision: https://reviews.llvm.org/D146232 --- llvm/include/llvm/ADT/BitVector.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h index 1a78859ba9889..a8847a3267537 100644 --- a/llvm/include/llvm/ADT/BitVector.h +++ b/llvm/include/llvm/ADT/BitVector.h @@ -24,6 +24,7 @@ #include #include #include +#include #include namespace llvm { @@ -40,6 +41,12 @@ template class const_set_bits_iterator_impl { } public: + using iterator_category = std::forward_iterator_tag; + using difference_type = void; + using value_type = int; + using pointer = value_type*; + using reference = value_type&; + const_set_bits_iterator_impl(const BitVectorT &Parent, int Current) : Parent(Parent), Current(Current) {} explicit const_set_bits_iterator_impl(const BitVectorT &Parent) From d5b2c8e56da07b362c88cf7d3254eabf79323858 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Tue, 21 Mar 2023 08:28:08 +0000 Subject: [PATCH 152/691] [Assignment Tracking][NFC] Use BitVectors as masks for SmallVectors ...rather than using DenseMaps to track per-variable information. Rather than tracking 3 maps of {VariableID: SomeInfo} per block, use a BitVector indexed by VariableID to mask 3 vectors of SomeInfo. BlockInfos now need to be initialised with a call to init which sets the BitVector width to the number of partially promoted variables in the function and fills the vectors with Top values. Prior to this patch, in joinBlockInfo, it was necessary to insert Top values into the Join result for variables in A XOR B after joining the variables in A AND B. Now, because the vectors are pre-filled with Top values we need only join the variables A AND B and set the BitVector of tracked variables to A OR B. The patch achieves an average of 0.25% reduction in instructions retired and a 1.1% max-rss for the CTMark suite in LTO-O3-g builds. Reviewed By: scott.linder Differential Revision: https://reviews.llvm.org/D145558 --- .../CodeGen/AssignmentTrackingAnalysis.cpp | 408 +++++++++--------- 1 file changed, 212 insertions(+), 196 deletions(-) diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index bda6ec9d3d042..8b0ab67cb9185 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -1,4 +1,5 @@ #include "llvm/CodeGen/AssignmentTrackingAnalysis.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/PostOrderIterator.h" @@ -79,6 +80,8 @@ class FunctionVarLocsBuilder { SmallVector SingleLocVars; public: + unsigned getNumVariables() const { return Variables.size(); } + /// Find or insert \p V and return the ID. 
VariableID insertVariable(DebugVariable V) { return static_cast(Variables.insert(V)); @@ -967,14 +970,17 @@ class AssignmentTrackingLowering { } }; - using AssignmentMap = DenseMap; - using LocMap = DenseMap; - using OverlapMap = DenseMap>; + using AssignmentMap = SmallVector; + using LocMap = SmallVector; + using OverlapMap = DenseMap>; using UntaggedStoreAssignmentMap = DenseMap>>; private: + /// The highest numbered VariableID for partially promoted variables plus 1, + /// the values for which start at 1. + unsigned TrackedVariablesVectorSize = 0; /// Map a variable to the set of variables that it fully contains. OverlapMap VarContains; /// Map untagged stores to the variable fragments they assign to. Used by @@ -990,30 +996,23 @@ class AssignmentTrackingLowering { void emitDbgValue(LocKind Kind, const DbgVariableIntrinsic *Source, Instruction *After); - static bool mapsAreEqual(const AssignmentMap &A, const AssignmentMap &B) { - if (A.size() != B.size()) - return false; - for (const auto &Pair : A) { - VariableID Var = Pair.first; - const Assignment &AV = Pair.second; - auto R = B.find(Var); - // Check if this entry exists in B, otherwise ret false. - if (R == B.end()) - return false; - // Check that the assignment value is the same. - if (!AV.isSameSourceAssignment(R->second)) - return false; - } - return true; + static bool mapsAreEqual(const BitVector &Mask, const AssignmentMap &A, + const AssignmentMap &B) { + return llvm::all_of(Mask.set_bits(), [&](unsigned VarID) { + return A[VarID].isSameSourceAssignment(B[VarID]); + }); } /// Represents the stack and debug assignments in a block. Used to describe /// the live-in and live-out values for blocks, as well as the "current" /// value as we process each instruction in a block. struct BlockInfo { - /// Dominating assignment to memory for each variable. + /// The set of variables (VariableID) being tracked in this block. + BitVector VariableIDsInBlock; + /// Dominating assignment to memory for each variable, indexed by + /// VariableID. AssignmentMap StackHomeValue; - /// Dominating assignemnt to each variable. + /// Dominating assignemnt to each variable, indexed by VariableID. AssignmentMap DebugValue; /// Location kind for each variable. LiveLoc indicates whether the /// dominating assignment in StackHomeValue (LocKind::Mem), DebugValue @@ -1024,20 +1023,138 @@ class AssignmentTrackingLowering { /// merge of multiple assignments (both are Status::NoneOrPhi). In other /// words, the memory location may well be valid while both DebugValue and /// StackHomeValue contain Assignments that have a Status of NoneOrPhi. + /// Indexed by VariableID. 
LocMap LiveLoc; + public: + enum AssignmentKind { Stack, Debug }; + const AssignmentMap &getAssignmentMap(AssignmentKind Kind) const { + switch (Kind) { + case Stack: + return StackHomeValue; + case Debug: + return DebugValue; + } + llvm_unreachable("Unknown AssignmentKind"); + } + AssignmentMap &getAssignmentMap(AssignmentKind Kind) { + return const_cast( + const_cast(this)->getAssignmentMap(Kind)); + } + + bool isVariableTracked(VariableID Var) const { + return VariableIDsInBlock[static_cast(Var)]; + } + + const Assignment &getAssignment(AssignmentKind Kind, VariableID Var) const { + assert(isVariableTracked(Var) && "Var not tracked in block"); + return getAssignmentMap(Kind)[static_cast(Var)]; + } + + LocKind getLocKind(VariableID Var) const { + assert(isVariableTracked(Var) && "Var not tracked in block"); + return LiveLoc[static_cast(Var)]; + } + + /// Set LocKind for \p Var only: does not set LocKind for VariableIDs of + /// fragments contained win \p Var. + void setLocKind(VariableID Var, LocKind K) { + VariableIDsInBlock.set(static_cast(Var)); + LiveLoc[static_cast(Var)] = K; + } + + /// Set the assignment in the \p Kind assignment map for \p Var only: does + /// not set the assignment for VariableIDs of fragments contained win \p + /// Var. + void setAssignment(AssignmentKind Kind, VariableID Var, + const Assignment &AV) { + VariableIDsInBlock.set(static_cast(Var)); + getAssignmentMap(Kind)[static_cast(Var)] = AV; + } + + /// Return true if there is an assignment matching \p AV in the \p Kind + /// assignment map. Does consider assignments for VariableIDs of fragments + /// contained win \p Var. + bool hasAssignment(AssignmentKind Kind, VariableID Var, + const Assignment &AV) const { + if (!isVariableTracked(Var)) + return false; + return AV.isSameSourceAssignment(getAssignment(Kind, Var)); + } + /// Compare every element in each map to determine structural equality /// (slow). bool operator==(const BlockInfo &Other) const { - return LiveLoc == Other.LiveLoc && - mapsAreEqual(StackHomeValue, Other.StackHomeValue) && - mapsAreEqual(DebugValue, Other.DebugValue); + return VariableIDsInBlock == Other.VariableIDsInBlock && + LiveLoc == Other.LiveLoc && + mapsAreEqual(VariableIDsInBlock, StackHomeValue, + Other.StackHomeValue) && + mapsAreEqual(VariableIDsInBlock, DebugValue, Other.DebugValue); } bool operator!=(const BlockInfo &Other) const { return !(*this == Other); } bool isValid() { return LiveLoc.size() == DebugValue.size() && LiveLoc.size() == StackHomeValue.size(); } + + /// Clear everything and initialise with ⊤-values for all variables. + void init(int NumVars) { + StackHomeValue.clear(); + DebugValue.clear(); + LiveLoc.clear(); + VariableIDsInBlock = BitVector(NumVars); + StackHomeValue.insert(StackHomeValue.begin(), NumVars, + Assignment::makeNoneOrPhi()); + DebugValue.insert(DebugValue.begin(), NumVars, + Assignment::makeNoneOrPhi()); + LiveLoc.insert(LiveLoc.begin(), NumVars, LocKind::None); + } + + /// Helper for join. + template + static void joinElmt(int Index, SmallVector &Target, + const SmallVector &A, + const SmallVector &B, + ElmtType (*Fn)(FnInputType, FnInputType)) { + Target[Index] = Fn(A[Index], B[Index]); + } + + /// See comment for AssignmentTrackingLowering::joinBlockInfo. + static BlockInfo join(const BlockInfo &A, const BlockInfo &B, int NumVars) { + // Join A and B. 
+ // + // Intersect = join(a, b) for a in A, b in B where Var(a) == Var(b) + // Difference = join(x, ⊤) for x where Var(x) is in A xor B + // Join = Intersect ∪ Difference + // + // This is achieved by performing a join on elements from A and B with + // variables common to both A and B (join elements indexed by var + // intersect), then adding ⊤-value elements for vars in A xor B. The + // latter part is equivalent to performing join on elements with variables + // in A xor B with the ⊤-value for the map element since join(x, ⊤) = ⊤. + // BlockInfo::init initializes all variable entries to the ⊤ value so we + // don't need to explicitly perform that step as Join.VariableIDsInBlock + // is set to the union of the variables in A and B at the end of this + // function. + BlockInfo Join; + Join.init(NumVars); + + BitVector Intersect = A.VariableIDsInBlock; + Intersect &= B.VariableIDsInBlock; + + for (auto VarID : Intersect.set_bits()) { + joinElmt(VarID, Join.LiveLoc, A.LiveLoc, B.LiveLoc, joinKind); + joinElmt(VarID, Join.DebugValue, A.DebugValue, B.DebugValue, + joinAssignment); + joinElmt(VarID, Join.StackHomeValue, A.StackHomeValue, B.StackHomeValue, + joinAssignment); + } + + Join.VariableIDsInBlock = A.VariableIDsInBlock; + Join.VariableIDsInBlock |= B.VariableIDsInBlock; + assert(Join.isValid()); + return Join; + } }; Function &Fn; @@ -1082,11 +1199,8 @@ class AssignmentTrackingLowering { /// (⊤) in this case (unknown location / assignment). ///@{ static LocKind joinKind(LocKind A, LocKind B); - static LocMap joinLocMap(const LocMap &A, const LocMap &B); static Assignment joinAssignment(const Assignment &A, const Assignment &B); - static AssignmentMap joinAssignmentMap(const AssignmentMap &A, - const AssignmentMap &B); - static BlockInfo joinBlockInfo(const BlockInfo &A, const BlockInfo &B); + BlockInfo joinBlockInfo(const BlockInfo &A, const BlockInfo &B); ///@} /// Process the instructions in \p BB updating \p LiveSet along the way. \p @@ -1119,8 +1233,15 @@ class AssignmentTrackingLowering { /// have been called for \p Var first. LocKind getLocKind(BlockInfo *LiveSet, VariableID Var); /// Return true if \p Var has an assignment in \p M matching \p AV. - bool hasVarWithAssignment(VariableID Var, const Assignment &AV, - const AssignmentMap &M); + bool hasVarWithAssignment(BlockInfo *LiveSet, BlockInfo::AssignmentKind Kind, + VariableID Var, const Assignment &AV); + /// Return the set of VariableIDs corresponding the fragments contained fully + /// within the variable/fragment \p Var. + ArrayRef getContainedFragments(VariableID Var) const; + + /// Mark \p Var as having been touched this frame. Note, this applies only + /// to the exact fragment \p Var and not to any fragments contained within. + void touchFragment(VariableID Var); /// Emit info for variables that are fully promoted. 
bool emitPromotedVarLocs(FunctionVarLocsBuilder *FnVarLocs); @@ -1135,66 +1256,60 @@ class AssignmentTrackingLowering { }; } // namespace +ArrayRef +AssignmentTrackingLowering::getContainedFragments(VariableID Var) const { + auto R = VarContains.find(Var); + if (R == VarContains.end()) + return std::nullopt; + return R->second; +} + +void AssignmentTrackingLowering::touchFragment(VariableID Var) { + VarsTouchedThisFrame.insert(Var); +} + void AssignmentTrackingLowering::setLocKind(BlockInfo *LiveSet, VariableID Var, LocKind K) { auto SetKind = [this](BlockInfo *LiveSet, VariableID Var, LocKind K) { - VarsTouchedThisFrame.insert(Var); - LiveSet->LiveLoc[Var] = K; + LiveSet->setLocKind(Var, K); + touchFragment(Var); }; SetKind(LiveSet, Var, K); // Update the LocKind for all fragments contained within Var. - for (VariableID Frag : VarContains[Var]) + for (VariableID Frag : getContainedFragments(Var)) SetKind(LiveSet, Frag, K); } AssignmentTrackingLowering::LocKind AssignmentTrackingLowering::getLocKind(BlockInfo *LiveSet, VariableID Var) { - auto Pair = LiveSet->LiveLoc.find(Var); - assert(Pair != LiveSet->LiveLoc.end()); - return Pair->second; + return LiveSet->getLocKind(Var); } void AssignmentTrackingLowering::addMemDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV) { - auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) { - LiveSet->StackHomeValue[Var] = AV; - // Add default (Var -> ⊤) to DebugValue if Var isn't in DebugValue yet. - LiveSet->DebugValue.insert({Var, Assignment::makeNoneOrPhi()}); - // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers - // of addMemDef will call setLocKind to override. - LiveSet->LiveLoc.insert({Var, LocKind::None}); - }; - AddDef(LiveSet, Var, AV); + LiveSet->setAssignment(BlockInfo::Stack, Var, AV); // Use this assigment for all fragments contained within Var, but do not // provide a Source because we cannot convert Var's value to a value for the // fragment. Assignment FragAV = AV; FragAV.Source = nullptr; - for (VariableID Frag : VarContains[Var]) - AddDef(LiveSet, Frag, FragAV); + for (VariableID Frag : getContainedFragments(Var)) + LiveSet->setAssignment(BlockInfo::Stack, Frag, FragAV); } void AssignmentTrackingLowering::addDbgDef(BlockInfo *LiveSet, VariableID Var, const Assignment &AV) { - auto AddDef = [](BlockInfo *LiveSet, VariableID Var, Assignment AV) { - LiveSet->DebugValue[Var] = AV; - // Add default (Var -> ⊤) to StackHome if Var isn't in StackHome yet. - LiveSet->StackHomeValue.insert({Var, Assignment::makeNoneOrPhi()}); - // Add default (Var -> ⊤) to LiveLocs if Var isn't in LiveLocs yet. Callers - // of addDbgDef will call setLocKind to override. - LiveSet->LiveLoc.insert({Var, LocKind::None}); - }; - AddDef(LiveSet, Var, AV); + LiveSet->setAssignment(BlockInfo::Debug, Var, AV); // Use this assigment for all fragments contained within Var, but do not // provide a Source because we cannot convert Var's value to a value for the // fragment. Assignment FragAV = AV; FragAV.Source = nullptr; - for (VariableID Frag : VarContains[Var]) - AddDef(LiveSet, Frag, FragAV); + for (VariableID Frag : getContainedFragments(Var)) + LiveSet->setAssignment(BlockInfo::Debug, Frag, FragAV); } static DIAssignID *getIDFromInst(const Instruction &I) { @@ -1206,24 +1321,16 @@ static DIAssignID *getIDFromMarker(const DbgAssignIntrinsic &DAI) { } /// Return true if \p Var has an assignment in \p M matching \p AV. 
-bool AssignmentTrackingLowering::hasVarWithAssignment(VariableID Var, - const Assignment &AV, - const AssignmentMap &M) { - auto AssignmentIsMapped = [](VariableID Var, const Assignment &AV, - const AssignmentMap &M) { - auto R = M.find(Var); - if (R == M.end()) - return false; - return AV.isSameSourceAssignment(R->second); - }; - - if (!AssignmentIsMapped(Var, AV, M)) +bool AssignmentTrackingLowering::hasVarWithAssignment( + BlockInfo *LiveSet, BlockInfo::AssignmentKind Kind, VariableID Var, + const Assignment &AV) { + if (!LiveSet->hasAssignment(Kind, Var, AV)) return false; // Check all the frags contained within Var as these will have all been // mapped to AV at the last store to Var. - for (VariableID Frag : VarContains[Var]) - if (!AssignmentIsMapped(Frag, AV, M)) + for (VariableID Frag : getContainedFragments(Var)) + if (!LiveSet->hasAssignment(Kind, Frag, AV)) return false; return true; } @@ -1410,13 +1517,14 @@ void AssignmentTrackingLowering::processTaggedInstruction( // The last assignment to the stack is now AV. Check if the last debug // assignment has a matching Assignment. - if (hasVarWithAssignment(Var, AV, LiveSet->DebugValue)) { + if (hasVarWithAssignment(LiveSet, BlockInfo::Debug, Var, AV)) { // The StackHomeValue and DebugValue for this variable match so we can // emit a stack home location here. LLVM_DEBUG(dbgs() << "Mem, Stack matches Debug program\n";); LLVM_DEBUG(dbgs() << " Stack val: "; AV.dump(dbgs()); dbgs() << "\n"); LLVM_DEBUG(dbgs() << " Debug val: "; - LiveSet->DebugValue[Var].dump(dbgs()); dbgs() << "\n"); + LiveSet->DebugValue[static_cast(Var)].dump(dbgs()); + dbgs() << "\n"); setLocKind(LiveSet, Var, LocKind::Mem); emitDbgValue(LocKind::Mem, DAI, &I); continue; @@ -1439,7 +1547,8 @@ void AssignmentTrackingLowering::processTaggedInstruction( // There's been an assignment to memory that we were using as a // location for this variable, and the Assignment doesn't match what // we'd expect to see in memory. - if (LiveSet->DebugValue[Var].Status == Assignment::NoneOrPhi) { + Assignment DbgAV = LiveSet->getAssignment(BlockInfo::Debug, Var); + if (DbgAV.Status == Assignment::NoneOrPhi) { // We need to terminate any previously open location now. LLVM_DEBUG(dbgs() << "None, No Debug value available\n";); setLocKind(LiveSet, Var, LocKind::None); @@ -1448,9 +1557,8 @@ void AssignmentTrackingLowering::processTaggedInstruction( // The previous DebugValue Value can be used here. LLVM_DEBUG(dbgs() << "Val, Debug value is Known\n";); setLocKind(LiveSet, Var, LocKind::Val); - Assignment PrevAV = LiveSet->DebugValue.lookup(Var); - if (PrevAV.Source) { - emitDbgValue(LocKind::Val, PrevAV.Source, &I); + if (DbgAV.Source) { + emitDbgValue(LocKind::Val, DbgAV.Source, &I); } else { // PrevAV.Source is nullptr so we must emit undef here. emitDbgValue(LocKind::None, DAI, &I); @@ -1484,7 +1592,7 @@ void AssignmentTrackingLowering::processDbgAssign(DbgAssignIntrinsic &DAI, // Check if the DebugValue and StackHomeValue both hold the same // Assignment. - if (hasVarWithAssignment(Var, AV, LiveSet->StackHomeValue)) { + if (hasVarWithAssignment(LiveSet, BlockInfo::Stack, Var, AV)) { // They match. We can use the stack home because the debug intrinsics state // that an assignment happened here, and we know that specific assignment // was the last one to take place in memory for this variable. @@ -1601,58 +1709,6 @@ AssignmentTrackingLowering::joinKind(LocKind A, LocKind B) { return A == B ? 
A : LocKind::None; } -AssignmentTrackingLowering::LocMap -AssignmentTrackingLowering::joinLocMap(const LocMap &A, const LocMap &B) { - // Join A and B. - // - // U = join(a, b) for a in A, b in B where Var(a) == Var(b) - // D = join(x, ⊤) for x where Var(x) is in A xor B - // Join = U ∪ D - // - // This is achieved by performing a join on elements from A and B with - // variables common to both A and B (join elements indexed by var intersect), - // then adding LocKind::None elements for vars in A xor B. The latter part is - // equivalent to performing join on elements with variables in A xor B with - // LocKind::None (⊤) since join(x, ⊤) = ⊤. - LocMap Join(std::max(A.size(), B.size())); - SmallVector SymmetricDifference; - // Insert the join of the elements with common vars into Join. Add the - // remaining elements to into SymmetricDifference. - for (const auto &[Var, Loc] : A) { - // If this Var doesn't exist in B then add it to the symmetric difference - // set. - auto R = B.find(Var); - if (R == B.end()) { - SymmetricDifference.push_back(Var); - continue; - } - // There is an entry for Var in both, join it. - Join[Var] = joinKind(Loc, R->second); - } - unsigned IntersectSize = Join.size(); - (void)IntersectSize; - - // Check if A and B contain the same variables. - if (SymmetricDifference.empty() && A.size() == B.size()) - return Join; - - // Add the elements in B with variables that are not in A into - // SymmetricDifference. - for (const auto &Pair : B) { - VariableID Var = Pair.first; - if (A.count(Var) == 0) - SymmetricDifference.push_back(Var); - } - - // Add SymmetricDifference elements to Join and return the result. - for (const auto &Var : SymmetricDifference) - Join.insert({Var, LocKind::None}); - - assert(Join.size() == (IntersectSize + SymmetricDifference.size())); - assert(Join.size() >= A.size() && Join.size() >= B.size()); - return Join; -} - AssignmentTrackingLowering::Assignment AssignmentTrackingLowering::joinAssignment(const Assignment &A, const Assignment &B) { @@ -1695,68 +1751,10 @@ AssignmentTrackingLowering::joinAssignment(const Assignment &A, return Assignment::make(A.ID, Source); } -AssignmentTrackingLowering::AssignmentMap -AssignmentTrackingLowering::joinAssignmentMap(const AssignmentMap &A, - const AssignmentMap &B) { - // Join A and B. - // - // U = join(a, b) for a in A, b in B where Var(a) == Var(b) - // D = join(x, ⊤) for x where Var(x) is in A xor B - // Join = U ∪ D - // - // This is achieved by performing a join on elements from A and B with - // variables common to both A and B (join elements indexed by var intersect), - // then adding LocKind::None elements for vars in A xor B. The latter part is - // equivalent to performing join on elements with variables in A xor B with - // Status::NoneOrPhi (⊤) since join(x, ⊤) = ⊤. - AssignmentMap Join(std::max(A.size(), B.size())); - SmallVector SymmetricDifference; - // Insert the join of the elements with common vars into Join. Add the - // remaining elements to into SymmetricDifference. - for (const auto &[Var, AV] : A) { - // If this Var doesn't exist in B then add it to the symmetric difference - // set. - auto R = B.find(Var); - if (R == B.end()) { - SymmetricDifference.push_back(Var); - continue; - } - // There is an entry for Var in both, join it. - Join[Var] = joinAssignment(AV, R->second); - } - unsigned IntersectSize = Join.size(); - (void)IntersectSize; - - // Check if A and B contain the same variables. 
- if (SymmetricDifference.empty() && A.size() == B.size()) - return Join; - - // Add the elements in B with variables that are not in A into - // SymmetricDifference. - for (const auto &Pair : B) { - VariableID Var = Pair.first; - if (A.count(Var) == 0) - SymmetricDifference.push_back(Var); - } - - // Add SymmetricDifference elements to Join and return the result. - for (auto Var : SymmetricDifference) - Join.insert({Var, Assignment::makeNoneOrPhi()}); - - assert(Join.size() == (IntersectSize + SymmetricDifference.size())); - assert(Join.size() >= A.size() && Join.size() >= B.size()); - return Join; -} - AssignmentTrackingLowering::BlockInfo AssignmentTrackingLowering::joinBlockInfo(const BlockInfo &A, const BlockInfo &B) { - BlockInfo Join; - Join.LiveLoc = joinLocMap(A.LiveLoc, B.LiveLoc); - Join.StackHomeValue = joinAssignmentMap(A.StackHomeValue, B.StackHomeValue); - Join.DebugValue = joinAssignmentMap(A.DebugValue, B.DebugValue); - assert(Join.isValid()); - return Join; + return BlockInfo::join(A, B, TrackedVariablesVectorSize); } bool AssignmentTrackingLowering::join( @@ -1789,6 +1787,9 @@ bool AssignmentTrackingLowering::join( FirstJoin = false; } + if (FirstJoin) + BBLiveIn.init(TrackedVariablesVectorSize); + auto CurrentLiveInEntry = LiveIn.find(&BB); // Check if there isn't an entry, or there is but the LiveIn set has changed // (expensive check). @@ -1835,7 +1836,13 @@ getUntaggedStoreAssignmentInfo(const Instruction &I, const DataLayout &Layout) { /// y does not contain all overlaps because partial overlaps are excluded. /// /// While we're iterating over the function, add single location defs for -/// dbg.declares to \p FnVarLocs +/// dbg.declares to \p FnVarLocs. +/// +/// Variables that are interesting to this pass are added to +/// FnVarLocs->Variables first. TrackedVariablesVectorSize is set to the ID of +/// the last interesting variable plus 1, meaning variables with ID 1 +/// (inclusive) to TrackedVariablesVectorSize (exclusive) are interesting. The +/// subsequent variables are either stack homed or fully promoted. /// /// Finally, populate UntaggedStoreVars with a mapping of untagged stores to /// the stored-to variable fragments. /// @@ -1844,7 +1851,8 @@ getUntaggedStoreAssignmentInfo(const Instruction &I, const DataLayout &Layout) { /// to iterate over the function as they can be achieved together in one pass. static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares( Function &Fn, FunctionVarLocsBuilder *FnVarLocs, - AssignmentTrackingLowering::UntaggedStoreAssignmentMap &UntaggedStoreVars) { + AssignmentTrackingLowering::UntaggedStoreAssignmentMap &UntaggedStoreVars, + unsigned &TrackedVariablesVectorSize) { DenseSet<DebugVariable> Seen; // Map of Variable: [Fragments]. DenseMap<DebugAggregate, SmallVector<DebugVariable>> FragmentMap; @@ -1855,12 +1863,11 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares( // UntaggedStoreVars. // We need to add fragments for untagged stores too so that we can correctly // clobber overlapped fragment locations later. 
+ SmallVector<DbgDeclareInst *> Declares; for (auto &BB : Fn) { for (auto &I : BB) { if (auto *DDI = dyn_cast<DbgDeclareInst>(&I)) { - FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(), - DDI->getDebugLoc(), - DDI->getWrappedLocation()); + Declares.push_back(DDI); } else if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) { DebugVariable DV = DebugVariable(DII); DebugAggregate DA = {DV.getVariable(), DV.getInlinedAt()}; @@ -1935,6 +1942,15 @@ static AssignmentTrackingLowering::OverlapMap buildOverlapMapAndRecordDeclares( } } + // VariableIDs are 1-based so the variable-tracking bitvector needs + // NumVariables plus 1 bits. + TrackedVariablesVectorSize = FnVarLocs->getNumVariables() + 1; + + // Finally, insert the declares afterwards, so the first IDs are all + // partially stack homed vars. + for (auto *DDI : Declares) + FnVarLocs->addSingleLocVar(DebugVariable(DDI), DDI->getExpression(), + DDI->getDebugLoc(), DDI->getWrappedLocation()); return Map; } @@ -1955,8 +1971,8 @@ bool AssignmentTrackingLowering::run(FunctionVarLocsBuilder *FnVarLocsBuilder) { // Note that this pass doesn't handle partial overlaps correctly (FWIW // neither does LiveDebugVariables) because that is difficult to do and // appears to be a rare occurrence. - VarContains = - buildOverlapMapAndRecordDeclares(Fn, FnVarLocs, UntaggedStoreVars); + VarContains = buildOverlapMapAndRecordDeclares( + Fn, FnVarLocs, UntaggedStoreVars, TrackedVariablesVectorSize); // Prepare for traversal. ReversePostOrderTraversal<Function *> RPOT(&Fn); From 9437bf418a7fdb9a1079f416dd28bb7107161d74 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 21 Mar 2023 01:04:04 -0700 Subject: [PATCH 153/691] [mlir][Linalg][Transform] Fix effect on RewriteInDestinationPassingStyleOp that did not consume its operand --- .../Linalg/TransformOps/LinalgTransformOps.td | 20 +++++++++--- .../TransformOps/LinalgTransformOps.cpp | 31 +++++++++---------- 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td index 712abf341f460..c58e955cb7951 100644 --- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td +++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td @@ -83,8 +83,10 @@ def BufferizeToAllocationOp : Op { + [FunctionalStyleTransformOpTrait, + MemoryEffectsOpInterface, + TransformOpInterface, + TransformEachOpTrait]> { let description = [{ Decomposes named complex operations, such as higher-dimensional (depthwise) convolutions, into combinations of lower-dimensional equivalents @@ -932,9 +934,10 @@ def ScalarizeOp : Op]> { + [FunctionalStyleTransformOpTrait, + MemoryEffectsOpInterface, + TransformOpInterface, + TransformEachOpTrait]> { let description = [{ Rewrite a supported tensor operation that is not in destination-passing style into a form that is in destination-passing style. 
@@ -963,6 +966,13 @@ def RewriteInDestinationPassingStyleOp : Op< $target attr-dict `:` functional-type($target, results) }]; + + let extraClassDeclaration = [{ + ::mlir::DiagnosedSilenceableFailure applyToOne( + ::mlir::Operation *target, + ::mlir::transform::ApplyToEachResultList &results, + ::mlir::transform::TransformState &state); + }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 407b8d213de1c..d98eb3b781fc5 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -2000,24 +2000,21 @@ transform::ScalarizeOp::applyToOne(LinalgOp target, //===----------------------------------------------------------------------===// DiagnosedSilenceableFailure -transform::RewriteInDestinationPassingStyleOp::apply( - transform::TransformResults &results, transform::TransformState &state) { +transform::RewriteInDestinationPassingStyleOp::applyToOne( + Operation *target, transform::ApplyToEachResultList &results, + transform::TransformState &state) { SmallVector<Operation *> res; - ArrayRef<Operation *> targetOps = state.getPayloadOps(getTarget()); - for (Operation *target : targetOps) { - IRRewriter rewriter(target->getContext()); - rewriter.setInsertionPoint(target); - FailureOr<Operation *> maybeResult = - TypeSwitch<Operation *, FailureOr<Operation *>>(target) - .Case<tensor::FromElementsOp, tensor::GenerateOp, tensor::PadOp>( - [&rewriter](auto op) { - return rewriteInDestinationPassingStyle(rewriter, op); - }); - if (failed(maybeResult)) - return emitDefaultSilenceableFailure(target); - res.push_back(*maybeResult); - } - results.set(getResult().cast<OpResult>(), res); + IRRewriter rewriter(target->getContext()); + rewriter.setInsertionPoint(target); + FailureOr<Operation *> maybeResult = + TypeSwitch<Operation *, FailureOr<Operation *>>(target) + .Case<tensor::FromElementsOp, tensor::GenerateOp, tensor::PadOp>( + [&rewriter](auto op) { + return rewriteInDestinationPassingStyle(rewriter, op); + }); + if (failed(maybeResult)) + return emitDefaultSilenceableFailure(target); + results.push_back(*maybeResult); return DiagnosedSilenceableFailure::success(); } From e8ec42b80b5dc75186dca543572416f2f2e21475 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 21 Mar 2023 10:16:51 +0100 Subject: [PATCH 154/691] [InstCombine] Fold icmp eq of non-inbounds gep with base pointer For equality comparisons, we don't need the gep to be inbounds: https://alive2.llvm.org/ce/z/Fe_kn2 --- .../InstCombine/InstCombineCompares.cpp | 2 +- .../Transforms/InstCombine/compare-alloca.ll | 7 ++---- llvm/test/Transforms/InstCombine/icmp-gep.ll | 22 +++++++++---------- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index b1d59d355eb73..b9473634e6dc7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -741,7 +741,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, RHS = RHS->stripPointerCasts(); Value *PtrBase = GEPLHS->getOperand(0); - if (PtrBase == RHS && GEPLHS->isInBounds()) { + if (PtrBase == RHS && (GEPLHS->isInBounds() || ICmpInst::isEquality(Cond))) { // ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0). 
Value *Offset = EmitGEPOffset(GEPLHS); return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset, diff --git a/llvm/test/Transforms/InstCombine/compare-alloca.ll b/llvm/test/Transforms/InstCombine/compare-alloca.ll index 6ba1850cba3bb..164398382e9e0 100644 --- a/llvm/test/Transforms/InstCombine/compare-alloca.ll +++ b/llvm/test/Transforms/InstCombine/compare-alloca.ll @@ -292,11 +292,8 @@ define void @select_alloca_unrelated_ptr(i1 %c, ptr %p, ptr %p2) { define void @alloca_offset_icmp(ptr %p, i32 %offset) { ; CHECK-LABEL: @alloca_offset_icmp( -; CHECK-NEXT: [[M:%.*]] = alloca [4 x i8], align 1 -; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[M]], i32 [[OFFSET:%.*]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq ptr [[M]], [[P:%.*]] -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq ptr [[M]], [[G]] -; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]]) +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[OFFSET:%.*]], 0 +; CHECK-NEXT: call void @witness(i1 false, i1 [[CMP2]]) ; CHECK-NEXT: ret void ; %m = alloca [4 x i8] diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll index bc8bc7b74d3b4..1ccd3819e6593 100644 --- a/llvm/test/Transforms/InstCombine/icmp-gep.ll +++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll @@ -8,8 +8,7 @@ declare void @use(ptr) define i1 @eq_base(ptr %x, i64 %y) { ; CHECK-LABEL: @eq_base( -; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[X:%.*]], i64 [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp eq ptr [[G]], [[X]] +; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[Y:%.*]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %g = getelementptr i8, ptr %x, i64 %y @@ -20,8 +19,7 @@ define i1 @eq_base(ptr %x, i64 %y) { define i1 @ne_base_commute(i64 %y) { ; CHECK-LABEL: @ne_base_commute( ; CHECK-NEXT: [[X:%.*]] = call ptr @getptr() -; CHECK-NEXT: [[G:%.*]] = getelementptr i8, ptr [[X]], i64 [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = icmp ne ptr [[X]], [[G]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[Y:%.*]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %x = call ptr @getptr() ; thwart complexity-based canonicalization @@ -176,8 +174,8 @@ define i1 @eq_base_inbounds_commute_use(i64 %y) { define i1 @eq_bitcast_base(ptr %p, i64 %x) { ; CHECK-LABEL: @eq_bitcast_base( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i8], ptr [[P:%.*]], i64 [[X:%.*]], i64 0 -; CHECK-NEXT: [[R:%.*]] = icmp eq ptr [[GEP]], [[P]] +; CHECK-NEXT: [[GEP_IDX_MASK:%.*]] = and i64 [[X:%.*]], 9223372036854775807 +; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[GEP_IDX_MASK]], 0 ; CHECK-NEXT: ret i1 [[R]] ; %gep = getelementptr [2 x i8], ptr %p, i64 %x, i64 0 @@ -305,8 +303,8 @@ define i1 @test60_as1(ptr addrspace(1) %foo, i64 %i, i64 %j) { define i1 @test60_addrspacecast(ptr %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test60_addrspacecast( ; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i64 [[I:%.*]], 2 -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[GEP1_IDX]], [[J:%.*]] -; CHECK-NEXT: ret i1 [[TMP1]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[GEP1_IDX]], [[J:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] ; %bit = addrspacecast ptr %foo to ptr addrspace(3) %gep1 = getelementptr inbounds i32, ptr addrspace(3) %bit, i64 %i @@ -320,8 +318,8 @@ define i1 @test60_addrspacecast_smaller(ptr %foo, i16 %i, i64 %j) { ; CHECK-LABEL: @test60_addrspacecast_smaller( ; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nsw i16 [[I:%.*]], 2 ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[J:%.*]] to i16 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP1]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[GEP1_IDX]], [[TMP1]] +; CHECK-NEXT: ret i1 [[CMP]] 
; %bit = addrspacecast ptr %foo to ptr addrspace(1) %gep1 = getelementptr inbounds i32, ptr addrspace(1) %bit, i16 %i @@ -335,8 +333,8 @@ define i1 @test60_addrspacecast_larger(ptr addrspace(1) %foo, i32 %i, i16 %j) { ; CHECK-LABEL: @test60_addrspacecast_larger( ; CHECK-NEXT: [[I_TR:%.*]] = trunc i32 [[I:%.*]] to i16 ; CHECK-NEXT: [[TMP1:%.*]] = shl i16 [[I_TR]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[TMP1]], [[J:%.*]] -; CHECK-NEXT: ret i1 [[TMP2]] +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], [[J:%.*]] +; CHECK-NEXT: ret i1 [[CMP]] ; %bit = addrspacecast ptr addrspace(1) %foo to ptr addrspace(2) %gep1 = getelementptr inbounds i32, ptr addrspace(2) %bit, i32 %i From 3cfb081c19f706adc4ea32998dca1ab4feae0b2b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 21 Mar 2023 10:41:15 +0100 Subject: [PATCH 155/691] [InstCombine] Add icmp gep tests without inbounds (NFC) --- llvm/test/Transforms/InstCombine/icmp-gep.ll | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll index 1ccd3819e6593..5fef7ac435361 100644 --- a/llvm/test/Transforms/InstCombine/icmp-gep.ll +++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll @@ -284,6 +284,32 @@ define i1 @test60(ptr %foo, i64 %i, i64 %j) { ret i1 %cmp } +define i1 @test_gep_ult_no_inbounds(ptr %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test_gep_ult_no_inbounds( +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[FOO:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[FOO]], i64 [[J:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult ptr [[GEP1]], [[GEP2]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep1 = getelementptr i32, ptr %foo, i64 %i + %gep2 = getelementptr i8, ptr %foo, i64 %j + %cmp = icmp ult ptr %gep1, %gep2 + ret i1 %cmp +} + +define i1 @test_gep_eq_no_inbounds(ptr %foo, i64 %i, i64 %j) { +; CHECK-LABEL: @test_gep_eq_no_inbounds( +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[FOO:%.*]], i64 [[I:%.*]] +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[FOO]], i64 [[J:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[GEP1]], [[GEP2]] +; CHECK-NEXT: ret i1 [[CMP]] +; + %gep1 = getelementptr i32, ptr %foo, i64 %i + %gep2 = getelementptr i8, ptr %foo, i64 %j + %cmp = icmp eq ptr %gep1, %gep2 + ret i1 %cmp +} + define i1 @test60_as1(ptr addrspace(1) %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test60_as1( ; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[I:%.*]] to i16 From 090205fb57a1ca65bec6fada32232c2e975d0c48 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 21 Mar 2023 10:44:40 +0100 Subject: [PATCH 156/691] [lldb] Fix TestStepOverWatchpoint --- .../step_over_watchpoint/TestStepOverWatchpoint.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py b/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py index fd70bd692a216..f88428b872c00 100644 --- a/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py +++ b/lldb/test/API/commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py @@ -34,13 +34,13 @@ def get_to_start(self, bkpt_text): # stepping off from the breakpoint: bkpt.SetEnabled(False) - return (target, process, thread, read_watchpoint) + return (target, process, thread, frame, read_watchpoint) # Read-write watchpoints not supported on SystemZ @expectedFailureAll(archs=['s390x']) @add_test_categories(["basic_process"]) def 
test_step_over(self): - target, process, thread, wp = self.get_to_start("Set a breakpoint here") + target, process, thread, frame, wp = self.get_to_start("Set a breakpoint here") thread.StepOver() self.assertStopReason(thread.GetStopReason(), lldb.eStopReasonWatchpoint, @@ -61,7 +61,7 @@ def test_step_over(self): bugnumber="") @add_test_categories(["basic_process"]) def test_step_instruction(self): - target, process, thread, wp = self.get_to_start("Set breakpoint after call") + target, process, thread, frame, wp = self.get_to_start("Set breakpoint after call") self.step_inst_for_watchpoint(1) @@ -75,6 +75,7 @@ def test_step_instruction(self): if re.match("^mips", arch) or re.match("powerpc64le", arch): self.runCmd("watchpoint delete 1") + error = lldb.SBError() # resolve_location=True, read=False, write=True write_watchpoint = write_value.Watch(True, False, True, error) self.assertTrue(write_watchpoint, "Failed to set write watchpoint.") From 5d17ae2d5d1ab482058b345eaa138125e21b7087 Mon Sep 17 00:00:00 2001 From: OCHyams Date: Tue, 21 Mar 2023 09:14:42 +0000 Subject: [PATCH 157/691] [Assignment Tracking] Downgrade dbg.assigns to dbg.values in mem2reg For fully promoted variables dbg.assigns and dbg.values convey the same information and can be used interchangeably. This patch converts dbg.assigns to dbg.values for variables promoted by mem2reg. This reduces resource usage by reducing the amount of unnecessary function local metadata. The compile time tracker reports that CTMark projects build with LTO-O3-g with 0.4% fewer instructions retired and peak memory usage is reduced by 2.2%. Reviewed By: jryans Differential Revision: https://reviews.llvm.org/D145511 --- .../Utils/PromoteMemoryToRegister.cpp | 65 +++++++++++++------ .../assignment-tracking/mem2reg/phi.ll | 6 +- .../mem2reg/single-block-alloca.ll | 4 +- .../mem2reg/single-store-alloca.ll | 2 +- .../assignment-tracking/sroa/complex.ll | 4 +- .../Generic/assignment-tracking/sroa/frag.ll | 4 +- .../Generic/assignment-tracking/sroa/id.ll | 4 +- .../assignment-tracking/sroa/memcpy.ll | 2 +- .../sroa/memmove-to-from-same-alloca.ll | 2 +- .../assignment-tracking/sroa/rewrite.ll | 4 +- .../Generic/assignment-tracking/sroa/store.ll | 4 +- .../sroa/unspecified-var-size.ll | 2 +- .../assignment-tracking/sroa/user-memcpy.ll | 20 +++--- .../Generic/assignment-tracking/sroa/vec-1.ll | 4 +- .../Generic/assignment-tracking/sroa/vec-2.ll | 6 +- 15 files changed, 79 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index a90ae77032717..22272f7e51b36 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -118,17 +118,28 @@ class AssignmentTrackingInfo { /// Update assignment tracking debug info given for the to-be-deleted store /// \p ToDelete that stores to this alloca. - void updateForDeletedStore(StoreInst *ToDelete, DIBuilder &DIB) const { + void updateForDeletedStore( + StoreInst *ToDelete, DIBuilder &DIB, + SmallSet *DbgAssignsToDelete) const { // There's nothing to do if the alloca doesn't have any variables using // assignment tracking. if (DbgAssigns.empty()) return; - // Just leave dbg.assign intrinsics in place and remember that we've seen - // one for each variable fragment. + // Insert a dbg.value where the linked dbg.assign is and remember to delete + // the dbg.assign later. 
Demoting to dbg.value isn't necessary for + // correctness but does reduce compile time and memory usage by reducing + // unnecessary function-local metadata. Remember that we've seen a + // dbg.assign for each variable fragment for the untracked store handling + // (after this loop). SmallSet VarHasDbgAssignForStore; - for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(ToDelete)) + for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(ToDelete)) { VarHasDbgAssignForStore.insert(DebugVariable(DAI)); + DbgAssignsToDelete->insert(DAI); + DIB.insertDbgValueIntrinsic(DAI->getValue(), DAI->getVariable(), + DAI->getExpression(), DAI->getDebugLoc(), + DAI); + } // It's possible for variables using assignment tracking to have no // dbg.assign linked to this store. These are variables in DbgAssigns that @@ -322,6 +333,9 @@ struct PromoteMem2Reg { /// For each alloca, keep an instance of a helper class that gives us an easy /// way to update assignment tracking debug info if the alloca is promoted. SmallVector AllocaATInfo; + /// A set of dbg.assigns to delete because they've been demoted to + /// dbg.values. Call cleanUpDbgAssigns to delete them. + SmallSet DbgAssignsToDelete; /// The set of basic blocks the renamer has already visited. SmallPtrSet Visited; @@ -365,6 +379,13 @@ struct PromoteMem2Reg { RenamePassData::LocationVector &IncLocs, std::vector &Worklist); bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); + + /// Delete dbg.assigns that have been demoted to dbg.values. + void cleanUpDbgAssigns() { + for (auto *DAI : DbgAssignsToDelete) + DAI->eraseFromParent(); + DbgAssignsToDelete.clear(); + } }; } // end anonymous namespace @@ -436,9 +457,10 @@ static void removeIntrinsicUsers(AllocaInst *AI) { /// false there were some loads which were not dominated by the single store /// and thus must be phi-ed with undef. We fall back to the standard alloca /// promotion algorithm in that case. -static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, - LargeBlockInfo &LBI, const DataLayout &DL, - DominatorTree &DT, AssumptionCache *AC) { +static bool rewriteSingleStoreAlloca( + AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI, const DataLayout &DL, + DominatorTree &DT, AssumptionCache *AC, + SmallSet *DbgAssignsToDelete) { StoreInst *OnlyStore = Info.OnlyStore; bool StoringGlobalVal = !isa(OnlyStore->getOperand(0)); BasicBlock *StoreBB = OnlyStore->getParent(); @@ -498,7 +520,8 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, DIBuilder DIB(*AI->getModule(), /*AllowUnresolved*/ false); // Update assignment tracking info for the store we're going to delete. - Info.AssignmentTracking.updateForDeletedStore(Info.OnlyStore, DIB); + Info.AssignmentTracking.updateForDeletedStore(Info.OnlyStore, DIB, + DbgAssignsToDelete); // Record debuginfo for the store and remove the declaration's // debuginfo. @@ -538,11 +561,10 @@ static bool rewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, /// use(t); /// *A = 42; /// } -static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, - LargeBlockInfo &LBI, - const DataLayout &DL, - DominatorTree &DT, - AssumptionCache *AC) { +static bool promoteSingleBlockAlloca( + AllocaInst *AI, const AllocaInfo &Info, LargeBlockInfo &LBI, + const DataLayout &DL, DominatorTree &DT, AssumptionCache *AC, + SmallSet *DbgAssignsToDelete) { // The trickiest case to handle is when we have large blocks. Because of this, // this code is optimized assuming that large blocks happen. 
This does not // significantly pessimize the small block case. This uses LargeBlockInfo to @@ -606,7 +628,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info, while (!AI->use_empty()) { StoreInst *SI = cast(AI->user_back()); // Update assignment tracking info for the store we're going to delete. - Info.AssignmentTracking.updateForDeletedStore(SI, DIB); + Info.AssignmentTracking.updateForDeletedStore(SI, DIB, DbgAssignsToDelete); // Record debuginfo for the store before removing it. for (DbgVariableIntrinsic *DII : Info.DbgUsers) { if (DII->isAddressOfVariable()) { @@ -666,7 +688,8 @@ void PromoteMem2Reg::run() { // If there is only a single store to this value, replace any loads of // it that are directly dominated by the definition with the value stored. if (Info.DefiningBlocks.size() == 1) { - if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { + if (rewriteSingleStoreAlloca(AI, Info, LBI, SQ.DL, DT, AC, + &DbgAssignsToDelete)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); ++NumSingleStore; @@ -677,7 +700,8 @@ void PromoteMem2Reg::run() { // If the alloca is only read and written in one basic block, just perform a // linear sweep over the block to eliminate it. if (Info.OnlyUsedInOneBlock && - promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC)) { + promoteSingleBlockAlloca(AI, Info, LBI, SQ.DL, DT, AC, + &DbgAssignsToDelete)) { // The alloca has been processed, move on. RemoveFromAllocasList(AllocaNum); continue; @@ -726,9 +750,10 @@ void PromoteMem2Reg::run() { QueuePhiNode(BB, AllocaNum, CurrentVersion); } - if (Allocas.empty()) + if (Allocas.empty()) { + cleanUpDbgAssigns(); return; // All of the allocas must have been trivial! - + } LBI.clear(); // Set the incoming values for the basic block to be null values for all of @@ -867,6 +892,7 @@ void PromoteMem2Reg::run() { } NewPhiNodes.clear(); + cleanUpDbgAssigns(); } /// Determine which blocks the value is live in. @@ -1070,7 +1096,8 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, // Record debuginfo for the store before removing it. 
IncomingLocs[AllocaNo] = SI->getDebugLoc(); - AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB); + AllocaATInfo[AllocaNo].updateForDeletedStore(SI, DIB, + &DbgAssignsToDelete); for (DbgVariableIntrinsic *DII : AllocaDbgUsers[ai->second]) if (DII->isAddressOfVariable()) ConvertDebugDeclareToDebugValue(DII, SI, DIB); diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/phi.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/phi.ll index 4101300dc7ee0..d7c1299788703 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/phi.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/phi.ll @@ -8,14 +8,14 @@ ; CHECK: entry: ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[B:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %a, metadata ![[A:[0-9]+]], {{.*}}, metadata ptr undef +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[A:[0-9]+]] ; CHECK: if.then: ; CHECK-NEXT: %add = ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata ![[B]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %add, metadata ![[A]], {{.*}}, metadata ptr undef +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata ![[A]] ; CHECK: if.else: ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 -1, metadata ![[B]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 -1, metadata ![[A]], {{.*}}, metadata ptr undef +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 -1, metadata ![[A]] ; CHECK: if.end: ; CHECK-NEXT: %a.addr.0 = phi i32 ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a.addr.0, metadata ![[A]] diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-block-alloca.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-block-alloca.ll index 5d05741c502eb..26e61cd50e451 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-block-alloca.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-block-alloca.ll @@ -8,10 +8,10 @@ ; CHECK: entry: ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[B:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %a, metadata ![[A:[0-9]+]], {{.*}}, metadata ptr undef +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[A:[0-9]+]] ; CHECK-NEXT: %add = ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata ![[B]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %add, metadata ![[A]], {{.*}}, metadata ptr undef +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata ![[A]] ; CHECK-DAG: ![[A]] = !DILocalVariable(name: "a", ; CHECK-DAG: ![[B]] = !DILocalVariable(name: "b", diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-store-alloca.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-store-alloca.ll index 2d95a74c05c3e..ea8f0b8a10e8c 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-store-alloca.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-store-alloca.ll @@ -7,7 +7,7 @@ ; CHECK: entry: ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[B:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %a, metadata ![[A:[0-9]+]], {{.*}}, metadata ptr undef +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[A:[0-9]+]] ; CHECK-NEXT: ret ; CHECK-DAG: ![[A]] = !DILocalVariable(name: "a", diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/complex.ll 
b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/complex.ll index d2bd10996c3ae..cc733a0dbbc1e 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/complex.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/complex.ll @@ -17,8 +17,8 @@ ;; dbg.assigns for the split (then promoted) stores. ; CHECK: %c.coerce.fca.0.extract = extractvalue [2 x i64] %c.coerce, 0 ; CHECK: %c.coerce.fca.1.extract = extractvalue [2 x i64] %c.coerce, 1 -; CHECK: call void @llvm.dbg.assign(metadata i64 %c.coerce.fca.0.extract,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64),{{.+}}, metadata ptr undef, metadata !DIExpression()) -; CHECK: call void @llvm.dbg.assign(metadata i64 %c.coerce.fca.1.extract,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64),{{.+}}, metadata ptr undef, {{.+}}) +; CHECK: call void @llvm.dbg.value(metadata i64 %c.coerce.fca.0.extract,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) +; CHECK: call void @llvm.dbg.value(metadata i64 %c.coerce.fca.1.extract,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7-apple-unknown" diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag.ll index 28786c61982cf..38ebed1cf0b91 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag.ll @@ -21,8 +21,8 @@ ; CHECK: %call = call ; CHECK-NEXT: %0 = extractvalue { <2 x float>, <2 x float> } %call, 0 ; CHECK-NEXT: %1 = extractvalue { <2 x float>, <2 x float> } %call, 1 -; CHECK-NEXT: call void @llvm.dbg.assign(metadata <2 x float> %0, metadata ![[var:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 256, 64),{{.+}},{{.+}}undef, metadata !DIExpression()), !dbg -; CHECK-NEXT: call void @llvm.dbg.assign(metadata <2 x float> %1, metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 320, 64),{{.+}},{{.+}}undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata <2 x float> %0, metadata ![[var:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 256, 64)) +; CHECK-NEXT: call void @llvm.dbg.value(metadata <2 x float> %1, metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 320, 64)) %class.c = type { [4 x float] } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/id.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/id.ll index 6b797ce17b3c9..13c05081ffc52 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/id.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/id.ll @@ -28,13 +28,13 @@ ; CHECK: if.then: ; CHECK-NEXT: %1 = load float -; CHECK-NEXT: call void @llvm.dbg.assign(metadata float %storemerge, metadata ![[var:[0-9]+]], metadata !DIExpression(), metadata ![[id:[0-9]+]], metadata ptr undef, metadata !DIExpression()), !dbg ![[dbg:[0-9]+]] +; CHECK-NEXT: call void @llvm.dbg.value(metadata float %storemerge, metadata ![[var:[0-9]+]], metadata !DIExpression()) ; CHECK: if.else: ; CHECK-NEXT: %2 = load float ; CHECK-NEXT: %3 = load float ; CHECK-NEXT: %div = fdiv float -; CHECK: call void @llvm.dbg.assign(metadata float %storemerge, metadata ![[var]], metadata !DIExpression(), metadata ![[id]], metadata ptr undef, metadata !DIExpression()), !dbg ![[dbg]] +; CHECK: call void @llvm.dbg.value(metadata float %storemerge, metadata ![[var]], metadata !DIExpression()) %class.a = type { i8 } diff --git 
a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memcpy.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memcpy.ll index da488e907010b..22fd7eb86273d 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memcpy.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memcpy.ll @@ -39,7 +39,7 @@ ;; Intrinsics for the splits above. ; CHECK-NEXT: call void @llvm.dbg.assign(metadata {{.+}} undef, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_4]], metadata ptr %To.sroa.0, metadata !DIExpression()), !dbg -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %To.sroa.3.0.copyload, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %To.sroa.3.0.copyload, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) ; CHECK-NEXT: call void @llvm.dbg.assign(metadata {{.+}} undef, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_6]], metadata ptr %To.sroa.4, metadata !DIExpression()), !dbg target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memmove-to-from-same-alloca.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memmove-to-from-same-alloca.ll index 92ae82b694e0f..3e744eaf985cf 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memmove-to-from-same-alloca.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memmove-to-from-same-alloca.ll @@ -41,7 +41,7 @@ ; CHECK: %A.sroa.0.sroa.5 = alloca [5 x i32] ; CHECK: llvm.memcpy{{.*}}(ptr align 4 %A.sroa.0.sroa.5, ptr align 4 getelementptr inbounds (i8, ptr @Glob, i64 4), i64 20, i1 false){{.*}}!DIAssignID ![[ID:[0-9]+]] ;; Here's the dbg.assign for element 0 - it's not important for the test. -; CHECK-NEXT: llvm.dbg.assign({{.*}}!DIExpression(DW_OP_LLVM_fragment, 0, 32){{.*}}) +; CHECK-NEXT: llvm.dbg.value({{.*}}!DIExpression(DW_OP_LLVM_fragment, 0, 32){{.*}}) ;; This is the dbg.assign we care about: ; CHECK-NEXT: llvm.dbg.assign(metadata i1 undef, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 160), metadata ![[ID]], metadata ptr %A.sroa.0.sroa.5, metadata !DIExpression()) diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/rewrite.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/rewrite.ll index 7ee607d6ba8c2..f7ba8026df463 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/rewrite.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/rewrite.ll @@ -37,12 +37,12 @@ ; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_5]], metadata ptr %S.sroa.0, metadata !DIExpression()), !dbg ;; Check the middle slice (no memset) gets a correct dbg.assign. 
-; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) ; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_6]], metadata ptr %S.sroa.5, metadata !DIExpression()), !dbg ;; mem2reg promotes the load/store to the middle slice created by SROA: ; CHECK-NEXT: %0 = load i32, ptr @Glob, align 4, !dbg !{{.+}} -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32), metadata ![[ID_4:[0-9]+]], metadata ptr undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/store.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/store.ll index 6e5c7e76b9c54..72d6ab234dcbe 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/store.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/store.ll @@ -40,13 +40,13 @@ ; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_4]], metadata ptr %S.sroa.0, metadata !DIExpression()), !dbg ;; This is the one we care about most in this test: check that a memset->store ;; gets a correct dbg.assign. -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) ; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_5]], metadata ptr %S.sroa.4, metadata !DIExpression()), !dbg ;; The load from global+store becomes a load. ;; FIXME: In reality it is actually stored again later on. ; CHECK-NEXT: %0 = load i32, ptr @Glob, align 4, !dbg !{{.+}} -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg ! +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/unspecified-var-size.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/unspecified-var-size.ll index aff5a59819bcc..c04e5a7a2146a 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/unspecified-var-size.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/unspecified-var-size.ll @@ -7,7 +7,7 @@ ;; Check that migrateDebugInfo doesn't crash when encountering an alloca for a ;; variable with a type of unspecified size (e.g. DW_TAG_unspecified_type). 
-; CHECK: @llvm.dbg.assign(metadata ptr %0,{{.+}}, metadata !DIExpression(),{{.+}}, metadata ptr undef, {{.+}}) +; CHECK: @llvm.dbg.value(metadata ptr %0,{{.+}}, metadata !DIExpression()) ;; There should be no new fragment and the value component should remain as %0. define dso_local void @_Z3funDn(ptr %0) #0 !dbg !14 { diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll index ad43744aa86b2..abc110273ab3b 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll @@ -2,9 +2,7 @@ ; RUN: | FileCheck %s --implicit-check-not="call void @llvm.dbg" ;; Check that the fragments generated in SROA for a split alloca that has a -;; dbg.assign with non-zero-offset fragment already are correct. Ensure that -;; only the value-expression gets fragment info; that the address-expression -;; remains untouched. +;; dbg.assign with non-zero-offset fragment are correct. ;; $ cat test.cpp ;; #include @@ -23,11 +21,11 @@ ;; Allocas have been promoted - the linked dbg.assigns have been removed. ;; | V3i point = {0, 0, 0}; -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 0, metadata ![[point:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 0, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 0, metadata ![[point:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 0, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) ;; point.z = 5000; -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 5000, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 5000, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64)) ;; | V3i other = {10, 9, 8}; ;; other is global const: @@ -37,17 +35,17 @@ ; CHECK-NEXT: %other.sroa.0.0.copyload = load i64, ptr @__const._Z3funv.other ; CHECK-NEXT: %other.sroa.2.0.copyload = load i64, ptr getelementptr inbounds (i8, ptr @__const._Z3funv.other, i64 8) ; CHECK-NEXT: %other.sroa.3.0.copyload = load i64, ptr getelementptr inbounds (i8, ptr @__const._Z3funv.other, i64 16) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 %other.sroa.0.0.copyload, metadata ![[other:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 %other.sroa.2.0.copyload, metadata ![[other]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 %other.sroa.3.0.copyload, metadata ![[other]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %other.sroa.0.0.copyload, metadata ![[other:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 
%other.sroa.2.0.copyload, metadata ![[other]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %other.sroa.3.0.copyload, metadata ![[other]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64)) ;; | std::memcpy(&point.y, &other.x, sizeof(long) * 2); ;; other is now 3 scalars: ;; point.y = other.x -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 %other.sroa.0.0.copyload, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %other.sroa.0.0.copyload, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) ;; ;; point.z = other.y -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 %other.sroa.2.0.copyload, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %other.sroa.2.0.copyload, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64)) ; CHECK: ![[point]] = !DILocalVariable(name: "point", ; CHECK: ![[other]] = !DILocalVariable(name: "other", diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-1.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-1.ll index 421838b3de046..f09e88144e835 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-1.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-1.ll @@ -18,9 +18,9 @@ ; CHECK: %call = call ; CHECK-NEXT: %0 = extractvalue { <2 x float>, <2 x float> } %call, 0 -; CHECK-NEXT: call void @llvm.dbg.assign(metadata <2 x float> %0, metadata ![[var:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64), metadata ![[id1:[0-9]+]],{{.+}} undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata <2 x float> %0, metadata ![[var:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) ; CHECK-NEXT: %1 = extractvalue { <2 x float>, <2 x float> } %call, 1 -; CHECK-NEXT: call void @llvm.dbg.assign(metadata <2 x float> %1, metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata ![[id2:[0-9]+]], {{.+}} undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: call void @llvm.dbg.value(metadata <2 x float> %1, metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) %class.c = type { i8 } %class.a = type { [4 x float] } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll index e8ec4dcf85f4d..9633950d4e634 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll @@ -29,9 +29,9 @@ ; CHECK: %i.sroa.2.12.vec.insert = insertelement <2 x float> %i.sroa.2.0.vec.insert, float %2, i32 1, !dbg ;; There's a few dbg intrinsics we're not interested in testing wedged in here. 
; CHECK-NEXT: dbg.value -; CHECK-NEXT: dbg.assign -; CHECK-NEXT: dbg.assign -; CHECK-NEXT: call void @llvm.dbg.assign(metadata float %2,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32),{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg +; CHECK-NEXT: dbg.value +; CHECK-NEXT: dbg.value +; CHECK-NEXT: call void @llvm.dbg.value(metadata float %2,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) %class.d = type { %class.a } %class.a = type { [4 x float] } From 289542b1e72dc4fe17093952dfb1b04cce259183 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 21 Mar 2023 10:46:13 +0100 Subject: [PATCH 158/691] [InstCombine] Fold icmp eq of non-inbounds geps We can fold equality comparisons of non-inbounds geps to offset comparison (https://alive2.llvm.org/ce/z/x2Zp8b). The inbounds requirement is only necessary for relational comparisons. --- llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp | 3 ++- llvm/test/Transforms/InstCombine/icmp-gep.ll | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index b9473634e6dc7..421b1824b965d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -884,7 +884,8 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, // Only lower this if the icmp is the only user of the GEP or if we expect // the result to fold to a constant! - if (GEPsInBounds && (isa(GEPLHS) || GEPLHS->hasOneUse()) && + if ((GEPsInBounds || CmpInst::isEquality(Cond)) && + (isa(GEPLHS) || GEPLHS->hasOneUse()) && (isa(GEPRHS) || GEPRHS->hasOneUse())) { // ((gep Ptr, OFFSET1) cmp (gep Ptr, OFFSET2) ---> (OFFSET1 cmp OFFSET2) Value *L = EmitGEPOffset(GEPLHS); diff --git a/llvm/test/Transforms/InstCombine/icmp-gep.ll b/llvm/test/Transforms/InstCombine/icmp-gep.ll index 5fef7ac435361..5cc6d9f80bac4 100644 --- a/llvm/test/Transforms/InstCombine/icmp-gep.ll +++ b/llvm/test/Transforms/InstCombine/icmp-gep.ll @@ -299,9 +299,8 @@ define i1 @test_gep_ult_no_inbounds(ptr %foo, i64 %i, i64 %j) { define i1 @test_gep_eq_no_inbounds(ptr %foo, i64 %i, i64 %j) { ; CHECK-LABEL: @test_gep_eq_no_inbounds( -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, ptr [[FOO:%.*]], i64 [[I:%.*]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, ptr [[FOO]], i64 [[J:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[GEP1]], [[GEP2]] +; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl i64 [[I:%.*]], 2 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[GEP1_IDX]], [[J:%.*]] ; CHECK-NEXT: ret i1 [[CMP]] ; %gep1 = getelementptr i32, ptr %foo, i64 %i From 54ab9541492d808ae4cf9130dd052d602b78ee32 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Tue, 21 Mar 2023 13:58:36 +0400 Subject: [PATCH 159/691] [BOLT] Reject symbols pointing to section end Sometimes, symbols are present that point to the end of a section (i.e., one-past the highest valid address). Currently, BOLT either rejects those symbols when they don't point to another existing section, or errs when they do and the other section is not executable. I suppose BOLT would accept the symbol when it points to an executable section. In any case, these symbols should not be considered while discovering functions and should not result in an error. This patch implements that. Note that this patch checks explicitly for symbols whose value equals the end of their section. 
It might make more sense to verify that the symbol's value is within [section start, section end). However, I'm not sure if this could ever happen *and* its value does not equal the end. Another way to implement this is to verify that the BinarySection we find at the symbol's address actually corresponds to the symbol's section. I'm not sure what the best approach is, so feedback is welcome. Reviewed By: yota9, rafauler Differential Revision: https://reviews.llvm.org/D146215 --- bolt/lib/Rewrite/RewriteInstance.cpp | 10 ++++++++++ bolt/test/X86/section-end-sym.s | 29 ++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 bolt/test/X86/section-end-sym.s diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 04ccbcf20de11..57e41d5b5724d 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -1050,6 +1050,16 @@ void RewriteInstance::discoverFileObjects() { LLVM_DEBUG(dbgs() << "BOLT-DEBUG: considering symbol " << UniqueName << " for function\n"); + if (Address == Section->getAddress() + Section->getSize()) { + assert(SymbolSize == 0 && + "unexpected non-zero sized symbol at end of section"); + LLVM_DEBUG( + dbgs() + << "BOLT-DEBUG: rejecting as symbol points to end of its section\n"); + registerName(SymbolSize); + continue; + } + if (!Section->isText()) { assert(SymbolType != SymbolRef::ST_Function && "unexpected function inside non-code section"); diff --git a/bolt/test/X86/section-end-sym.s b/bolt/test/X86/section-end-sym.s new file mode 100644 index 0000000000000..a9bca5604ec16 --- /dev/null +++ b/bolt/test/X86/section-end-sym.s @@ -0,0 +1,29 @@ +## Check that BOLT doesn't consider end-of-section symbols (e.g., _etext) as +## functions. + +# REQUIRES: system-linux + +# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o +# RUN: ld.lld %t.o -o %t.exe -q +# RUN: llvm-bolt %t.exe -o /dev/null --print-cfg --debug-only=bolt 2>&1 \ +# RUN: | FileCheck %s + +# CHECK: considering symbol etext for function +# CHECK-NEXT: rejecting as symbol points to end of its section +# CHECK-NOT: Binary Function "etext{{.*}}" after building cfg + + + .text + .globl _start + .type _start,@function +_start: + retq + .size _start, .-_start + + .align 0x1000 + .globl etext +etext: + + .data +.Lfoo: + .word 0 From 25ca26e0da2e1f80d62f71807828762691a049ac Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Tue, 21 Mar 2023 09:49:43 +0000 Subject: [PATCH 160/691] [AMDGPU] Make use of GCNSubtarget::hasNSAEncoding. NFC. --- llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp index 366bc0a8ec0d5..4c9ad9b5bcf75 100644 --- a/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp +++ b/llvm/lib/Target/AMDGPU/GCNNSAReassign.cpp @@ -237,7 +237,7 @@ GCNNSAReassign::CheckNSA(const MachineInstr &MI, bool Fast) const { bool GCNNSAReassign::runOnMachineFunction(MachineFunction &MF) { ST = &MF.getSubtarget<GCNSubtarget>(); - if (ST->getGeneration() < GCNSubtarget::GFX10) + if (!ST->hasNSAEncoding()) return false; MRI = &MF.getRegInfo(); From c270aafb182c61123daa55ab53794af7f4ff9757 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 21 Mar 2023 17:11:10 +0700 Subject: [PATCH 161/691] Mark widenable condition as noundef This corresponds to its description in LangRef: The intrinsic @llvm.experimental.widenable.condition() returns either true or false. 
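For illustration, here is a minimal IR sketch (not part of the original commit message) of the explicit-branch guard pattern in which this intrinsic appears; because the call can only produce true or false, never undef or poison, attaching noundef to its return is sound and lets optimizers assume the branch below never branches on undef:

  declare i1 @llvm.experimental.widenable.condition()

  define void @guard(i1 %cond) {
  entry:
    %wc = call i1 @llvm.experimental.widenable.condition() ; result is noundef
    %guard = and i1 %cond, %wc                             ; may be widened by later passes
    br i1 %guard, label %guarded, label %deopt

  guarded:                                                 ; fast path, assumptions hold
    ret void

  deopt:                                                   ; side exit
    ret void
  }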
Differential Revision: https://reviews.llvm.org/D146508 Reviewed By: skatkov, nikic --- llvm/include/llvm/IR/Intrinsics.td | 2 +- .../InstCombine/freeze-integer-intrinsics.ll | 3 +-- .../InstCombine/widenable-conditions.ll | 18 +++++++++--------- .../Transforms/SimpleLoopUnswitch/pr60736.ll | 5 +++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 0e32ca9c8c7e4..c24f53ce018f9 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1376,7 +1376,7 @@ def int_experimental_guard : DefaultAttrsIntrinsic<[], [llvm_i1_ty, llvm_vararg_ // Supports widenable conditions for guards represented as explicit branches. def int_experimental_widenable_condition : DefaultAttrsIntrinsic<[llvm_i1_ty], [], - [IntrInaccessibleMemOnly, IntrWillReturn, IntrSpeculatable]>; + [IntrInaccessibleMemOnly, IntrWillReturn, IntrSpeculatable, NoUndef]>; // NOP: calls/invokes to this intrinsic are removed by codegen def int_donothing : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrWillReturn]>; diff --git a/llvm/test/Transforms/InstCombine/freeze-integer-intrinsics.ll b/llvm/test/Transforms/InstCombine/freeze-integer-intrinsics.ll index 1d8d2a8029576..105bd28fb052e 100644 --- a/llvm/test/Transforms/InstCombine/freeze-integer-intrinsics.ll +++ b/llvm/test/Transforms/InstCombine/freeze-integer-intrinsics.ll @@ -419,8 +419,7 @@ define @sshl_sat_v2i32_scalable_zeroinitializer( Date: Tue, 21 Mar 2023 11:19:13 +0100 Subject: [PATCH 162/691] [lldb] Relax expectation on TestMainThreadExit The exit code of the (funky) test inferior depends on the linux kernel version (changed some time between 5.15 and 6.1). --- .../thread/main_thread_exit/TestMainThreadExit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/functionalities/thread/main_thread_exit/TestMainThreadExit.py b/lldb/test/API/functionalities/thread/main_thread_exit/TestMainThreadExit.py index 50060dd242f8c..7a8dd80faf944 100644 --- a/lldb/test/API/functionalities/thread/main_thread_exit/TestMainThreadExit.py +++ b/lldb/test/API/functionalities/thread/main_thread_exit/TestMainThreadExit.py @@ -26,4 +26,5 @@ def test(self): self.expect_expr("call_me()", result_value="12345") self.runCmd("continue") - self.assertEquals(self.process().GetExitStatus(), 47) + # Exit code depends on the version of the linux kernel + self.assertIn(self.process().GetExitStatus(), [42, 47]) From 3a8f161a3401edeb58e018e2d389dd2413a6417f Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Tue, 21 Mar 2023 11:56:08 +0100 Subject: [PATCH 163/691] [mlir] Add a pattern to fold single- and zero-iteration scf.forall ops. Differential Revision: https://reviews.llvm.org/D145368 --- mlir/include/mlir/Dialect/SCF/IR/SCF.h | 8 + .../mlir/Dialect/Utils/StaticValueUtils.h | 5 + mlir/lib/Dialect/SCF/IR/SCF.cpp | 212 ++++++++++++++---- mlir/lib/Dialect/SCF/Utils/Utils.cpp | 14 +- mlir/lib/Dialect/Utils/StaticValueUtils.cpp | 21 ++ mlir/test/Dialect/SCF/canonicalize.mlir | 107 +++++++++ ...fold-consecutive-insert-extract-slice.mlir | 2 +- 7 files changed, 311 insertions(+), 58 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCF.h b/mlir/include/mlir/Dialect/SCF/IR/SCF.h index 7f714d0a07646..cb399b78c406d 100644 --- a/mlir/include/mlir/Dialect/SCF/IR/SCF.h +++ b/mlir/include/mlir/Dialect/SCF/IR/SCF.h @@ -62,6 +62,14 @@ ForallOp getForallOpThreadIndexOwner(Value val); // TODO: Consider moving this functionality to RegionBranchOpInterface. 
bool insideMutuallyExclusiveBranches(Operation *a, Operation *b); +/// Promotes the loop body of a scf::ForallOp to its containing block if the +/// loop was known to have a single iteration. +LogicalResult promoteIfSingleIteration(PatternRewriter &rewriter, + scf::ForallOp forallOp); + +/// Promotes the loop body of a scf::ForallOp to its containing block. +void promote(PatternRewriter &rewriter, scf::ForallOp forallOp); + /// An owning vector of values, handy to return from functions. using ValueVector = SmallVector; using LoopVector = SmallVector; diff --git a/mlir/include/mlir/Dialect/Utils/StaticValueUtils.h b/mlir/include/mlir/Dialect/Utils/StaticValueUtils.h index 27c27756b3918..47910e2069761 100644 --- a/mlir/include/mlir/Dialect/Utils/StaticValueUtils.h +++ b/mlir/include/mlir/Dialect/Utils/StaticValueUtils.h @@ -128,6 +128,11 @@ SmallVector getValuesSortedByKey(ArrayRef keys, ArrayRef values, llvm::function_ref compare); +/// Return the number of iterations for a loop with a lower bound `lb`, upper +/// bound `ub` and step `step`. +std::optional constantTripCount(OpFoldResult lb, OpFoldResult ub, + OpFoldResult step); + } // namespace mlir #endif // MLIR_DIALECT_UTILS_STATICVALUEUTILS_H diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index 4e7bcc499be3d..e212159442844 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -534,6 +534,61 @@ void ForOp::getSuccessorRegions(std::optional index, regions.push_back(RegionSuccessor(getResults())); } +/// Promotes the loop body of a forallOp to its containing block if it can be +/// determined that the loop has a single iteration. +LogicalResult mlir::scf::promoteIfSingleIteration(PatternRewriter &rewriter, + scf::ForallOp forallOp) { + for (auto [lb, ub, step] : + llvm::zip(forallOp.getMixedLowerBound(), forallOp.getMixedUpperBound(), + forallOp.getMixedStep())) { + auto tripCount = constantTripCount(lb, ub, step); + if (!tripCount.has_value() || *tripCount != 1) + return failure(); + } + + promote(rewriter, forallOp); + return success(); +} + +/// Promotes the loop body of a scf::ForallOp to its containing block. 
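+/// Induction variables are mapped to the loop lower bounds, the body ops are +/// cloned into the parent block, and each tensor.parallel_insert_slice in the +/// terminator is rewritten into a tensor.insert_slice on the corresponding +/// shared output.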
+void mlir::scf::promote(PatternRewriter &rewriter, scf::ForallOp forallOp) { + IRMapping mapping; + mapping.map(forallOp.getInductionVars(), forallOp.getLowerBound(rewriter)); + mapping.map(forallOp.getOutputBlockArguments(), forallOp.getOutputs()); + for (auto &bodyOp : forallOp.getBody()->without_terminator()) + rewriter.clone(bodyOp, mapping); + + SmallVector results; + results.reserve(forallOp.getResults().size()); + scf::InParallelOp terminator = forallOp.getTerminator(); + for (auto &yieldingOp : terminator.getYieldingOps()) { + auto parallelInsertSliceOp = + cast(yieldingOp); + + Value dst = parallelInsertSliceOp.getDest(); + Value src = parallelInsertSliceOp.getSource(); + + auto getMappedValues = [&](ValueRange values) { + return llvm::to_vector(llvm::map_range( + values, [&](Value value) { return mapping.lookupOrDefault(value); })); + }; + + Value srcVal = mapping.lookupOrDefault(src); + if (srcVal.getType().isa()) { + results.push_back(rewriter.create( + forallOp.getLoc(), dst.getType(), srcVal, + mapping.lookupOrDefault(dst), + getMappedValues(parallelInsertSliceOp.getOffsets()), + getMappedValues(parallelInsertSliceOp.getSizes()), + getMappedValues(parallelInsertSliceOp.getStrides()), + parallelInsertSliceOp.getStaticOffsets(), + parallelInsertSliceOp.getStaticSizes(), + parallelInsertSliceOp.getStaticStrides())); + } + } + rewriter.replaceOp(forallOp, results); +} + LoopNest mlir::scf::buildLoopNest( OpBuilder &builder, Location loc, ValueRange lbs, ValueRange ubs, ValueRange steps, ValueRange iterArgs, @@ -1452,16 +1507,99 @@ class ForallOpControlOperandsFolder : public OpRewritePattern { dispatchIndexOpFoldResults(mixedStep, dynamicStep, staticStep); op.getDynamicStepMutable().assign(dynamicStep); op.setStaticStep(staticStep); + + op->setAttr(ForallOp::getOperandSegmentSizeAttr(), + rewriter.getDenseI32ArrayAttr( + {static_cast(dynamicLowerBound.size()), + static_cast(dynamicUpperBound.size()), + static_cast(dynamicStep.size()), + static_cast(op.getNumResults())})); }); return success(); } }; +struct ForallOpSingleOrZeroIterationDimsFolder + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(ForallOp op, + PatternRewriter &rewriter) const override { + // Do not fold dimensions if they are mapped to processing units. + if (op.getMapping().has_value()) + return failure(); + Location loc = op.getLoc(); + + // Compute new loop bounds that omit all single-iteration loop dimensions. + SmallVector newMixedLowerBounds, newMixedUpperBounds, + newMixedSteps; + IRMapping mapping; + for (auto [lb, ub, step, iv] : + llvm::zip(op.getMixedLowerBound(), op.getMixedUpperBound(), + op.getMixedStep(), op.getInductionVars())) { + auto numIterations = constantTripCount(lb, ub, step); + if (numIterations.has_value()) { + // Remove the loop if it performs zero iterations. + if (*numIterations == 0) { + rewriter.replaceOp(op, op.getOutputs()); + return success(); + } + // Replace the loop induction variable by the lower bound if the loop + // performs a single iteration. Otherwise, copy the loop bounds. + if (*numIterations == 1) { + mapping.map(iv, getValueOrCreateConstantIndexOp(rewriter, loc, lb)); + continue; + } + } + newMixedLowerBounds.push_back(lb); + newMixedUpperBounds.push_back(ub); + newMixedSteps.push_back(step); + } + // Exit if none of the loop dimensions perform a single iteration. 
+ if (newMixedLowerBounds.size() == static_cast(op.getRank())) { + return rewriter.notifyMatchFailure( + op, "no dimensions have 0 or 1 iterations"); + } + + // All of the loop dimensions perform a single iteration. Inline loop body. + if (newMixedLowerBounds.empty()) { + promote(rewriter, op); + return success(); + } + + // Replace the loop by a lower-dimensional loop. + ForallOp newOp; + newOp = rewriter.create(loc, newMixedLowerBounds, + newMixedUpperBounds, newMixedSteps, + op.getOutputs(), std::nullopt, nullptr); + newOp.getBodyRegion().getBlocks().clear(); + // The new loop needs to keep all attributes from the old one, except for + // "operand_segment_sizes" and static loop bound attributes which capture + // the outdated information of the old iteration domain. + SmallVector elidedAttrs{newOp.getOperandSegmentSizesAttrName(), + newOp.getStaticLowerBoundAttrName(), + newOp.getStaticUpperBoundAttrName(), + newOp.getStaticStepAttrName()}; + for (const auto &namedAttr : op->getAttrs()) { + if (llvm::is_contained(elidedAttrs, namedAttr.getName())) + continue; + rewriter.updateRootInPlace(newOp, [&]() { + newOp->setAttr(namedAttr.getName(), namedAttr.getValue()); + }); + } + rewriter.cloneRegionBefore(op.getRegion(), newOp.getRegion(), + newOp.getRegion().begin(), mapping); + rewriter.replaceOp(op, newOp.getResults()); + return success(); + } +}; + } // namespace void ForallOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); + results.add(context); } //===----------------------------------------------------------------------===// @@ -2615,41 +2753,37 @@ ParallelOp mlir::scf::getParallelForInductionVarOwner(Value val) { namespace { // Collapse loop dimensions that perform a single iteration. -struct CollapseSingleIterationLoops : public OpRewritePattern { +struct ParallelOpSingleOrZeroIterationDimsFolder + : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(ParallelOp op, PatternRewriter &rewriter) const override { - IRMapping mapping; + Location loc = op.getLoc(); + // Compute new loop bounds that omit all single-iteration loop dimensions. - SmallVector newLowerBounds; - SmallVector newUpperBounds; - SmallVector newSteps; - newLowerBounds.reserve(op.getLowerBound().size()); - newUpperBounds.reserve(op.getUpperBound().size()); - newSteps.reserve(op.getStep().size()); - for (auto [lowerBound, upperBound, step, iv] : + SmallVector newLowerBounds, newUpperBounds, newSteps; + IRMapping mapping; + for (auto [lb, ub, step, iv] : llvm::zip(op.getLowerBound(), op.getUpperBound(), op.getStep(), op.getInductionVars())) { - // Collect the statically known loop bounds. - auto lowerBoundConstant = - dyn_cast_or_null(lowerBound.getDefiningOp()); - auto upperBoundConstant = - dyn_cast_or_null(upperBound.getDefiningOp()); - auto stepConstant = - dyn_cast_or_null(step.getDefiningOp()); - // Replace the loop induction variable by the lower bound if the loop - // performs a single iteration. Otherwise, copy the loop bounds. 
- if (lowerBoundConstant && upperBoundConstant && stepConstant && - (upperBoundConstant.value() - lowerBoundConstant.value()) > 0 && - (upperBoundConstant.value() - lowerBoundConstant.value()) <= - stepConstant.value()) { - mapping.map(iv, lowerBound); - } else { - newLowerBounds.push_back(lowerBound); - newUpperBounds.push_back(upperBound); - newSteps.push_back(step); + auto numIterations = constantTripCount(lb, ub, step); + if (numIterations.has_value()) { + // Remove the loop if it performs zero iterations. + if (*numIterations == 0) { + rewriter.replaceOp(op, op.getInitVals()); + return success(); + } + // Replace the loop induction variable by the lower bound if the loop + // performs a single iteration. Otherwise, copy the loop bounds. + if (*numIterations == 1) { + mapping.map(iv, getValueOrCreateConstantIndexOp(rewriter, loc, lb)); + continue; + } } + newLowerBounds.push_back(lb); + newUpperBounds.push_back(ub); + newSteps.push_back(step); } // Exit if none of the loop dimensions perform a single iteration. if (newLowerBounds.size() == op.getLowerBound().size()) @@ -2694,23 +2828,6 @@ struct CollapseSingleIterationLoops : public OpRewritePattern { } }; -/// Removes parallel loops in which at least one lower/upper bound pair consists -/// of the same values - such loops have an empty iteration domain. -struct RemoveEmptyParallelLoops : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(ParallelOp op, - PatternRewriter &rewriter) const override { - for (auto dim : llvm::zip(op.getLowerBound(), op.getUpperBound())) { - if (std::get<0>(dim) == std::get<1>(dim)) { - rewriter.replaceOp(op, op.getInitVals()); - return success(); - } - } - return failure(); - } -}; - struct MergeNestedParallelLoops : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -2773,8 +2890,9 @@ struct MergeNestedParallelLoops : public OpRewritePattern { void ParallelOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); + results + .add( + context); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp index 6eca0ef9f69cf..e16e2881185a9 100644 --- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp @@ -381,18 +381,12 @@ static void replaceIterArgsAndYieldResults(scf::ForOp forOp) { /// Promotes the loop body of a forOp to its containing block if the forOp /// it can be determined that the loop has a single iteration. 
LogicalResult mlir::promoteIfSingleIteration(scf::ForOp forOp) { - auto lbCstOp = forOp.getLowerBound().getDefiningOp(); - auto ubCstOp = forOp.getUpperBound().getDefiningOp(); - auto stepCstOp = forOp.getStep().getDefiningOp(); - if (!lbCstOp || !ubCstOp || !stepCstOp || lbCstOp.value() < 0 || - ubCstOp.value() < 0 || stepCstOp.value() < 0) - return failure(); - int64_t tripCount = - mlir::ceilDiv(ubCstOp.value() - lbCstOp.value(), stepCstOp.value()); - if (tripCount != 1) + std::optional tripCount = constantTripCount( + forOp.getLowerBound(), forOp.getUpperBound(), forOp.getStep()); + if (!tripCount.has_value() || tripCount != 1) return failure(); auto iv = forOp.getInductionVar(); - iv.replaceAllUsesWith(lbCstOp); + iv.replaceAllUsesWith(forOp.getLowerBound()); replaceIterArgsAndYieldResults(forOp); diff --git a/mlir/lib/Dialect/Utils/StaticValueUtils.cpp b/mlir/lib/Dialect/Utils/StaticValueUtils.cpp index e646de95a76c9..45edd5f89ffed 100644 --- a/mlir/lib/Dialect/Utils/StaticValueUtils.cpp +++ b/mlir/lib/Dialect/Utils/StaticValueUtils.cpp @@ -10,6 +10,7 @@ #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/IR/Matchers.h" #include "mlir/Support/LLVM.h" +#include "mlir/Support/MathExtras.h" #include "llvm/ADT/APSInt.h" namespace mlir { @@ -228,4 +229,24 @@ getValuesSortedByKey(ArrayRef keys, ArrayRef values, return getValuesSortedByKeyImpl(keys, values, compare); } +/// Return the number of iterations for a loop with a lower bound `lb`, upper +/// bound `ub` and step `step`. +std::optional constantTripCount(OpFoldResult lb, OpFoldResult ub, + OpFoldResult step) { + if (lb == ub) + return 0; + + std::optional lbConstant = getConstantIntValue(lb); + if (!lbConstant) + return std::nullopt; + std::optional ubConstant = getConstantIntValue(ub); + if (!ubConstant) + return std::nullopt; + std::optional stepConstant = getConstantIntValue(step); + if (!stepConstant) + return std::nullopt; + + return mlir::ceilDiv(*ubConstant - *lbConstant, *stepConstant); +} + } // namespace mlir diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index a3ce8a63d4c9f..f69cf196597e2 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -1544,3 +1544,110 @@ func.func @forall_fold_control_operands( return %result : tensor } // CHECK: forall (%{{.*}}, %{{.*}}) in (%{{.*}}, 10) + +// ----- + +func.func @inline_forall_loop(%in: tensor<8x8xf32>) -> tensor<8x8xf32> { + %c8 = arith.constant 8 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty() : tensor<8x8xf32> + %1 = scf.forall (%i, %j) = (%c0, %c0) to (%c1, %c1) + step (%c8, %c8) shared_outs (%out_ = %0) -> (tensor<8x8xf32>) { + %slice = tensor.extract_slice %out_[%i, %j] [2, 3] [1, 1] + : tensor<8x8xf32> to tensor<2x3xf32> + %fill = linalg.fill ins(%cst : f32) outs(%slice : tensor<2x3xf32>) + -> tensor<2x3xf32> + scf.forall.in_parallel { + tensor.parallel_insert_slice %fill into %out_[%i, %j] [2, 3] [1, 1] + : tensor<2x3xf32> into tensor<8x8xf32> + } + } + return %1 : tensor<8x8xf32> +} +// CHECK-LABEL: @inline_forall_loop +// CHECK-NOT: scf.forall +// CHECK: %[[OUT:.*]] = tensor.empty + +// CHECK-NEXT: %[[SLICE:.*]] = tensor.extract_slice %[[OUT]] +// CHECK-SAME: : tensor<8x8xf32> to tensor<2x3xf32> + +// CHECK-NEXT: %[[FILL:.*]] = linalg.fill +// CHECK-SAME: outs(%[[SLICE]] + +// CHECK-NEXT: tensor.insert_slice %[[FILL]] +// CHECK-SAME: : tensor<2x3xf32> into tensor<8x8xf32> + +// 
----- + +func.func @do_not_inline_distributed_forall_loop( + %in: tensor<8x8xf32>) -> tensor<8x8xf32> { + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty() : tensor<8x8xf32> + %1 = scf.forall (%i, %j) = (0, 0) to (1, 1) step (8, 8) + shared_outs (%out_ = %0) -> (tensor<8x8xf32>) { + %slice = tensor.extract_slice %out_[%i, %j] [2, 3] [1, 1] + : tensor<8x8xf32> to tensor<2x3xf32> + %fill = linalg.fill ins(%cst : f32) outs(%slice : tensor<2x3xf32>) + -> tensor<2x3xf32> + scf.forall.in_parallel { + tensor.parallel_insert_slice %fill into %out_[%i, %j] [2, 3] [1, 1] + : tensor<2x3xf32> into tensor<8x8xf32> + } + }{ mapping = [#gpu.thread, #gpu.thread] } + return %1 : tensor<8x8xf32> +} +// CHECK-LABEL: @do_not_inline_distributed_forall_loop +// CHECK: scf.forall + +// ----- + +func.func @collapse_one_dim_parallel(%in: tensor<8x8xf32>) -> tensor<8x8xf32> { + %c8 = arith.constant 8 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c16 = arith.constant 16 : index + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty() : tensor<8x8xf32> + %1 = scf.forall (%i, %j) = (0, %c0) to (1, %c16) + step (8, %c8) shared_outs (%out_ = %0) -> (tensor<8x8xf32>) { + %fill = linalg.fill ins(%cst : f32) outs(%out_ : tensor<8x8xf32>) + -> tensor<8x8xf32> + scf.forall.in_parallel { + tensor.parallel_insert_slice %fill into %out_[%i, %j] [8, 8] [1, 1] + : tensor<8x8xf32> into tensor<8x8xf32> + } + } + return %1 : tensor<8x8xf32> +} +// CHECK-LABEL: @collapse_one_dim_parallel +// CHECK: scf.forall (%[[ARG:.*]]) = (0) to (16) step (8) +// CHECK: linalg.fill +// CHECK: tensor.parallel_insert_slice + +// ----- + +func.func @remove_empty_forall(%in: tensor<8x8xf32>) -> tensor<8x8xf32> { + %c8 = arith.constant 8 : index + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c16 = arith.constant 16 : index + %cst = arith.constant 0.000000e+00 : f32 + %0 = tensor.empty() : tensor<8x8xf32> + %1 = scf.forall (%i, %j) = (%c0, %c16) to (%c1, %c16) + step (%c8, %c8) shared_outs (%out_ = %0) -> (tensor<8x8xf32>) { + %fill = linalg.fill ins(%cst : f32) outs(%out_ : tensor<8x8xf32>) + -> tensor<8x8xf32> + scf.forall.in_parallel { + tensor.parallel_insert_slice %fill into %out_[%i, %j] [8, 8] [1, 1] + : tensor<8x8xf32> into tensor<8x8xf32> + } + } + return %1 : tensor<8x8xf32> +} +// CHECK-LABEL: @remove_empty_forall +// CHECK-NOT: scf.forall +// CHECK: %[[EMPTY:.*]] = tensor.empty +// CHECK: return %[[EMPTY]] + diff --git a/mlir/test/Dialect/Tensor/fold-consecutive-insert-extract-slice.mlir b/mlir/test/Dialect/Tensor/fold-consecutive-insert-extract-slice.mlir index 2358ddeb5b01b..750a8d0edf0e2 100644 --- a/mlir/test/Dialect/Tensor/fold-consecutive-insert-extract-slice.mlir +++ b/mlir/test/Dialect/Tensor/fold-consecutive-insert-extract-slice.mlir @@ -86,7 +86,7 @@ func.func @insert_slice_rank_reducing_dynamic_shape( // CHECK-LABEL: func.func @parallel_insert_slice // CHECK-NOT: tensor.insert_slice -// CHECK: tensor.parallel_insert_slice %{{.*}} into %{{.*}}[%{{.*}}, %{{.*}}] [1, 1] [1, 1] : tensor into tensor<1x2xf32> +// CHECK: tensor.parallel_insert_slice %{{.*}} into %{{.*}}[0, %{{.*}}] [1, 1] [1, 1] : tensor into tensor<1x2xf32> func.func @parallel_insert_slice(%t0: tensor<1x2xf32>, %t1: tensor, %t2: tensor<1x1xf32>) -> tensor<1x2xf32> { %c1 = arith.constant 1 : index %c2 = arith.constant 2 : index From af99aa0ff77da893457605113062220957dc7499 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 21 Mar 2023 11:43:23 +0000 Subject: [PATCH 164/691] [LV] Set inbounds
flag using CreateGEP in VPWidenMemInst (NFC). This avoids having to cast the result of the builder to GetElementPtrInst. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 1054f6aab0748..0b71d309c2975 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9603,7 +9603,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { const auto CreateVecPtr = [&](unsigned Part, Value *Ptr) -> Value * { // Calculate the pointer for the specific unroll-part. - GetElementPtrInst *PartPtr = nullptr; + Value *PartPtr = nullptr; // Use i32 for the gep index type when the value is constant, // or query DataLayout for a more suitable index type otherwise. @@ -9627,20 +9627,15 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { // LastLane = 1 - RunTimeVF Value *LastLane = Builder.CreateSub(ConstantInt::get(IndexTy, 1), RunTimeVF); + PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, NumElt, "", InBounds); PartPtr = - cast(Builder.CreateGEP(ScalarDataTy, Ptr, NumElt)); - PartPtr->setIsInBounds(InBounds); - PartPtr = cast( - Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane)); - PartPtr->setIsInBounds(InBounds); + Builder.CreateGEP(ScalarDataTy, PartPtr, LastLane, "", InBounds); if (isMaskRequired) // Reverse of a null all-one mask is a null mask. BlockInMaskParts[Part] = Builder.CreateVectorReverse(BlockInMaskParts[Part], "reverse"); } else { Value *Increment = createStepForVF(Builder, IndexTy, State.VF, Part); - PartPtr = cast( - Builder.CreateGEP(ScalarDataTy, Ptr, Increment)); - PartPtr->setIsInBounds(InBounds); + PartPtr = Builder.CreateGEP(ScalarDataTy, Ptr, Increment, "", InBounds); } unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace(); From c8af0d3cea8cab9277651f8cabe54e2f1a3b167f Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 16 Mar 2023 11:32:35 +0000 Subject: [PATCH 165/691] [lldb] Add compile time checks for signal codes when on the matching platform This adds a new macro to the UnixSignals subclasses, ADD_SIGCODE. ADD_SIGCODE(4, ILL_ILLOPC, 1, "illegal opcode"); Adds a sigcode to signal 4. That code is ILL_ILLOPC and we expect its value to be 1. When compiling on a system that matches the class e.g. FreeBSD for FreeBSDSignals, the macro will check that that is true. When you're not on FreeBSD we just use the number 1, and ILL_ILLOPC won't be defined to anything because we don't include csignal. 
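For instance, on Linux the line ADD_SIGCODE(4, ILL_ILLOPC, 1, "illegal opcode"); expands to roughly the following (illustrative expansion, not verbatim from the patch):
  static_assert(ILL_ILLOPC == 1, "Value mismatch for signal code ILL_ILLOPC");
  AddSignalCode(4, 1, "illegal opcode");
while on any other host only the AddSignalCode(4, 1, "illegal opcode") call remains.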
Example error: LinuxSignals.cpp:52:3: error: static_assert failed due to requirement 'ILL_COPROC == 9' "Value mismatch for signal code ILL_COPROC" Reviewed By: arichardson Differential Revision: https://reviews.llvm.org/D146222 --- lldb/include/lldb/Target/UnixSignals.h | 2 + .../Platform/FreeBSD/FreeBSDSignals.cpp | 65 ++++++++++------- .../Plugins/Platform/Linux/LinuxSignals.cpp | 71 ++++++++++++------- .../Plugins/Platform/NetBSD/NetBSDSignals.cpp | 53 ++++++++------ 4 files changed, 118 insertions(+), 73 deletions(-) diff --git a/lldb/include/lldb/Target/UnixSignals.h b/lldb/include/lldb/Target/UnixSignals.h index 1287e0007cc75..65eac7ebdd823 100644 --- a/lldb/include/lldb/Target/UnixSignals.h +++ b/lldb/include/lldb/Target/UnixSignals.h @@ -90,6 +90,8 @@ class UnixSignals { enum SignalCodePrintOption { None, Address, Bounds }; + // Instead of calling this directly, use an ADD_SIGCODE macro to get compile + // time checks when on the native platform. void AddSignalCode( int signo, int code, const char *description, SignalCodePrintOption print_option = SignalCodePrintOption::None); diff --git a/lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.cpp b/lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.cpp index c2997e0249219..f597bed80ddec 100644 --- a/lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.cpp +++ b/lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.cpp @@ -8,6 +8,17 @@ #include "FreeBSDSignals.h" +#ifdef __FreeBSD__ +#include <csignal> + +#define ADD_SIGCODE(signal, name, value, ...) \ + static_assert(name == value, "Value mismatch for signal code " #name); \ + AddSignalCode(signal, value, __VA_ARGS__) +#else +#define ADD_SIGCODE(signal, name, value, ...) \ + AddSignalCode(signal, value, __VA_ARGS__) +#endif /* ifdef __FreeBSD__ */ + using namespace lldb_private; FreeBSDSignals::FreeBSDSignals() : UnixSignals() { Reset(); } @@ -17,39 +28,39 @@ void FreeBSDSignals::Reset() { // clang-format off // SIGILL - AddSignalCode(4, 1 /*ILL_ILLOPC*/, "illegal opcode"); - AddSignalCode(4, 2 /*ILL_ILLOPN*/, "illegal operand"); - AddSignalCode(4, 3 /*ILL_ILLADR*/, "illegal addressing mode"); - AddSignalCode(4, 4 /*ILL_ILLTRP*/, "illegal trap"); - AddSignalCode(4, 5 /*ILL_PRVOPC*/, "privileged opcode"); - AddSignalCode(4, 6 /*ILL_PRVREG*/, "privileged register"); - AddSignalCode(4, 7 /*ILL_COPROC*/, "coprocessor error"); - AddSignalCode(4, 8 /*ILL_BADSTK*/, "internal stack error"); + ADD_SIGCODE(4, ILL_ILLOPC, 1, "illegal opcode"); + ADD_SIGCODE(4, ILL_ILLOPN, 2, "illegal operand"); + ADD_SIGCODE(4, ILL_ILLADR, 3, "illegal addressing mode"); + ADD_SIGCODE(4, ILL_ILLTRP, 4, "illegal trap"); + ADD_SIGCODE(4, ILL_PRVOPC, 5, "privileged opcode"); + ADD_SIGCODE(4, ILL_PRVREG, 6, "privileged register"); + ADD_SIGCODE(4, ILL_COPROC, 7, "coprocessor error"); + ADD_SIGCODE(4, ILL_BADSTK, 8, "internal stack error"); // SIGFPE - AddSignalCode(8, 1 /*FPE_INTOVF*/, "integer overflow"); - AddSignalCode(8, 2 /*FPE_INTDIV*/, "integer divide by zero"); - AddSignalCode(8, 3 /*FPE_FLTDIV*/, "floating point divide by zero"); - AddSignalCode(8, 4 /*FPE_FLTOVF*/, "floating point overflow"); - AddSignalCode(8, 5 /*FPE_FLTUND*/, "floating point underflow"); - AddSignalCode(8, 6 /*FPE_FLTRES*/, "floating point inexact result"); - AddSignalCode(8, 7 /*FPE_FLTINV*/, "invalid floating point operation"); - AddSignalCode(8, 8 /*FPE_FLTSUB*/, "subscript out of range"); - AddSignalCode(8, 9 /*FPE_FLTIDO*/, "input denormal operation"); + ADD_SIGCODE(8, FPE_INTOVF, 1, "integer overflow"); + ADD_SIGCODE(8, FPE_INTDIV, 2, "integer
divide by zero"); + ADD_SIGCODE(8, FPE_FLTDIV, 3, "floating point divide by zero"); + ADD_SIGCODE(8, FPE_FLTOVF, 4, "floating point overflow"); + ADD_SIGCODE(8, FPE_FLTUND, 5, "floating point underflow"); + ADD_SIGCODE(8, FPE_FLTRES, 6, "floating point inexact result"); + ADD_SIGCODE(8, FPE_FLTINV, 7, "invalid floating point operation"); + ADD_SIGCODE(8, FPE_FLTSUB, 8, "subscript out of range"); + ADD_SIGCODE(8, FPE_FLTIDO, 9, "input denormal operation"); // SIGBUS - AddSignalCode(10, 1 /*BUS_ADRALN*/, "invalid address alignment"); - AddSignalCode(10, 2 /*BUS_ADRERR*/, "nonexistent physical address"); - AddSignalCode(10, 3 /*BUS_OBJERR*/, "object-specific hardware error"); - AddSignalCode(10, 100 /*BUS_OOMERR*/, "no memory"); + ADD_SIGCODE(10, BUS_ADRALN, 1, "invalid address alignment"); + ADD_SIGCODE(10, BUS_ADRERR, 2, "nonexistent physical address"); + ADD_SIGCODE(10, BUS_OBJERR, 3, "object-specific hardware error"); + ADD_SIGCODE(10, BUS_OOMERR, 100, "no memory"); // SIGSEGV - AddSignalCode(11, 1 /*SEGV_MAPERR*/, "address not mapped to object", - SignalCodePrintOption::Address); - AddSignalCode(11, 2 /*SEGV_ACCERR*/, "invalid permissions for mapped object", - SignalCodePrintOption::Address); - AddSignalCode(11, 100 /*SEGV_PKUERR*/, "PKU violation", - SignalCodePrintOption::Address); + ADD_SIGCODE(11, SEGV_MAPERR, 1, "address not mapped to object", + SignalCodePrintOption::Address); + ADD_SIGCODE(11, SEGV_ACCERR, 2, "invalid permissions for mapped object", + SignalCodePrintOption::Address); + ADD_SIGCODE(11, SEGV_PKUERR, 100, "PKU violation", + SignalCodePrintOption::Address); // SIGNO NAME SUPPRESS STOP NOTIFY DESCRIPTION // ===== ============== ======== ====== ====== ======================== diff --git a/lldb/source/Plugins/Platform/Linux/LinuxSignals.cpp b/lldb/source/Plugins/Platform/Linux/LinuxSignals.cpp index fee7b89ef8371..834a558e4d22c 100644 --- a/lldb/source/Plugins/Platform/Linux/LinuxSignals.cpp +++ b/lldb/source/Plugins/Platform/Linux/LinuxSignals.cpp @@ -8,6 +8,27 @@ #include "LinuxSignals.h" +#ifdef __linux__ +#include + +#ifndef SEGV_BNDERR +#define SEGV_BNDERR 3 +#endif +#ifndef SEGV_MTEAERR +#define SEGV_MTEAERR 8 +#endif +#ifndef SEGV_MTESERR +#define SEGV_MTESERR 9 +#endif + +#define ADD_SIGCODE(signal, name, value, ...) \ + static_assert(name == value, "Value mismatch for signal code " #name); \ + AddSignalCode(signal, value, __VA_ARGS__) +#else +#define ADD_SIGCODE(signal, name, value, ...) 
\ + AddSignalCode(signal, value, __VA_ARGS__) +#endif /* ifdef __linux__ */ + using namespace lldb_private; LinuxSignals::LinuxSignals() : UnixSignals() { Reset(); } @@ -22,45 +43,45 @@ void LinuxSignals::Reset() { AddSignal(3, "SIGQUIT", false, true, true, "quit"); AddSignal(4, "SIGILL", false, true, true, "illegal instruction"); - AddSignalCode(4, 1 /*ILL_ILLOPC*/, "illegal opcode"); - AddSignalCode(4, 2 /*ILL_ILLOPN*/, "illegal operand"); - AddSignalCode(4, 3 /*ILL_ILLADR*/, "illegal addressing mode"); - AddSignalCode(4, 4 /*ILL_ILLTRP*/, "illegal trap"); - AddSignalCode(4, 5 /*ILL_PRVOPC*/, "privileged opcode"); - AddSignalCode(4, 6 /*ILL_PRVREG*/, "privileged register"); - AddSignalCode(4, 7 /*ILL_COPROC*/, "coprocessor error"); - AddSignalCode(4, 8 /*ILL_BADSTK*/, "internal stack error"); + ADD_SIGCODE(4, ILL_ILLOPC, 1, "illegal opcode"); + ADD_SIGCODE(4, ILL_ILLOPN, 2, "illegal operand"); + ADD_SIGCODE(4, ILL_ILLADR, 3, "illegal addressing mode"); + ADD_SIGCODE(4, ILL_ILLTRP, 4, "illegal trap"); + ADD_SIGCODE(4, ILL_PRVOPC, 5, "privileged opcode"); + ADD_SIGCODE(4, ILL_PRVREG, 6, "privileged register"); + ADD_SIGCODE(4, ILL_COPROC, 7, "coprocessor error"); + ADD_SIGCODE(4, ILL_BADSTK, 8, "internal stack error"); AddSignal(5, "SIGTRAP", true, true, true, "trace trap (not reset when caught)"); AddSignal(6, "SIGABRT", false, true, true, "abort()/IOT trap", "SIGIOT"); AddSignal(7, "SIGBUS", false, true, true, "bus error"); - AddSignalCode(7, 1 /*BUS_ADRALN*/, "illegal alignment"); - AddSignalCode(7, 2 /*BUS_ADRERR*/, "illegal address"); - AddSignalCode(7, 3 /*BUS_OBJERR*/, "hardware error"); + ADD_SIGCODE(7, BUS_ADRALN, 1, "illegal alignment"); + ADD_SIGCODE(7, BUS_ADRERR, 2, "illegal address"); + ADD_SIGCODE(7, BUS_OBJERR, 3, "hardware error"); AddSignal(8, "SIGFPE", false, true, true, "floating point exception"); - AddSignalCode(8, 1 /*FPE_INTDIV*/, "integer divide by zero"); - AddSignalCode(8, 2 /*FPE_INTOVF*/, "integer overflow"); - AddSignalCode(8, 3 /*FPE_FLTDIV*/, "floating point divide by zero"); - AddSignalCode(8, 4 /*FPE_FLTOVF*/, "floating point overflow"); - AddSignalCode(8, 5 /*FPE_FLTUND*/, "floating point underflow"); - AddSignalCode(8, 6 /*FPE_FLTRES*/, "floating point inexact result"); - AddSignalCode(8, 7 /*FPE_FLTINV*/, "floating point invalid operation"); - AddSignalCode(8, 8 /*FPE_FLTSUB*/, "subscript out of range"); + ADD_SIGCODE(8, FPE_INTDIV, 1, "integer divide by zero"); + ADD_SIGCODE(8, FPE_INTOVF, 2, "integer overflow"); + ADD_SIGCODE(8, FPE_FLTDIV, 3, "floating point divide by zero"); + ADD_SIGCODE(8, FPE_FLTOVF, 4, "floating point overflow"); + ADD_SIGCODE(8, FPE_FLTUND, 5, "floating point underflow"); + ADD_SIGCODE(8, FPE_FLTRES, 6, "floating point inexact result"); + ADD_SIGCODE(8, FPE_FLTINV, 7, "floating point invalid operation"); + ADD_SIGCODE(8, FPE_FLTSUB, 8, "subscript out of range"); AddSignal(9, "SIGKILL", false, true, true, "kill"); AddSignal(10, "SIGUSR1", false, true, true, "user defined signal 1"); AddSignal(11, "SIGSEGV", false, true, true, "segmentation violation"); - AddSignalCode(11, 1 /*SEGV_MAPERR*/, "address not mapped to object", SignalCodePrintOption::Address); - AddSignalCode(11, 2 /*SEGV_ACCERR*/, "invalid permissions for mapped object", SignalCodePrintOption::Address); - AddSignalCode(11, 3 /*SEGV_BNDERR*/, "failed address bounds checks", SignalCodePrintOption::Bounds); - AddSignalCode(11, 8 /*SEGV_MTEAERR*/, "async tag check fault"); - AddSignalCode(11, 9 /*SEGV_MTESERR*/, "sync tag check fault", SignalCodePrintOption::Address); + 
ADD_SIGCODE(11, SEGV_MAPERR, 1, "address not mapped to object", SignalCodePrintOption::Address); + ADD_SIGCODE(11, SEGV_ACCERR, 2, "invalid permissions for mapped object", SignalCodePrintOption::Address); + ADD_SIGCODE(11, SEGV_BNDERR, 3, "failed address bounds checks", SignalCodePrintOption::Bounds); + ADD_SIGCODE(11, SEGV_MTEAERR, 8, "async tag check fault"); + ADD_SIGCODE(11, SEGV_MTESERR, 9, "sync tag check fault", SignalCodePrintOption::Address); // Some platforms will occasionally send nonstandard spurious SI_KERNEL // codes. One way to get this is via unaligned SIMD loads. Treat it as invalid address. - AddSignalCode(11, 0x80 /*SI_KERNEL*/, "invalid address", SignalCodePrintOption::Address); + ADD_SIGCODE(11, SI_KERNEL, 0x80, "invalid address", SignalCodePrintOption::Address); AddSignal(12, "SIGUSR2", false, true, true, "user defined signal 2"); AddSignal(13, "SIGPIPE", false, true, true, "write to pipe with reading end closed"); diff --git a/lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.cpp b/lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.cpp index 84a664c05558c..285808e10c95f 100644 --- a/lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.cpp +++ b/lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.cpp @@ -8,6 +8,17 @@ #include "NetBSDSignals.h" +#ifdef __NetBSD__ +#include <csignal> + +#define ADD_SIGCODE(signal, name, value, ...) \ + static_assert(name == value, "Value mismatch for signal code " #name); \ + AddSignalCode(signal, value, __VA_ARGS__) +#else +#define ADD_SIGCODE(signal, name, value, ...) \ + AddSignalCode(signal, value, __VA_ARGS__) +#endif /* ifdef __NetBSD__ */ + using namespace lldb_private; NetBSDSignals::NetBSDSignals() : UnixSignals() { Reset(); } @@ -17,34 +28,34 @@ void NetBSDSignals::Reset() { // clang-format off // SIGILL - AddSignalCode(4, 1 /*ILL_ILLOPC*/, "illegal opcode"); - AddSignalCode(4, 2 /*ILL_ILLOPN*/, "illegal operand"); - AddSignalCode(4, 3 /*ILL_ILLADR*/, "illegal addressing mode"); - AddSignalCode(4, 4 /*ILL_ILLTRP*/, "illegal trap"); - AddSignalCode(4, 5 /*ILL_PRVOPC*/, "privileged opcode"); - AddSignalCode(4, 6 /*ILL_PRVREG*/, "privileged register"); - AddSignalCode(4, 7 /*ILL_COPROC*/, "coprocessor error"); - AddSignalCode(4, 8 /*ILL_BADSTK*/, "internal stack error"); + ADD_SIGCODE(4, ILL_ILLOPC, 1, "illegal opcode"); + ADD_SIGCODE(4, ILL_ILLOPN, 2, "illegal operand"); + ADD_SIGCODE(4, ILL_ILLADR, 3, "illegal addressing mode"); + ADD_SIGCODE(4, ILL_ILLTRP, 4, "illegal trap"); + ADD_SIGCODE(4, ILL_PRVOPC, 5, "privileged opcode"); + ADD_SIGCODE(4, ILL_PRVREG, 6, "privileged register"); + ADD_SIGCODE(4, ILL_COPROC, 7, "coprocessor error"); + ADD_SIGCODE(4, ILL_BADSTK, 8, "internal stack error"); // SIGFPE - AddSignalCode(8, 1 /*FPE_INTDIV*/, "integer divide by zero"); - AddSignalCode(8, 2 /*FPE_INTOVF*/, "integer overflow"); - AddSignalCode(8, 3 /*FPE_FLTDIV*/, "floating point divide by zero"); - AddSignalCode(8, 4 /*FPE_FLTOVF*/, "floating point overflow"); - AddSignalCode(8, 5 /*FPE_FLTUND*/, "floating point underflow"); - AddSignalCode(8, 6 /*FPE_FLTRES*/, "floating point inexact result"); - AddSignalCode(8, 7 /*FPE_FLTINV*/, "invalid floating point operation"); - AddSignalCode(8, 8 /*FPE_FLTSUB*/, "subscript out of range"); + ADD_SIGCODE(8, FPE_INTDIV, 1, "integer divide by zero"); + ADD_SIGCODE(8, FPE_INTOVF, 2, "integer overflow"); + ADD_SIGCODE(8, FPE_FLTDIV, 3, "floating point divide by zero"); + ADD_SIGCODE(8, FPE_FLTOVF, 4, "floating point overflow"); + ADD_SIGCODE(8, FPE_FLTUND, 5, "floating point underflow"); + ADD_SIGCODE(8,
FPE_FLTRES, 6, "floating point inexact result"); + ADD_SIGCODE(8, FPE_FLTINV, 7, "invalid floating point operation"); + ADD_SIGCODE(8, FPE_FLTSUB, 8, "subscript out of range"); // SIGBUS - AddSignalCode(10, 1 /*BUS_ADRALN*/, "invalid address alignment"); - AddSignalCode(10, 2 /*BUS_ADRERR*/, "non-existent physical address"); - AddSignalCode(10, 3 /*BUS_OBJERR*/, "object specific hardware error"); + ADD_SIGCODE(10, BUS_ADRALN, 1, "invalid address alignment"); + ADD_SIGCODE(10, BUS_ADRERR, 2, "non-existent physical address"); + ADD_SIGCODE(10, BUS_OBJERR, 3, "object specific hardware error"); // SIGSEGV - AddSignalCode(11, 1 /*SEGV_MAPERR*/, "address not mapped to object", + ADD_SIGCODE(11, SEGV_MAPERR, 1, "address not mapped to object", SignalCodePrintOption::Address); - AddSignalCode(11, 2 /*SEGV_ACCERR*/, "invalid permissions for mapped object", + ADD_SIGCODE(11, SEGV_ACCERR, 2, "invalid permissions for mapped object", SignalCodePrintOption::Address); // SIGNO NAME SUPPRESS STOP NOTIFY DESCRIPTION From 3e2d4e85d34705466fe0c8c370b2481d65587fc4 Mon Sep 17 00:00:00 2001 From: "Luo, Yuanke" Date: Tue, 21 Mar 2023 20:12:35 +0800 Subject: [PATCH 166/691] Revert "[X86] Create extra prolog/epilog for stack realignment [part 2]" This reverts commit 614c63bec6d67cbfdc17b50e443ff769a28c18d0. --- llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp | 2 +- llvm/test/CodeGen/X86/i386-baseptr.ll | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp index bf60791234315..ef3751e40f17f 100644 --- a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp +++ b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp @@ -114,7 +114,7 @@ bool X86ArgumentStackSlotPass::runOnMachineFunction(MachineFunction &MF) { if (F.hasFnAttribute(Attribute::Naked)) return false; // Only support Linux - if (!STI.isTargetLinux() && !STI.isTargetELF()) + if (!STI.isTargetLinux()) return false; if (!TRI->hasBasePointer(MF)) return false; diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll index df921f2738ff0..cb8d849a86841 100644 --- a/llvm/test/CodeGen/X86/i386-baseptr.ll +++ b/llvm/test/CodeGen/X86/i386-baseptr.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i386-pc-linux -stackrealign < %s | FileCheck %s -; RUN: llc -mtriple=i386-pc-none-elf -stackrealign < %s | FileCheck %s declare i32 @helper() nounwind define void @base() #0 { From 35130e4d6efb70c87b4bc184e020449c4b3abf31 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 21 Mar 2023 11:59:56 +0100 Subject: [PATCH 167/691] [AArch64] Add tests for bitcast to and mask reduction (NFC) This is a copy of the vecreduce_or tests but with comparison to -1 rather than 0. 
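The new tests exercise the bitcast form of an all-lanes-true (AND-style) reduction, e.g.:
  %cmp1 = icmp eq <8 x i8> %a, zeroinitializer
  %cast = bitcast <8 x i1> %cmp1 to i8
  %cmp2 = icmp eq i8 %cast, -1
whereas the existing vecreduce_or tests compare %cast against 0.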
--- .../test/CodeGen/AArch64/dag-combine-setcc.ll | 784 +++++++++++++++++- 1 file changed, 776 insertions(+), 8 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll index 60bd2a373a2c1..6ec5e3b11a488 100644 --- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll @@ -5,7 +5,7 @@ define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v0.8b, v0.8b, #0 -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: umaxv b0, v0.8b ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: bic w0, w8, w9 @@ -20,7 +20,7 @@ define i1 @combine_setcc_eq_vecreduce_or_v16i1(<16 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: umaxv b0, v0.16b ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: bic w0, w8, w9 @@ -35,7 +35,7 @@ define i1 @combine_setcc_eq_vecreduce_or_v32i1(<32 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v32i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v1.16b, v1.16b, #0 -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: umaxv b0, v0.16b @@ -52,7 +52,7 @@ define i1 @combine_setcc_eq_vecreduce_or_v64i1(<64 x i8> %a) { ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v64i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v2.16b, v2.16b, #0 -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: cmeq v3.16b, v3.16b, #0 ; CHECK-NEXT: cmeq v1.16b, v1.16b, #0 ; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 @@ -129,6 +129,774 @@ define i1 @combine_setcc_ne_vecreduce_or_v64i1(<64 x i8> %a) { ret i1 %cmp2 } +define i1 @combine_setcc_eq_vecreduce_and_v8i1(<8 x i8> %a) { +; CHECK-LABEL: combine_setcc_eq_vecreduce_and_v8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmeq v0.8b, v0.8b, #0 +; CHECK-NEXT: mov w8, #255 // =0xff +; CHECK-NEXT: umov w9, v0.b[0] +; CHECK-NEXT: umov w10, v0.b[1] +; CHECK-NEXT: umov w11, v0.b[2] +; CHECK-NEXT: umov w12, v0.b[3] +; CHECK-NEXT: umov w13, v0.b[4] +; CHECK-NEXT: umov w14, v0.b[6] +; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: bfi w9, w10, #1, #1 +; CHECK-NEXT: umov w10, v0.b[5] +; CHECK-NEXT: bfi w9, w11, #2, #1 +; CHECK-NEXT: umov w11, v0.b[7] +; CHECK-NEXT: bfi w9, w12, #3, #1 +; CHECK-NEXT: and w12, w14, #0x1 +; CHECK-NEXT: bfi w9, w13, #4, #1 +; CHECK-NEXT: bfi w9, w10, #5, #1 +; CHECK-NEXT: orr w9, w9, w12, lsl #6 +; CHECK-NEXT: orr w9, w9, w11, lsl #7 +; CHECK-NEXT: bics wzr, w8, w9 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %cmp1 = icmp eq <8 x i8> %a, zeroinitializer + %cast = bitcast <8 x i1> %cmp1 to i8 + %cmp2 = icmp eq i8 %cast, -1 + ret i1 %cmp2 +} + +define i1 @combine_setcc_eq_vecreduce_and_v16i1(<16 x i8> %a) { +; CHECK-LABEL: combine_setcc_eq_vecreduce_and_v16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 +; CHECK-NEXT: mov w8, #65535 // =0xffff +; CHECK-NEXT: umov w9, v0.b[0] +; CHECK-NEXT: umov w10, v0.b[1] +; CHECK-NEXT: umov w11, v0.b[2] +; CHECK-NEXT: umov w12, v0.b[3] +; CHECK-NEXT: umov w13, v0.b[4] +; CHECK-NEXT: umov w14, v0.b[5] +; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: bfi w9, w10, #1, #1 +; CHECK-NEXT: umov w10, 
v0.b[6] +; CHECK-NEXT: bfi w9, w11, #2, #1 +; CHECK-NEXT: umov w11, v0.b[7] +; CHECK-NEXT: bfi w9, w12, #3, #1 +; CHECK-NEXT: umov w12, v0.b[8] +; CHECK-NEXT: bfi w9, w13, #4, #1 +; CHECK-NEXT: umov w13, v0.b[9] +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: bfi w9, w14, #5, #1 +; CHECK-NEXT: umov w14, v0.b[10] +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: orr w9, w9, w10, lsl #6 +; CHECK-NEXT: umov w10, v0.b[11] +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: orr w9, w9, w11, lsl #7 +; CHECK-NEXT: umov w11, v0.b[12] +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: orr w9, w9, w12, lsl #8 +; CHECK-NEXT: umov w12, v0.b[13] +; CHECK-NEXT: and w14, w14, #0x1 +; CHECK-NEXT: orr w9, w9, w13, lsl #9 +; CHECK-NEXT: umov w13, v0.b[14] +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: orr w9, w9, w14, lsl #10 +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: orr w9, w9, w10, lsl #11 +; CHECK-NEXT: and w10, w12, #0x1 +; CHECK-NEXT: umov w12, v0.b[15] +; CHECK-NEXT: orr w9, w9, w11, lsl #12 +; CHECK-NEXT: and w11, w13, #0x1 +; CHECK-NEXT: orr w9, w9, w10, lsl #13 +; CHECK-NEXT: orr w9, w9, w11, lsl #14 +; CHECK-NEXT: orr w9, w9, w12, lsl #15 +; CHECK-NEXT: bics wzr, w8, w9 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %cmp1 = icmp eq <16 x i8> %a, zeroinitializer + %cast = bitcast <16 x i1> %cmp1 to i16 + %cmp2 = icmp eq i16 %cast, -1 + ret i1 %cmp2 +} + +define i1 @combine_setcc_eq_vecreduce_and_v32i1(<32 x i8> %a) { +; CHECK-LABEL: combine_setcc_eq_vecreduce_and_v32i1: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmeq v1.16b, v1.16b, #0 +; CHECK-NEXT: mov w8, #65535 // =0xffff +; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 +; CHECK-NEXT: umov w9, v1.b[0] +; CHECK-NEXT: umov w10, v1.b[1] +; CHECK-NEXT: umov w11, v1.b[2] +; CHECK-NEXT: umov w12, v1.b[3] +; CHECK-NEXT: umov w13, v1.b[4] +; CHECK-NEXT: umov w14, v1.b[5] +; CHECK-NEXT: umov w15, v1.b[6] +; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: umov w16, v1.b[7] +; CHECK-NEXT: bfi w9, w10, #1, #1 +; CHECK-NEXT: umov w10, v1.b[8] +; CHECK-NEXT: bfi w9, w11, #2, #1 +; CHECK-NEXT: umov w11, v1.b[9] +; CHECK-NEXT: bfi w9, w12, #3, #1 +; CHECK-NEXT: umov w12, v1.b[10] +; CHECK-NEXT: bfi w9, w13, #4, #1 +; CHECK-NEXT: umov w13, v0.b[0] +; CHECK-NEXT: bfi w9, w14, #5, #1 +; CHECK-NEXT: umov w14, v0.b[1] +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: and w16, w16, #0x1 +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: orr w9, w9, w15, lsl #6 +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: bfi w13, w14, #1, #1 +; CHECK-NEXT: orr w9, w9, w16, lsl #7 +; CHECK-NEXT: umov w14, v0.b[2] +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: orr w9, w9, w10, lsl #8 +; CHECK-NEXT: umov w10, v0.b[3] +; CHECK-NEXT: orr w9, w9, w11, lsl #9 +; CHECK-NEXT: umov w11, v0.b[4] +; CHECK-NEXT: orr w9, w9, w12, lsl #10 +; CHECK-NEXT: umov w12, v0.b[6] +; CHECK-NEXT: bfi w13, w14, #2, #1 +; CHECK-NEXT: umov w14, v0.b[5] +; CHECK-NEXT: umov w16, v1.b[12] +; CHECK-NEXT: bfi w13, w10, #3, #1 +; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w15, v1.b[11] +; CHECK-NEXT: bfi w13, w11, #4, #1 +; CHECK-NEXT: and w11, w12, #0x1 +; CHECK-NEXT: umov w12, v0.b[8] +; CHECK-NEXT: bfi w13, w14, #5, #1 +; CHECK-NEXT: orr w11, w13, w11, lsl #6 +; CHECK-NEXT: umov w13, v0.b[9] +; CHECK-NEXT: and w14, w16, #0x1 +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: umov w16, v0.b[10] +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: orr w10, w11, 
w10, lsl #7 +; CHECK-NEXT: orr w9, w9, w15, lsl #11 +; CHECK-NEXT: and w11, w13, #0x1 +; CHECK-NEXT: orr w10, w10, w12, lsl #8 +; CHECK-NEXT: umov w12, v0.b[11] +; CHECK-NEXT: orr w9, w9, w14, lsl #12 +; CHECK-NEXT: and w13, w16, #0x1 +; CHECK-NEXT: umov w14, v0.b[12] +; CHECK-NEXT: orr w10, w10, w11, lsl #9 +; CHECK-NEXT: umov w15, v1.b[13] +; CHECK-NEXT: orr w10, w10, w13, lsl #10 +; CHECK-NEXT: umov w13, v0.b[13] +; CHECK-NEXT: umov w11, v1.b[14] +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: umov w16, v0.b[14] +; CHECK-NEXT: and w14, w14, #0x1 +; CHECK-NEXT: orr w10, w10, w12, lsl #11 +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: umov w12, v1.b[15] +; CHECK-NEXT: orr w10, w10, w14, lsl #12 +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: umov w14, v0.b[15] +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: and w16, w16, #0x1 +; CHECK-NEXT: orr w9, w9, w15, lsl #13 +; CHECK-NEXT: orr w10, w10, w13, lsl #13 +; CHECK-NEXT: orr w9, w9, w11, lsl #14 +; CHECK-NEXT: orr w10, w10, w16, lsl #14 +; CHECK-NEXT: orr w9, w9, w12, lsl #15 +; CHECK-NEXT: orr w10, w10, w14, lsl #15 +; CHECK-NEXT: and w9, w10, w9 +; CHECK-NEXT: bics wzr, w8, w9 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %cmp1 = icmp eq <32 x i8> %a, zeroinitializer + %cast = bitcast <32 x i1> %cmp1 to i32 + %cmp2 = icmp eq i32 %cast, -1 + ret i1 %cmp2 +} + +define i1 @combine_setcc_eq_vecreduce_and_v64i1(<64 x i8> %a) { +; CHECK-LABEL: combine_setcc_eq_vecreduce_and_v64i1: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmeq v3.16b, v3.16b, #0 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cmeq v2.16b, v2.16b, #0 +; CHECK-NEXT: umov w10, v3.b[0] +; CHECK-NEXT: umov w9, v3.b[1] +; CHECK-NEXT: umov w11, v3.b[2] +; CHECK-NEXT: umov w12, v3.b[3] +; CHECK-NEXT: umov w13, v3.b[4] +; CHECK-NEXT: umov w15, v3.b[6] +; CHECK-NEXT: umov w14, v3.b[5] +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: umov w16, v3.b[7] +; CHECK-NEXT: lsl w10, w10, #16 +; CHECK-NEXT: umov w17, v3.b[8] +; CHECK-NEXT: bfi w10, w9, #17, #1 +; CHECK-NEXT: umov w18, v3.b[9] +; CHECK-NEXT: bfi w10, w11, #18, #1 +; CHECK-NEXT: umov w9, v3.b[10] +; CHECK-NEXT: bfi w10, w12, #19, #1 +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: bfi w10, w13, #20, #1 +; CHECK-NEXT: umov w11, v3.b[11] +; CHECK-NEXT: bfi w10, w14, #21, #1 +; CHECK-NEXT: and w16, w16, #0x1 +; CHECK-NEXT: umov w12, v3.b[12] +; CHECK-NEXT: and w17, w17, #0x1 +; CHECK-NEXT: orr w10, w10, w15, lsl #22 +; CHECK-NEXT: umov w13, v3.b[13] +; CHECK-NEXT: and w18, w18, #0x1 +; CHECK-NEXT: orr w10, w10, w16, lsl #23 +; CHECK-NEXT: umov w14, v3.b[14] +; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: orr w10, w10, w17, lsl #24 +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: orr w10, w10, w18, lsl #25 +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: orr w9, w10, w9, lsl #26 +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: orr w9, w9, w11, lsl #27 +; CHECK-NEXT: umov w11, v2.b[0] +; CHECK-NEXT: and w10, w14, #0x1 +; CHECK-NEXT: orr w9, w9, w12, lsl #28 +; CHECK-NEXT: umov w12, v2.b[1] +; CHECK-NEXT: orr w9, w9, w13, lsl #29 +; CHECK-NEXT: umov w13, v2.b[2] +; CHECK-NEXT: orr w9, w9, w10, lsl #30 +; CHECK-NEXT: umov w10, v2.b[3] +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: umov w14, v2.b[4] +; CHECK-NEXT: umov w16, v2.b[10] +; CHECK-NEXT: bfi w11, w12, #1, #1 +; CHECK-NEXT: umov w12, v2.b[5] +; CHECK-NEXT: bfi w11, w13, #2, #1 +; CHECK-NEXT: umov w13, v2.b[6] +; CHECK-NEXT: bfi w11, w10, #3, #1 +; CHECK-NEXT: umov w10, 
v2.b[7] +; CHECK-NEXT: bfi w11, w14, #4, #1 +; CHECK-NEXT: umov w14, v2.b[9] +; CHECK-NEXT: bfi w11, w12, #5, #1 +; CHECK-NEXT: umov w12, v2.b[8] +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: umov w15, v3.b[15] +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: orr w11, w11, w13, lsl #6 +; CHECK-NEXT: umov w13, v2.b[11] +; CHECK-NEXT: orr w10, w11, w10, lsl #7 +; CHECK-NEXT: and w11, w12, #0x1 +; CHECK-NEXT: and w12, w14, #0x1 +; CHECK-NEXT: umov w14, v2.b[14] +; CHECK-NEXT: orr w10, w10, w11, lsl #8 +; CHECK-NEXT: and w11, w16, #0x1 +; CHECK-NEXT: orr w10, w10, w12, lsl #9 +; CHECK-NEXT: umov w12, v2.b[12] +; CHECK-NEXT: orr w10, w10, w11, lsl #10 +; CHECK-NEXT: umov w11, v2.b[13] +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: orr w9, w9, w15, lsl #31 +; CHECK-NEXT: cmeq v1.16b, v1.16b, #0 +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: orr w10, w10, w13, lsl #11 +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: umov w13, v1.b[0] +; CHECK-NEXT: orr w10, w10, w12, lsl #12 +; CHECK-NEXT: and w12, w14, #0x1 +; CHECK-NEXT: orr w10, w10, w11, lsl #13 +; CHECK-NEXT: umov w11, v1.b[1] +; CHECK-NEXT: umov w14, v1.b[2] +; CHECK-NEXT: umov w15, v1.b[3] +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: orr w10, w10, w12, lsl #14 +; CHECK-NEXT: lsl w13, w13, #16 +; CHECK-NEXT: umov w12, v2.b[15] +; CHECK-NEXT: bfi w13, w11, #17, #1 +; CHECK-NEXT: umov w11, v1.b[4] +; CHECK-NEXT: umov w17, v1.b[6] +; CHECK-NEXT: umov w16, v1.b[5] +; CHECK-NEXT: bfi w13, w14, #18, #1 +; CHECK-NEXT: umov w14, v1.b[7] +; CHECK-NEXT: bfi w13, w15, #19, #1 +; CHECK-NEXT: orr w10, w10, w12, lsl #15 +; CHECK-NEXT: bfi w13, w11, #20, #1 +; CHECK-NEXT: umov w11, v1.b[8] +; CHECK-NEXT: and w12, w17, #0x1 +; CHECK-NEXT: bfi w13, w16, #21, #1 +; CHECK-NEXT: umov w15, v1.b[9] +; CHECK-NEXT: and w14, w14, #0x1 +; CHECK-NEXT: orr w12, w13, w12, lsl #22 +; CHECK-NEXT: umov w13, v1.b[10] +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: orr w12, w12, w14, lsl #23 +; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 +; CHECK-NEXT: umov w18, v1.b[12] +; CHECK-NEXT: orr w11, w12, w11, lsl #24 +; CHECK-NEXT: and w12, w15, #0x1 +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: umov w14, v0.b[0] +; CHECK-NEXT: orr w11, w11, w12, lsl #25 +; CHECK-NEXT: umov w12, v0.b[1] +; CHECK-NEXT: orr w11, w11, w13, lsl #26 +; CHECK-NEXT: umov w13, v0.b[2] +; CHECK-NEXT: umov w16, v0.b[3] +; CHECK-NEXT: umov w17, v0.b[4] +; CHECK-NEXT: and w14, w14, #0x1 +; CHECK-NEXT: umov w15, v1.b[11] +; CHECK-NEXT: bfi w14, w12, #1, #1 +; CHECK-NEXT: umov w12, v0.b[6] +; CHECK-NEXT: bfi w14, w13, #2, #1 +; CHECK-NEXT: umov w13, v0.b[5] +; CHECK-NEXT: bfi w14, w16, #3, #1 +; CHECK-NEXT: umov w16, v0.b[7] +; CHECK-NEXT: bfi w14, w17, #4, #1 +; CHECK-NEXT: umov w17, v0.b[8] +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: bfi w14, w13, #5, #1 +; CHECK-NEXT: and w13, w18, #0x1 +; CHECK-NEXT: orr w12, w14, w12, lsl #6 +; CHECK-NEXT: and w14, w16, #0x1 +; CHECK-NEXT: umov w16, v0.b[9] +; CHECK-NEXT: umov w18, v0.b[10] +; CHECK-NEXT: and w17, w17, #0x1 +; CHECK-NEXT: orr w11, w11, w15, lsl #27 +; CHECK-NEXT: orr w12, w12, w14, lsl #7 +; CHECK-NEXT: orr w11, w11, w13, lsl #28 +; CHECK-NEXT: umov w14, v0.b[11] +; CHECK-NEXT: orr w12, w12, w17, lsl #8 +; CHECK-NEXT: and w13, w16, #0x1 +; CHECK-NEXT: and w15, w18, #0x1 +; CHECK-NEXT: umov w16, v0.b[12] +; CHECK-NEXT: umov w17, v1.b[13] +; CHECK-NEXT: orr w12, w12, w13, lsl #9 +; CHECK-NEXT: umov w18, v0.b[14] +; CHECK-NEXT: orr w12, w12, w15, lsl #10 +; CHECK-NEXT: umov w15, v0.b[13] +; CHECK-NEXT: and w14, 
w14, #0x1 +; CHECK-NEXT: umov w13, v1.b[14] +; CHECK-NEXT: and w16, w16, #0x1 +; CHECK-NEXT: and w17, w17, #0x1 +; CHECK-NEXT: orr w12, w12, w14, lsl #11 +; CHECK-NEXT: umov w14, v1.b[15] +; CHECK-NEXT: orr w12, w12, w16, lsl #12 +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: umov w16, v0.b[15] +; CHECK-NEXT: and w18, w18, #0x1 +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: orr w12, w12, w15, lsl #13 +; CHECK-NEXT: orr w11, w11, w17, lsl #29 +; CHECK-NEXT: orr w12, w12, w18, lsl #14 +; CHECK-NEXT: orr w11, w11, w13, lsl #30 +; CHECK-NEXT: and w10, w10, #0xffff +; CHECK-NEXT: orr w12, w12, w16, lsl #15 +; CHECK-NEXT: orr w11, w11, w14, lsl #31 +; CHECK-NEXT: and w12, w12, #0xffff +; CHECK-NEXT: orr w9, w10, w9 +; CHECK-NEXT: orr w10, w12, w11 +; CHECK-NEXT: and x9, x9, x10 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %cmp1 = icmp eq <64 x i8> %a, zeroinitializer + %cast = bitcast <64 x i1> %cmp1 to i64 + %cmp2 = icmp eq i64 %cast, -1 + ret i1 %cmp2 +} + +define i1 @combine_setcc_ne_vecreduce_and_v8i1(<8 x i8> %a) { +; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v8i1: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmtst v0.8b, v0.8b, v0.8b +; CHECK-NEXT: mov w8, #255 // =0xff +; CHECK-NEXT: umov w9, v0.b[0] +; CHECK-NEXT: umov w10, v0.b[1] +; CHECK-NEXT: umov w11, v0.b[2] +; CHECK-NEXT: umov w12, v0.b[3] +; CHECK-NEXT: umov w13, v0.b[4] +; CHECK-NEXT: umov w14, v0.b[6] +; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: bfi w9, w10, #1, #1 +; CHECK-NEXT: umov w10, v0.b[5] +; CHECK-NEXT: bfi w9, w11, #2, #1 +; CHECK-NEXT: umov w11, v0.b[7] +; CHECK-NEXT: bfi w9, w12, #3, #1 +; CHECK-NEXT: and w12, w14, #0x1 +; CHECK-NEXT: bfi w9, w13, #4, #1 +; CHECK-NEXT: bfi w9, w10, #5, #1 +; CHECK-NEXT: orr w9, w9, w12, lsl #6 +; CHECK-NEXT: orr w9, w9, w11, lsl #7 +; CHECK-NEXT: bics wzr, w8, w9 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %cmp1 = icmp ne <8 x i8> %a, zeroinitializer + %cast = bitcast <8 x i1> %cmp1 to i8 + %cmp2 = icmp ne i8 %cast, -1 + ret i1 %cmp2 +} + +define i1 @combine_setcc_ne_vecreduce_and_v16i1(<16 x i8> %a) { +; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v16i1: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b +; CHECK-NEXT: mov w8, #65535 // =0xffff +; CHECK-NEXT: umov w9, v0.b[0] +; CHECK-NEXT: umov w10, v0.b[1] +; CHECK-NEXT: umov w11, v0.b[2] +; CHECK-NEXT: umov w12, v0.b[3] +; CHECK-NEXT: umov w13, v0.b[4] +; CHECK-NEXT: umov w14, v0.b[5] +; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: bfi w9, w10, #1, #1 +; CHECK-NEXT: umov w10, v0.b[6] +; CHECK-NEXT: bfi w9, w11, #2, #1 +; CHECK-NEXT: umov w11, v0.b[7] +; CHECK-NEXT: bfi w9, w12, #3, #1 +; CHECK-NEXT: umov w12, v0.b[8] +; CHECK-NEXT: bfi w9, w13, #4, #1 +; CHECK-NEXT: umov w13, v0.b[9] +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: bfi w9, w14, #5, #1 +; CHECK-NEXT: umov w14, v0.b[10] +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: orr w9, w9, w10, lsl #6 +; CHECK-NEXT: umov w10, v0.b[11] +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: orr w9, w9, w11, lsl #7 +; CHECK-NEXT: umov w11, v0.b[12] +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: orr w9, w9, w12, lsl #8 +; CHECK-NEXT: umov w12, v0.b[13] +; CHECK-NEXT: and w14, w14, #0x1 +; CHECK-NEXT: orr w9, w9, w13, lsl #9 +; CHECK-NEXT: umov w13, v0.b[14] +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: orr w9, w9, w14, lsl #10 +; 
CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: orr w9, w9, w10, lsl #11 +; CHECK-NEXT: and w10, w12, #0x1 +; CHECK-NEXT: umov w12, v0.b[15] +; CHECK-NEXT: orr w9, w9, w11, lsl #12 +; CHECK-NEXT: and w11, w13, #0x1 +; CHECK-NEXT: orr w9, w9, w10, lsl #13 +; CHECK-NEXT: orr w9, w9, w11, lsl #14 +; CHECK-NEXT: orr w9, w9, w12, lsl #15 +; CHECK-NEXT: bics wzr, w8, w9 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %cmp1 = icmp ne <16 x i8> %a, zeroinitializer + %cast = bitcast <16 x i1> %cmp1 to i16 + %cmp2 = icmp ne i16 %cast, -1 + ret i1 %cmp2 +} + +define i1 @combine_setcc_ne_vecreduce_and_v32i1(<32 x i8> %a) { +; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v32i1: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmtst v1.16b, v1.16b, v1.16b +; CHECK-NEXT: mov w8, #65535 // =0xffff +; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b +; CHECK-NEXT: umov w10, v1.b[0] +; CHECK-NEXT: umov w9, v1.b[1] +; CHECK-NEXT: umov w11, v1.b[2] +; CHECK-NEXT: umov w12, v1.b[3] +; CHECK-NEXT: umov w13, v1.b[4] +; CHECK-NEXT: umov w14, v1.b[5] +; CHECK-NEXT: umov w15, v1.b[6] +; CHECK-NEXT: umov w16, v1.b[7] +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: umov w17, v1.b[8] +; CHECK-NEXT: bfi w10, w9, #1, #1 +; CHECK-NEXT: umov w9, v1.b[9] +; CHECK-NEXT: bfi w10, w11, #2, #1 +; CHECK-NEXT: umov w11, v0.b[0] +; CHECK-NEXT: bfi w10, w12, #3, #1 +; CHECK-NEXT: umov w12, v0.b[1] +; CHECK-NEXT: bfi w10, w13, #4, #1 +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: bfi w10, w14, #5, #1 +; CHECK-NEXT: umov w14, v0.b[2] +; CHECK-NEXT: umov w13, v1.b[10] +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: orr w10, w10, w15, lsl #6 +; CHECK-NEXT: and w15, w16, #0x1 +; CHECK-NEXT: bfi w11, w12, #1, #1 +; CHECK-NEXT: and w16, w17, #0x1 +; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: bfi w11, w14, #2, #1 +; CHECK-NEXT: orr w10, w10, w15, lsl #7 +; CHECK-NEXT: umov w14, v0.b[3] +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: orr w10, w10, w16, lsl #8 +; CHECK-NEXT: umov w15, v0.b[4] +; CHECK-NEXT: orr w9, w10, w9, lsl #9 +; CHECK-NEXT: umov w10, v0.b[6] +; CHECK-NEXT: orr w9, w9, w13, lsl #10 +; CHECK-NEXT: umov w13, v0.b[5] +; CHECK-NEXT: bfi w11, w14, #3, #1 +; CHECK-NEXT: umov w14, v0.b[7] +; CHECK-NEXT: umov w16, v1.b[12] +; CHECK-NEXT: umov w12, v1.b[11] +; CHECK-NEXT: bfi w11, w15, #4, #1 +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: umov w15, v0.b[8] +; CHECK-NEXT: bfi w11, w13, #5, #1 +; CHECK-NEXT: orr w10, w11, w10, lsl #6 +; CHECK-NEXT: and w11, w14, #0x1 +; CHECK-NEXT: umov w14, v0.b[9] +; CHECK-NEXT: and w13, w16, #0x1 +; CHECK-NEXT: umov w16, v0.b[10] +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: orr w10, w10, w11, lsl #7 +; CHECK-NEXT: orr w9, w9, w12, lsl #11 +; CHECK-NEXT: umov w12, v0.b[11] +; CHECK-NEXT: and w11, w14, #0x1 +; CHECK-NEXT: orr w9, w9, w13, lsl #12 +; CHECK-NEXT: orr w10, w10, w15, lsl #8 +; CHECK-NEXT: and w13, w16, #0x1 +; CHECK-NEXT: umov w14, v0.b[12] +; CHECK-NEXT: orr w10, w10, w11, lsl #9 +; CHECK-NEXT: umov w15, v1.b[13] +; CHECK-NEXT: orr w10, w10, w13, lsl #10 +; CHECK-NEXT: umov w13, v0.b[13] +; CHECK-NEXT: umov w11, v1.b[14] +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: umov w16, v0.b[14] +; CHECK-NEXT: and w14, w14, #0x1 +; CHECK-NEXT: orr w10, w10, w12, lsl #11 +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: umov w12, v1.b[15] +; CHECK-NEXT: orr w10, w10, w14, lsl #12 +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: umov w14, v0.b[15] +; CHECK-NEXT: and w11, 
w11, #0x1 +; CHECK-NEXT: and w16, w16, #0x1 +; CHECK-NEXT: orr w9, w9, w15, lsl #13 +; CHECK-NEXT: orr w10, w10, w13, lsl #13 +; CHECK-NEXT: orr w9, w9, w11, lsl #14 +; CHECK-NEXT: orr w10, w10, w16, lsl #14 +; CHECK-NEXT: orr w9, w9, w12, lsl #15 +; CHECK-NEXT: orr w10, w10, w14, lsl #15 +; CHECK-NEXT: and w9, w10, w9 +; CHECK-NEXT: bics wzr, w8, w9 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %cmp1 = icmp ne <32 x i8> %a, zeroinitializer + %cast = bitcast <32 x i1> %cmp1 to i32 + %cmp2 = icmp ne i32 %cast, -1 + ret i1 %cmp2 +} + +define i1 @combine_setcc_ne_vecreduce_and_v64i1(<64 x i8> %a) { +; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v64i1: +; CHECK: // %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmtst v3.16b, v3.16b, v3.16b +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: cmtst v2.16b, v2.16b, v2.16b +; CHECK-NEXT: cmtst v1.16b, v1.16b, v1.16b +; CHECK-NEXT: umov w10, v3.b[0] +; CHECK-NEXT: umov w9, v3.b[1] +; CHECK-NEXT: umov w11, v3.b[2] +; CHECK-NEXT: umov w12, v3.b[3] +; CHECK-NEXT: umov w13, v3.b[4] +; CHECK-NEXT: umov w15, v3.b[6] +; CHECK-NEXT: umov w14, v3.b[5] +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: umov w16, v3.b[7] +; CHECK-NEXT: lsl w10, w10, #16 +; CHECK-NEXT: umov w17, v3.b[8] +; CHECK-NEXT: bfi w10, w9, #17, #1 +; CHECK-NEXT: umov w18, v3.b[9] +; CHECK-NEXT: bfi w10, w11, #18, #1 +; CHECK-NEXT: umov w9, v3.b[10] +; CHECK-NEXT: bfi w10, w12, #19, #1 +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: bfi w10, w13, #20, #1 +; CHECK-NEXT: and w16, w16, #0x1 +; CHECK-NEXT: bfi w10, w14, #21, #1 +; CHECK-NEXT: umov w11, v3.b[11] +; CHECK-NEXT: and w17, w17, #0x1 +; CHECK-NEXT: orr w10, w10, w15, lsl #22 +; CHECK-NEXT: and w18, w18, #0x1 +; CHECK-NEXT: umov w13, v2.b[0] +; CHECK-NEXT: orr w10, w10, w16, lsl #23 +; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: umov w15, v2.b[1] +; CHECK-NEXT: orr w10, w10, w17, lsl #24 +; CHECK-NEXT: orr w10, w10, w18, lsl #25 +; CHECK-NEXT: umov w16, v2.b[2] +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: orr w9, w10, w9, lsl #26 +; CHECK-NEXT: umov w10, v2.b[3] +; CHECK-NEXT: umov w12, v3.b[12] +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: umov w14, v3.b[13] +; CHECK-NEXT: orr w9, w9, w11, lsl #27 +; CHECK-NEXT: umov w11, v2.b[4] +; CHECK-NEXT: bfi w13, w15, #1, #1 +; CHECK-NEXT: umov w15, v2.b[5] +; CHECK-NEXT: bfi w13, w16, #2, #1 +; CHECK-NEXT: umov w16, v2.b[6] +; CHECK-NEXT: bfi w13, w10, #3, #1 +; CHECK-NEXT: umov w10, v2.b[7] +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: bfi w13, w11, #4, #1 +; CHECK-NEXT: and w11, w14, #0x1 +; CHECK-NEXT: umov w14, v2.b[8] +; CHECK-NEXT: orr w9, w9, w12, lsl #28 +; CHECK-NEXT: bfi w13, w15, #5, #1 +; CHECK-NEXT: and w12, w16, #0x1 +; CHECK-NEXT: umov w15, v2.b[9] +; CHECK-NEXT: and w10, w10, #0x1 +; CHECK-NEXT: umov w16, v3.b[14] +; CHECK-NEXT: orr w12, w13, w12, lsl #6 +; CHECK-NEXT: orr w9, w9, w11, lsl #29 +; CHECK-NEXT: orr w10, w12, w10, lsl #7 +; CHECK-NEXT: and w11, w14, #0x1 +; CHECK-NEXT: umov w12, v2.b[10] +; CHECK-NEXT: and w13, w15, #0x1 +; CHECK-NEXT: umov w14, v2.b[11] +; CHECK-NEXT: orr w10, w10, w11, lsl #8 +; CHECK-NEXT: orr w10, w10, w13, lsl #9 +; CHECK-NEXT: umov w13, v2.b[12] +; CHECK-NEXT: and w15, w16, #0x1 +; CHECK-NEXT: umov w16, v2.b[13] +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: umov w11, v3.b[15] +; CHECK-NEXT: and w14, w14, #0x1 +; CHECK-NEXT: orr w9, w9, w15, lsl #30 +; CHECK-NEXT: orr w10, w10, w12, lsl #10 +; CHECK-NEXT: and w12, w13, #0x1 +; CHECK-NEXT: orr 
w10, w10, w14, lsl #11 +; CHECK-NEXT: umov w14, v1.b[0] +; CHECK-NEXT: and w13, w16, #0x1 +; CHECK-NEXT: orr w9, w9, w11, lsl #31 +; CHECK-NEXT: orr w10, w10, w12, lsl #12 +; CHECK-NEXT: umov w11, v1.b[1] +; CHECK-NEXT: orr w10, w10, w13, lsl #13 +; CHECK-NEXT: umov w13, v1.b[2] +; CHECK-NEXT: and w14, w14, #0x1 +; CHECK-NEXT: umov w15, v1.b[3] +; CHECK-NEXT: lsl w14, w14, #16 +; CHECK-NEXT: umov w16, v1.b[4] +; CHECK-NEXT: umov w12, v2.b[14] +; CHECK-NEXT: bfi w14, w11, #17, #1 +; CHECK-NEXT: umov w11, v1.b[6] +; CHECK-NEXT: bfi w14, w13, #18, #1 +; CHECK-NEXT: umov w13, v1.b[5] +; CHECK-NEXT: bfi w14, w15, #19, #1 +; CHECK-NEXT: bfi w14, w16, #20, #1 +; CHECK-NEXT: umov w15, v1.b[7] +; CHECK-NEXT: umov w16, v1.b[8] +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: and w11, w11, #0x1 +; CHECK-NEXT: umov w18, v1.b[12] +; CHECK-NEXT: bfi w14, w13, #21, #1 +; CHECK-NEXT: orr w10, w10, w12, lsl #14 +; CHECK-NEXT: umov w12, v2.b[15] +; CHECK-NEXT: orr w11, w14, w11, lsl #22 +; CHECK-NEXT: umov w14, v1.b[9] +; CHECK-NEXT: and w13, w15, #0x1 +; CHECK-NEXT: and w15, w16, #0x1 +; CHECK-NEXT: umov w16, v1.b[10] +; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b +; CHECK-NEXT: orr w11, w11, w13, lsl #23 +; CHECK-NEXT: orr w10, w10, w12, lsl #15 +; CHECK-NEXT: orr w11, w11, w15, lsl #24 +; CHECK-NEXT: and w12, w14, #0x1 +; CHECK-NEXT: umov w15, v1.b[11] +; CHECK-NEXT: and w13, w16, #0x1 +; CHECK-NEXT: umov w14, v0.b[0] +; CHECK-NEXT: orr w11, w11, w12, lsl #25 +; CHECK-NEXT: umov w12, v0.b[1] +; CHECK-NEXT: orr w11, w11, w13, lsl #26 +; CHECK-NEXT: umov w13, v0.b[2] +; CHECK-NEXT: umov w16, v0.b[3] +; CHECK-NEXT: umov w17, v0.b[4] +; CHECK-NEXT: and w14, w14, #0x1 +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: bfi w14, w12, #1, #1 +; CHECK-NEXT: umov w12, v0.b[6] +; CHECK-NEXT: bfi w14, w13, #2, #1 +; CHECK-NEXT: umov w13, v0.b[5] +; CHECK-NEXT: bfi w14, w16, #3, #1 +; CHECK-NEXT: umov w16, v0.b[7] +; CHECK-NEXT: bfi w14, w17, #4, #1 +; CHECK-NEXT: umov w17, v0.b[8] +; CHECK-NEXT: and w12, w12, #0x1 +; CHECK-NEXT: orr w11, w11, w15, lsl #27 +; CHECK-NEXT: bfi w14, w13, #5, #1 +; CHECK-NEXT: and w13, w18, #0x1 +; CHECK-NEXT: orr w12, w14, w12, lsl #6 +; CHECK-NEXT: and w14, w16, #0x1 +; CHECK-NEXT: umov w16, v0.b[9] +; CHECK-NEXT: umov w18, v0.b[10] +; CHECK-NEXT: and w17, w17, #0x1 +; CHECK-NEXT: orr w12, w12, w14, lsl #7 +; CHECK-NEXT: orr w11, w11, w13, lsl #28 +; CHECK-NEXT: umov w14, v0.b[11] +; CHECK-NEXT: orr w12, w12, w17, lsl #8 +; CHECK-NEXT: umov w17, v1.b[13] +; CHECK-NEXT: and w13, w16, #0x1 +; CHECK-NEXT: and w15, w18, #0x1 +; CHECK-NEXT: umov w16, v0.b[12] +; CHECK-NEXT: umov w18, v0.b[14] +; CHECK-NEXT: orr w12, w12, w13, lsl #9 +; CHECK-NEXT: and w14, w14, #0x1 +; CHECK-NEXT: orr w12, w12, w15, lsl #10 +; CHECK-NEXT: umov w15, v0.b[13] +; CHECK-NEXT: umov w13, v1.b[14] +; CHECK-NEXT: orr w12, w12, w14, lsl #11 +; CHECK-NEXT: and w16, w16, #0x1 +; CHECK-NEXT: and w17, w17, #0x1 +; CHECK-NEXT: umov w14, v1.b[15] +; CHECK-NEXT: and w18, w18, #0x1 +; CHECK-NEXT: orr w12, w12, w16, lsl #12 +; CHECK-NEXT: and w15, w15, #0x1 +; CHECK-NEXT: umov w16, v0.b[15] +; CHECK-NEXT: and w13, w13, #0x1 +; CHECK-NEXT: orr w12, w12, w15, lsl #13 +; CHECK-NEXT: orr w11, w11, w17, lsl #29 +; CHECK-NEXT: orr w12, w12, w18, lsl #14 +; CHECK-NEXT: orr w11, w11, w13, lsl #30 +; CHECK-NEXT: orr w11, w11, w14, lsl #31 +; CHECK-NEXT: and w10, w10, #0xffff +; CHECK-NEXT: orr w12, w12, w16, lsl #15 +; CHECK-NEXT: orr w9, w10, w9 +; CHECK-NEXT: and w12, w12, #0xffff +; CHECK-NEXT: orr w10, w12, w11 +; CHECK-NEXT: and x9, 
x9, x10 +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret + %cmp1 = icmp ne <64 x i8> %a, zeroinitializer + %cast = bitcast <64 x i1> %cmp1 to i64 + %cmp2 = icmp ne i64 %cast, -1 + ret i1 %cmp2 +} + define i1 @combine_setcc_eq0_conjunction_xor_or(ptr %a, ptr %b) { ; CHECK-LABEL: combine_setcc_eq0_conjunction_xor_or: ; CHECK: // %bb.0: @@ -164,11 +932,11 @@ define i32 @combine_setcc_multiuse(i32 %0, i32 %1, i32 %2, i32 %3) { ; CHECK-NEXT: eor w8, w1, w0 ; CHECK-NEXT: eor w9, w3, w2 ; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: cbz w8, .LBB10_2 +; CHECK-NEXT: cbz w8, .LBB18_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: b use -; CHECK-NEXT: .LBB10_2: +; CHECK-NEXT: .LBB18_2: ; CHECK-NEXT: ret %5 = xor i32 %1, %0 %6 = xor i32 %3, %2 @@ -209,7 +977,7 @@ define [2 x i64] @PR58675(i128 %a.addr, i128 %b.addr) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: mov x9, xzr -; CHECK-NEXT: .LBB12_1: // %do.body +; CHECK-NEXT: .LBB20_1: // %do.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmp x0, x8 ; CHECK-NEXT: csel x10, x0, x8, lo @@ -221,7 +989,7 @@ define [2 x i64] @PR58675(i128 %a.addr, i128 %b.addr) { ; CHECK-NEXT: sbc x9, x3, x11 ; CHECK-NEXT: cmp x3, x11 ; CHECK-NEXT: ccmp x2, x10, #0, eq -; CHECK-NEXT: b.ne .LBB12_1 +; CHECK-NEXT: b.ne .LBB20_1 ; CHECK-NEXT: // %bb.2: // %do.end ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: mov x1, xzr From 8f072bbd74021d1aee913ae500d439a23b591297 Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Tue, 21 Mar 2023 17:48:45 +0530 Subject: [PATCH 168/691] [MLIR] Fix warnings in AttrTypeSubElements.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix warnings in AttrTypeSubElements.h (below) with GCC 9.4.0. ``` mlir/lib/IR/ExtensibleDialect.cpp:443:62: required from here mlir/include/mlir/IR/AttrTypeSubElements.h:412:37: warning: parameter ‘derived’ set but not used [-Wunused-but-set-parameter] 412 | void walkImmediateSubElementsImpl(T derived, | ~~^~~ ``` Reviewed By: mehdi_amini Differential Revision: https://reviews.llvm.org/D146107 --- mlir/include/mlir/IR/AttrTypeSubElements.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/mlir/include/mlir/IR/AttrTypeSubElements.h b/mlir/include/mlir/IR/AttrTypeSubElements.h index fe3f4cd24effd..8382162d9a373 100644 --- a/mlir/include/mlir/IR/AttrTypeSubElements.h +++ b/mlir/include/mlir/IR/AttrTypeSubElements.h @@ -413,16 +413,17 @@ void walkImmediateSubElementsImpl(T derived, function_ref walkAttrsFn, function_ref walkTypesFn) { using ImplT = typename T::ImplType; + (void)derived; + (void)walkAttrsFn; + (void)walkTypesFn; if constexpr (llvm::is_detected::value) { auto key = static_cast(derived.getImpl())->getAsKey(); // If we don't have any sub-elements, there is nothing to do. - if constexpr (!has_sub_attr_or_type_v) { + if constexpr (!has_sub_attr_or_type_v) return; - } else { - AttrTypeImmediateSubElementWalker walker(walkAttrsFn, walkTypesFn); - AttrTypeSubElementHandler::walk(key, walker); - } + AttrTypeImmediateSubElementWalker walker(walkAttrsFn, walkTypesFn); + AttrTypeSubElementHandler::walk(key, walker); } } From da8260a9b11152e2a6fb0fcad9e3c0b0312ecd6f Mon Sep 17 00:00:00 2001 From: "Luo, Yuanke" Date: Tue, 21 Mar 2023 20:14:22 +0800 Subject: [PATCH 169/691] Revert "[X86] Create extra prolog/epilog for stack realignment" This reverts commit e4c1dfed38370b4933f05c8e24b1d77df56b526c. 
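
For context, the reverted change emitted an extra prologue that referenced incoming stack arguments through a scratch base register, so the stack could be realigned before the frame was set up. A rough sketch of the 32-bit sequence, reconstructed from the comments in the code removed below (the concrete register is illustrative; the pass actually allocated a virtual register constrained to the calling convention's scratch registers):

```
leal 4(%esp), %ecx   # capture the incoming argument base before realignment
.cfi_def_cfa %ecx, 0
andl $-128, %esp     # realign the stack
pushl -4(%ecx)       # push a copy of the return address at the realigned top
```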
--- llvm/lib/Target/X86/CMakeLists.txt | 1 - llvm/lib/Target/X86/X86.h | 2 - .../Target/X86/X86ArgumentStackSlotRebase.cpp | 195 ------------------ llvm/lib/Target/X86/X86FrameLowering.cpp | 192 ++--------------- llvm/lib/Target/X86/X86MachineFunctionInfo.h | 5 - llvm/lib/Target/X86/X86RegisterInfo.cpp | 44 ---- llvm/lib/Target/X86/X86RegisterInfo.h | 4 - llvm/lib/Target/X86/X86RegisterInfo.td | 12 -- llvm/lib/Target/X86/X86TargetMachine.cpp | 2 - .../CodeGen/MIR/X86/inline-asm-registers.mir | 8 +- llvm/test/CodeGen/X86/O0-pipeline.ll | 1 - llvm/test/CodeGen/X86/i386-baseptr.ll | 22 +- llvm/test/CodeGen/X86/opt-pipeline.ll | 1 - .../X86/statepoint-invoke-ra-enter-at-end.mir | 4 +- llvm/test/CodeGen/X86/x86-64-baseptr.ll | 118 +++++------ 15 files changed, 82 insertions(+), 529 deletions(-) delete mode 100644 llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index 5eba6db5b06eb..545e8a38b6961 100644 --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -26,7 +26,6 @@ endif() add_public_tablegen_target(X86CommonTableGen) set(sources - X86ArgumentStackSlotRebase.cpp X86AsmPrinter.cpp X86AvoidTrailingCall.cpp X86CallFrameOptimization.cpp diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 5ff9d7facc878..044b2636f951c 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -166,13 +166,11 @@ FunctionPass *createX86LoadValueInjectionLoadHardeningPass(); FunctionPass *createX86LoadValueInjectionRetHardeningPass(); FunctionPass *createX86SpeculativeLoadHardeningPass(); FunctionPass *createX86SpeculativeExecutionSideEffectSuppression(); -FunctionPass *createX86ArgumentStackSlotPass(); void initializeEvexToVexInstPassPass(PassRegistry &); void initializeFPSPass(PassRegistry &); void initializeFixupBWInstPassPass(PassRegistry &); void initializeFixupLEAPassPass(PassRegistry &); -void initializeX86ArgumentStackSlotPassPass(PassRegistry &); void initializeX86FixupInstTuningPassPass(PassRegistry &); void initializeWinEHStatePassPass(PassRegistry &); void initializeX86AvoidSFBPassPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp deleted file mode 100644 index ef3751e40f17f..0000000000000 --- a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp +++ /dev/null @@ -1,195 +0,0 @@ -//===---- X86ArgumentStackSlotRebase.cpp - rebase argument stack slot -----===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass replace the frame register with a GPR virtual register and set -// the stack offset for each instruction which reference argument from stack. 
-// -//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86InstrBuilder.h" -#include "X86MachineFunctionInfo.h" -#include "X86RegisterInfo.h" -#include "X86Subtarget.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/TargetOpcodes.h" -#include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/Function.h" -#include "llvm/InitializePasses.h" -#include "llvm/Pass.h" - -using namespace llvm; - -#define DEBUG_TYPE "x86argumentstackrebase" - -namespace { - -class X86ArgumentStackSlotPass : public MachineFunctionPass { - -public: - static char ID; // Pass identification, replacement for typeid - - explicit X86ArgumentStackSlotPass() : MachineFunctionPass(ID) { - initializeX86ArgumentStackSlotPassPass(*PassRegistry::getPassRegistry()); - } - - bool runOnMachineFunction(MachineFunction &MF) override; - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } -}; - -} // end anonymous namespace - -char X86ArgumentStackSlotPass::ID = 0; - -INITIALIZE_PASS(X86ArgumentStackSlotPass, DEBUG_TYPE, "Argument Stack Rebase", - false, false) - -FunctionPass *llvm::createX86ArgumentStackSlotPass() { - return new X86ArgumentStackSlotPass(); -} - -static Register getArgBaseReg(MachineFunction &MF) { - MachineRegisterInfo &MRI = MF.getRegInfo(); - const X86Subtarget &STI = MF.getSubtarget(); - const Function &F = MF.getFunction(); - CallingConv::ID CC = F.getCallingConv(); - Register NoReg; - const TargetRegisterClass *RC = nullptr; - switch (CC) { - // We need a virtual register in case there is inline assembly - // clobber argument base register. - case CallingConv::C: - RC = STI.is64Bit() ? &X86::GR64_ArgRefRegClass : &X86::GR32_ArgRefRegClass; - break; - case CallingConv::X86_RegCall: - // FIXME: For regcall there is no scratch register on 32-bit target. - // We may use a callee saved register as argument base register and - // save it before being changed as base pointer. We need DW_CFA to - // indicate where the callee saved register is saved, so that it can - // be correctly unwind. - // push ebx - // mov ebx, esp - // and esp, -128 - // ... - // pop ebx - // ret - RC = STI.is64Bit() ? &X86::GR64_ArgRefRegClass : nullptr; - break; - // TODO: Refine register class for each calling convention. 
- default: - break; - } - if (RC) - return MRI.createVirtualRegister(RC); - else - return NoReg; -} - -bool X86ArgumentStackSlotPass::runOnMachineFunction(MachineFunction &MF) { - const Function &F = MF.getFunction(); - MachineFrameInfo &MFI = MF.getFrameInfo(); - const X86Subtarget &STI = MF.getSubtarget(); - const X86RegisterInfo *TRI = STI.getRegisterInfo(); - const X86InstrInfo *TII = STI.getInstrInfo(); - X86MachineFunctionInfo *X86FI = MF.getInfo(); - bool Changed = false; - - if (F.hasFnAttribute(Attribute::Naked)) - return false; - // Only support Linux - if (!STI.isTargetLinux()) - return false; - if (!TRI->hasBasePointer(MF)) - return false; - - Register BasePtr = TRI->getBaseRegister(); - auto IsBaseRegisterClobbered = [&]() { - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - if (!MI.isInlineAsm()) - continue; - for (MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) - continue; - Register Reg = MO.getReg(); - if (!Register::isPhysicalRegister(Reg)) - continue; - if (TRI->isSuperOrSubRegisterEq(BasePtr, Reg)) - return true; - } - } - } - return false; - }; - if (!IsBaseRegisterClobbered()) - return false; - - Register ArgBaseReg = getArgBaseReg(MF); - if (!ArgBaseReg.isValid()) - return false; - // leal 4(%esp), %reg - // FIXME: will the instruction be duplicated or eliminated? Should - // define a pseudo instruction for it? - MachineBasicBlock &MBB = MF.front(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc DL; - // Emit instruction to copy get stack pointer to a virtual register - // and save the instruction to x86 machine functon info. We can get - // physical register of ArgBaseReg after register allocation. The - // stack slot is used to save/restore argument base pointer. We can - // get the index from the instruction. - unsigned SlotSize = TRI->getSlotSize(); - int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize)); - MachineInstr *LEA = - BuildMI(MBB, MBBI, DL, - TII->get(STI.is64Bit() ? X86::LEA64r : X86::LEA32r), ArgBaseReg) - .addFrameIndex(FI) - .addImm(1) - .addUse(X86::NoRegister) - .addImm(SlotSize) - .addUse(X86::NoRegister) - .setMIFlag(MachineInstr::FrameSetup); - X86FI->setStackPtrSaveMI(LEA); - - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - int I = 0; - for (MachineOperand &MO : MI.operands()) { - if (MO.isFI()) { - int Idx = MO.getIndex(); - if (!MFI.isFixedObjectIndex(Idx)) - continue; - int64_t Offset = MFI.getObjectOffset(Idx); - if (Offset < 0) - continue; - // TODO replace register for debug instruction - if (MI.isDebugInstr()) - continue; - // Replace frame register with argument base pointer and its offset. - TRI->eliminateFrameIndex(MI.getIterator(), I, ArgBaseReg, Offset); - Changed = true; - } - ++I; - } - } - } - - return Changed; -} diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 78f96817c8420..cb42a1025ea21 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -33,7 +33,6 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/LEB128.h" #include "llvm/Target/TargetOptions.h" #include @@ -477,7 +476,6 @@ void X86FrameLowering::emitCalleeSavedFrameMoves( MachineFrameInfo &MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - X86MachineFunctionInfo *X86FI = MF.getInfo(); // Add callee saved registers to move list. 
const std::vector &CSI = MFI.getCalleeSavedInfo(); @@ -489,62 +487,13 @@ void X86FrameLowering::emitCalleeSavedFrameMoves( unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); if (IsPrologue) { - if (X86FI->getStackPtrSaveMI()) { - // +2*SlotSize because there is return address and ebp at the bottom - // of the stack. - // | retaddr | - // | ebp | - // | |<--ebp - Offset += 2 * SlotSize; - SmallString<64> CfaExpr; - CfaExpr.push_back(dwarf::DW_CFA_expression); - uint8_t buffer[16]; - CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer)); - CfaExpr.push_back(2); - Register FramePtr = TRI->getFrameRegister(MF); - const Register MachineFramePtr = - STI.isTarget64BitILP32() - ? Register(getX86SubSuperRegister(FramePtr, 64)) - : FramePtr; - unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true); - CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr)); - CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer)); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::createEscape(nullptr, CfaExpr.str()), - MachineInstr::FrameSetup); - } else { - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); - } + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); } else { BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createRestore(nullptr, DwarfReg)); } } - if (auto *MI = X86FI->getStackPtrSaveMI()) { - int FI = MI->getOperand(1).getIndex(); - int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize; - SmallString<64> CfaExpr; - Register FramePtr = TRI->getFrameRegister(MF); - const Register MachineFramePtr = - STI.isTarget64BitILP32() - ? Register(getX86SubSuperRegister(FramePtr, 64)) - : FramePtr; - unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true); - CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr)); - uint8_t buffer[16]; - CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer)); - CfaExpr.push_back(dwarf::DW_OP_deref); - - SmallString<64> DefCfaExpr; - DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression); - DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer)); - DefCfaExpr.append(CfaExpr.str()); - // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()), - MachineInstr::FrameSetup); - } } void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, @@ -1560,42 +1509,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Debug location must be unknown since the first debug location is used // to determine the end of the prologue. DebugLoc DL; - Register ArgBaseReg; - - // Emit extra prolog for argument stack slot reference. - if (auto *MI = X86FI->getStackPtrSaveMI()) { - // MI is lea instruction that created in X86ArgumentStackSlotPass. - // Creat extra prolog for stack realignment. - ArgBaseReg = MI->getOperand(0).getReg(); - // leal 4(%esp), %basereg - // .cfi_def_cfa %basereg, 0 - // andl $-128, %esp - // pushl -4(%basereg) - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? 
X86::LEA64r : X86::LEA32r), - ArgBaseReg) - .addUse(StackPtr) - .addImm(1) - .addUse(X86::NoRegister) - .addImm(SlotSize) - .addUse(X86::NoRegister) - .setMIFlag(MachineInstr::FrameSetup); - if (NeedsDwarfCFI) { - // .cfi_def_cfa %basereg, 0 - unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0), - MachineInstr::FrameSetup); - } - BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign); - int64_t Offset = Is64Bit ? -2 * (int64_t)SlotSize : -1 * (int64_t)SlotSize; - BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm: X86::PUSH32rmm)) - .addReg(ArgBaseReg) - .addImm(1) - .addReg(X86::NoRegister) - .addImm(Offset) - .addReg(X86::NoRegister) - .setMIFlag(MachineInstr::FrameSetup); - } // Space reserved for stack-based arguments when making a (ABI-guaranteed) // tail call. @@ -1727,7 +1640,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .addReg(MachineFramePtr, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); - if (NeedsDwarfCFI && !ArgBaseReg.isValid()) { + if (NeedsDwarfCFI) { // Mark the place where EBP/RBP was saved. // Define the current CFA rule to use the provided offset. assert(StackSize); @@ -1804,28 +1717,13 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); if (NeedsDwarfCFI) { - if (ArgBaseReg.isValid()) { - SmallString<64> CfaExpr; - CfaExpr.push_back(dwarf::DW_CFA_expression); - uint8_t buffer[16]; - unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true); - CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer)); - CfaExpr.push_back(2); - CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg)); - CfaExpr.push_back(0); - // DW_CFA_expression: reg5 DW_OP_breg5 +0 - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::createEscape(nullptr, CfaExpr.str()), - MachineInstr::FrameSetup); - } else { - // Mark effective beginning of when frame pointer becomes valid. - // Define the current CFA to use the EBP/RBP register. - unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); - BuildCFI( - MBB, MBBI, DL, - MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr), - MachineInstr::FrameSetup); - } + // Mark effective beginning of when frame pointer becomes valid. + // Define the current CFA to use the EBP/RBP register. + unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); + BuildCFI( + MBB, MBBI, DL, + MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr), + MachineInstr::FrameSetup); } if (NeedsWinFPO) { @@ -1892,8 +1790,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Realign stack after we pushed callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). // Don't do this for Win64, it needs to realign the stack after the prologue. - if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) && - !ArgBaseReg.isValid()) { + if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign); @@ -2151,16 +2048,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } } - if (ArgBaseReg.isValid()) { - // Save argument base pointer. - auto *MI = X86FI->getStackPtrSaveMI(); - int FI = MI->getOperand(1).getIndex(); - unsigned MOVmr = Is64Bit ? 
X86::MOV64mr : X86::MOV32mr; - // movl %basereg, offset(%ebp) - addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI) - .addReg(ArgBaseReg) - .setMIFlag(MachineInstr::FrameSetup); - } if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { // Mark end of stack pointer adjustment. @@ -2309,34 +2196,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, !MF.getTarget().getTargetTriple().isOSWindows()) && MF.needsFrameMoves(); - Register ArgBaseReg; - if (auto *MI = X86FI->getStackPtrSaveMI()) { - unsigned Opc = X86::LEA32r; - Register StackReg = X86::ESP; - ArgBaseReg = MI->getOperand(0).getReg(); - if (STI.is64Bit()) { - Opc = X86::LEA64r; - StackReg = X86::RSP; - } - // leal -8(%basereg), %esp - // .cfi_def_cfa %esp, 4 - BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg) - .addUse(ArgBaseReg) - .addImm(1) - .addUse(X86::NoRegister) - .addImm((int64_t)SlotSize * -2) - .addUse(X86::NoRegister) - .setMIFlag(MachineInstr::FrameDestroy); - if (NeedsDwarfCFI) { - unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize), - MachineInstr::FrameDestroy); - --MBBI; - } - --MBBI; - } - if (IsFunclet) { assert(HasFP && "EH funclets without FP not yet implemented"); NumBytes = getWinEHFuncletFrameSize(MF); @@ -2378,13 +2237,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, } if (NeedsDwarfCFI) { - if (!ArgBaseReg.isValid()) { - unsigned DwarfStackPtr = - TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true); - BuildCFI(MBB, MBBI, DL, - MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize), - MachineInstr::FrameDestroy); - } + unsigned DwarfStackPtr = + TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true); + BuildCFI(MBB, MBBI, DL, + MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize), + MachineInstr::FrameDestroy); if (!MBB.succ_empty() && !MBB.isReturnBlock()) { unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); BuildCFI(MBB, AfterPop, DL, @@ -2414,15 +2271,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, --MBBI; } - if (ArgBaseReg.isValid()) { - // Restore argument base pointer. - auto *MI = X86FI->getStackPtrSaveMI(); - int FI = MI->getOperand(1).getIndex(); - unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm; - // movl offset(%ebp), %basereg - addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI) - .setMIFlag(MachineInstr::FrameDestroy); - } MBBI = FirstCSPop; if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET) @@ -4041,16 +3889,8 @@ void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const { void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced( MachineFunction &MF, RegScavenger *RS) const { - auto *X86FI = MF.getInfo(); - if (STI.is32Bit() && MF.hasEHFunclets()) restoreWinEHStackPointersInParent(MF); - // We have emitted prolog and epilog. Don't need stack pointer saving - // instruction any more. - if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) { - MI->eraseFromParent(); - X86FI->setStackPtrSaveMI(nullptr); - } } void X86FrameLowering::restoreWinEHStackPointersInParent( diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h index 31c087df9a911..372838a212cc8 100644 --- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h +++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h @@ -117,8 +117,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { /// determine if we should insert tilerelease in frame lowering. 
bool HasVirtualTileReg = false; - MachineInstr *StackPtrSaveMI = nullptr; - std::optional SwiftAsyncContextFrameIdx; // Preallocated fields are only used during isel. @@ -227,9 +225,6 @@ class X86MachineFunctionInfo : public MachineFunctionInfo { bool hasVirtualTileReg() const { return HasVirtualTileReg; } void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; } - void setStackPtrSaveMI(MachineInstr *MI) { StackPtrSaveMI = MI; } - MachineInstr *getStackPtrSaveMI() const { return StackPtrSaveMI; } - std::optional getSwiftAsyncContextFrameIdx() const { return SwiftAsyncContextFrameIdx; } diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp index 0edc0a432f8e0..327c61c4c5d0b 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -702,11 +702,6 @@ static bool CantUseSP(const MachineFrameInfo &MFI) { bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const { const X86MachineFunctionInfo *X86FI = MF.getInfo(); - // We have a virtual register to reference argument, and don't need base - // pointer. - if (X86FI->getStackPtrSaveMI() != nullptr) - return false; - if (X86FI->hasPreallocatedCall()) return true; @@ -783,45 +778,6 @@ static bool isFuncletReturnInstr(MachineInstr &MI) { llvm_unreachable("impossible"); } -void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - unsigned FIOperandNum, - Register BaseReg, - int FIOffset) const { - MachineInstr &MI = *II; - unsigned Opc = MI.getOpcode(); - if (Opc == TargetOpcode::LOCAL_ESCAPE) { - MachineOperand &FI = MI.getOperand(FIOperandNum); - FI.ChangeToImmediate(FIOffset); - return; - } - - MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false); - - // The frame index format for stackmaps and patchpoints is different from the - // X86 format. It only has a FI and an offset. - if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { - assert(BasePtr == FramePtr && "Expected the FP as base register"); - int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset; - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); - return; - } - - if (MI.getOperand(FIOperandNum + 3).isImm()) { - // Offset is a 32-bit integer. - int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm()); - int Offset = FIOffset + Imm; - assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) && - "Requesting 64-bit offset in 32-bit immediate!"); - if (Offset != 0 || !tryOptimizeLEAtoMOV(II)) - MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset); - } else { - // Offset is symbolic. This is extremely rare. 
- uint64_t Offset = - FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset(); - MI.getOperand(FIOperandNum + 3).setOffset(Offset); - } -} - bool X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h index 48eeb72479f8c..f88d4b18f1d86 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -133,10 +133,6 @@ class X86RegisterInfo final : public X86GenRegisterInfo { bool canRealignStack(const MachineFunction &MF) const override; - void eliminateFrameIndex(MachineBasicBlock::iterator II, - unsigned FIOperandNum, Register BaseReg, - int FIOffset) const; - bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS = nullptr) const override; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td index 1e6477e658b9d..4ef7150122ca8 100644 --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -433,18 +433,6 @@ def GR64PLTSafe : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI, R8, R9, RBX, R14, R15, R12, R13, RBP)>; -// It includes the GPR that are used as scratch register for Linux64 calling -// convention. -def GR64_ArgRef: RegisterClass<"X86", [i64], 64, (add R10, R11)> { - let GeneratePressureSet = 0; -} - -// It includes the GPR that are used as scratch register for Linux32 calling -// convention. -def GR32_ArgRef: RegisterClass<"X86", [i32], 32, (add ECX, EDX)> { - let GeneratePressureSet = 0; -} - // Segment registers for use by MOV instructions (and others) that have a // segment register as one operand. Always contain a 16-bit segment // descriptor. diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index d870f9b031f57..dfb7030def7e7 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -104,7 +104,6 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() { initializePseudoProbeInserterPass(PR); initializeX86ReturnThunksPass(PR); initializeX86DAGToDAGISelPass(PR); - initializeX86ArgumentStackSlotPassPass(PR); } static std::unique_ptr createTLOF(const Triple &TT) { @@ -519,7 +518,6 @@ bool X86PassConfig::addPreISel() { } void X86PassConfig::addPreRegAlloc() { - addPass(createX86ArgumentStackSlotPass()); if (getOptLevel() != CodeGenOpt::None) { addPass(&LiveRangeShrinkID); addPass(createX86FixupSetCC()); diff --git a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir index f92d49cabdcda..0202103c8ff4d 100644 --- a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir +++ b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir @@ -28,8 +28,8 @@ body: | liveins: $rdi, $rsi ; CHECK-LABEL: name: test - ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi, - INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags + ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4456458 /* regdef:GR64 */, def $rsi, 4456458 /* regdef:GR64 */, def dead $rdi, + INLINEASM &foo, 0, 4456458, def $rsi, 4456458, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags $rax = MOV64rr killed $rsi RET64 killed $rax ... 
@@ -45,8 +45,8 @@ body: | ; Verify that the register ties are preserved. ; CHECK-LABEL: name: test2 - ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags - INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags + ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4456458 /* regdef:GR64 */, def $rsi, 4456458 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags + INLINEASM &foo, 0, 4456458, def $rsi, 4456458, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags $rax = MOV64rr killed $rsi RET64 killed $rax ... diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index 01deb3fbc00d3..e78464120426d 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -41,7 +41,6 @@ ; CHECK-NEXT: X86 PIC Global Base Reg Initialization ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: Local Stack Slot Allocation -; CHECK-NEXT: Argument Stack Rebase ; CHECK-NEXT: X86 speculative load hardening ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: X86 EFLAGS copy lowering diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll index cb8d849a86841..8baa16c076bac 100644 --- a/llvm/test/CodeGen/X86/i386-baseptr.ll +++ b/llvm/test/CodeGen/X86/i386-baseptr.ll @@ -39,18 +39,16 @@ entry: define void @clobber_base() #0 { ; CHECK-LABEL: clobber_base: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: leal {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: .cfi_def_cfa %ecx, 0 -; CHECK-NEXT: andl $-128, %esp -; CHECK-NEXT: pushl -4(%ecx) ; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset %ebp, -8 ; CHECK-NEXT: movl %esp, %ebp -; CHECK-NEXT: .cfi_escape 0x10, 0x05, 0x02, 0x75, 0x00 # +; CHECK-NEXT: .cfi_def_cfa_register %ebp ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $244, %esp -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x75, 0x7c # -; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x75, 0x84, 0x7f, 0x06 # +; CHECK-NEXT: andl $-128, %esp +; CHECK-NEXT: subl $128, %esp +; CHECK-NEXT: movl %esp, %esi +; CHECK-NEXT: .cfi_offset %esi, -12 ; CHECK-NEXT: calll helper@PLT ; CHECK-NEXT: movl %esp, %ecx ; CHECK-NEXT: leal 31(,%eax,4), %eax @@ -65,14 +63,12 @@ define void @clobber_base() #0 { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $8, %edx ; CHECK-NEXT: #APP -; CHECK-NEXT: movl %edx, -120(%ebp) +; CHECK-NEXT: movl %edx, (%esi) ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $0, (%ecx,%eax) -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: leal -4(%ebp), %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %ebp -; CHECK-NEXT: leal -8(%ecx), %esp ; CHECK-NEXT: .cfi_def_cfa %esp, 4 ; CHECK-NEXT: retl entry: @@ -139,6 +135,6 @@ entry: ret void } -attributes #0 = {"frame-pointer"="all"} +attributes #0 = { "frame-pointer"="all"} !llvm.module.flags = !{!0} !0 = !{i32 2, !"override-stack-alignment", i32 32} 
diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 5c7e6d1da32c7..4361b863c25b0 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -116,7 +116,6 @@ ; CHECK-NEXT: Machine code sinking ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions -; CHECK-NEXT: Argument Stack Rebase ; CHECK-NEXT: Live Range Shrink ; CHECK-NEXT: X86 Fixup SetCC ; CHECK-NEXT: Lazy Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir index fe057a8d78eb1..2170573f4b0d9 100644 --- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir +++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir @@ -350,7 +350,7 @@ body: | ; CHECK-NEXT: CMP64rr [[NOT64r2]], [[COPY6]], implicit-def $eflags ; CHECK-NEXT: undef %100.sub_32bit:gr64_with_sub_8bit = MOV32ri 0 ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], %100, 4, implicit killed $eflags - ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %100, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags + ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4456457 /* reguse:GR64 */, %100, 4456457 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags ; CHECK-NEXT: LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, [[COPY5]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-NEXT: $rdi = COPY [[COPY4]] @@ -470,7 +470,7 @@ body: | %63:gr64 = NOT64r %63 CMP64rr %63, %31, implicit-def $eflags %63:gr64 = CMOV64rr %63, %53, 4, implicit killed $eflags - INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %53, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags + INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4456457 /* reguse:GR64 */, %53, 4456457 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, %65, implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $rdi = COPY %64 diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll index 9b6256189cdfc..914a5a4796247 100644 --- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll +++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll @@ -77,18 +77,16 @@ entry: define void @clobber_base() #0 { ; CHECK-LABEL: clobber_base: 
; CHECK: # %bb.0: # %entry -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r10 -; CHECK-NEXT: .cfi_def_cfa %r10, 0 -; CHECK-NEXT: andq $-128, %rsp -; CHECK-NEXT: pushq -16(%r10) ; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 # +; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: subq $232, %rsp -; CHECK-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 # -; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7f, 0x06 # +; CHECK-NEXT: andq $-128, %rsp +; CHECK-NEXT: subq $128, %rsp +; CHECK-NEXT: movq %rsp, %rbx +; CHECK-NEXT: .cfi_offset %rbx, -24 ; CHECK-NEXT: callq helper@PLT ; CHECK-NEXT: movq %rsp, %rcx ; CHECK-NEXT: movl %eax, %eax @@ -104,31 +102,27 @@ define void @clobber_base() #0 { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $8, %edx ; CHECK-NEXT: #APP -; CHECK-NEXT: movl %edx, -112(%rbp) +; CHECK-NEXT: movl %edx, (%rbx) ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $0, (%rcx,%rax) -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; CHECK-NEXT: leaq -8(%rbp), %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp -; CHECK-NEXT: leaq -16(%r10), %rsp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ; CHECK-NEXT: retq ; ; X32ABI-LABEL: clobber_base: ; X32ABI: # %bb.0: # %entry -; X32ABI-NEXT: leaq {{[0-9]+}}(%esp), %r10 -; X32ABI-NEXT: .cfi_def_cfa %r10, 0 -; X32ABI-NEXT: andl $-128, %esp -; X32ABI-NEXT: pushq -16(%r10) ; X32ABI-NEXT: pushq %rbp +; X32ABI-NEXT: .cfi_def_cfa_offset 16 +; X32ABI-NEXT: .cfi_offset %rbp, -16 ; X32ABI-NEXT: movl %esp, %ebp -; X32ABI-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 # +; X32ABI-NEXT: .cfi_def_cfa_register %rbp ; X32ABI-NEXT: pushq %rbx -; X32ABI-NEXT: subl $232, %esp -; X32ABI-NEXT: movq %r10, {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Spill -; X32ABI-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 # -; X32ABI-NEXT: .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7f, 0x06 # +; X32ABI-NEXT: andl $-128, %esp +; X32ABI-NEXT: subl $128, %esp +; X32ABI-NEXT: movl %esp, %ebx +; X32ABI-NEXT: .cfi_offset %rbx, -24 ; X32ABI-NEXT: callq helper@PLT ; X32ABI-NEXT: # kill: def $eax killed $eax def $rax ; X32ABI-NEXT: leal 31(,%rax,4), %eax @@ -144,14 +138,12 @@ define void @clobber_base() #0 { ; X32ABI-NEXT: #NO_APP ; X32ABI-NEXT: movl $8, %edx ; X32ABI-NEXT: #APP -; X32ABI-NEXT: movl %edx, -112(%ebp) +; X32ABI-NEXT: movl %edx, (%ebx) ; X32ABI-NEXT: #NO_APP ; X32ABI-NEXT: movl $0, (%ecx,%eax) -; X32ABI-NEXT: movq {{[-0-9]+}}(%e{{[sb]}}p), %r10 # 8-byte Reload ; X32ABI-NEXT: leal -8(%ebp), %esp ; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: popq %rbp -; X32ABI-NEXT: leaq -16(%r10), %rsp ; X32ABI-NEXT: .cfi_def_cfa %rsp, 8 ; X32ABI-NEXT: retq entry: @@ -168,15 +160,14 @@ entry: define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 %param3, i32 %param4, i32 %param5, i32 %param6, i32 %param7, i32 %param8, i32 %param9, i32 %param10, i32 %param11, i32 %param12) #0 { ; CHECK-LABEL: clobber_baseptr_argptr: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r10 -; CHECK-NEXT: .cfi_def_cfa %r10, 0 -; CHECK-NEXT: andq $-128, %rsp -; CHECK-NEXT: pushq -16(%r10) ; CHECK-NEXT: pushq %rbp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 # +; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: subq $360, %rsp # 
imm = 0x168 +; CHECK-NEXT: andq $-128, %rsp +; CHECK-NEXT: subq $256, %rsp # imm = 0x100 ; CHECK-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -185,18 +176,17 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; CHECK-NEXT: movaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 # -; CHECK-NEXT: .cfi_escape 0x10, 0x19, 0x02, 0x76, 0xf0, 0x7e # -; CHECK-NEXT: .cfi_escape 0x10, 0x1a, 0x02, 0x76, 0x80, 0x7f # -; CHECK-NEXT: .cfi_escape 0x10, 0x1b, 0x02, 0x76, 0x90, 0x7f # -; CHECK-NEXT: .cfi_escape 0x10, 0x1c, 0x02, 0x76, 0xa0, 0x7f # -; CHECK-NEXT: .cfi_escape 0x10, 0x1d, 0x02, 0x76, 0xb0, 0x7f # -; CHECK-NEXT: .cfi_escape 0x10, 0x1e, 0x02, 0x76, 0x40 # -; CHECK-NEXT: .cfi_escape 0x10, 0x1f, 0x02, 0x76, 0x50 # -; CHECK-NEXT: .cfi_escape 0x10, 0x20, 0x02, 0x76, 0x60 # -; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7e, 0x06 # -; CHECK-NEXT: movl (%r10), %r14d +; CHECK-NEXT: movq %rsp, %rbx +; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: .cfi_offset %xmm8, -160 +; CHECK-NEXT: .cfi_offset %xmm9, -144 +; CHECK-NEXT: .cfi_offset %xmm10, -128 +; CHECK-NEXT: .cfi_offset %xmm11, -112 +; CHECK-NEXT: .cfi_offset %xmm12, -96 +; CHECK-NEXT: .cfi_offset %xmm13, -80 +; CHECK-NEXT: .cfi_offset %xmm14, -64 +; CHECK-NEXT: .cfi_offset %xmm15, -48 +; CHECK-NEXT: movl 16(%rbp), %r14d ; CHECK-NEXT: callq helper@PLT ; CHECK-NEXT: movq %rsp, %rcx ; CHECK-NEXT: movl %eax, %eax @@ -215,7 +205,7 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $8, %edx ; CHECK-NEXT: #APP -; CHECK-NEXT: movl %edx, -240(%rbp) +; CHECK-NEXT: movl %edx, (%rbx) ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl %r14d, (%rcx,%rax) ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload @@ -226,25 +216,22 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload ; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; CHECK-NEXT: leaq -8(%rbp), %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp -; CHECK-NEXT: leaq -16(%r10), %rsp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ; CHECK-NEXT: retq ; ; X32ABI-LABEL: clobber_baseptr_argptr: ; X32ABI: # %bb.0: # %entry -; X32ABI-NEXT: leaq {{[0-9]+}}(%esp), %r10 -; X32ABI-NEXT: .cfi_def_cfa %r10, 0 -; X32ABI-NEXT: andl $-128, %esp -; X32ABI-NEXT: pushq -16(%r10) ; X32ABI-NEXT: pushq %rbp +; X32ABI-NEXT: .cfi_def_cfa_offset 16 +; X32ABI-NEXT: .cfi_offset %rbp, -16 ; X32ABI-NEXT: movl %esp, %ebp -; X32ABI-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 # +; X32ABI-NEXT: .cfi_def_cfa_register %rbp ; X32ABI-NEXT: pushq %rbx -; X32ABI-NEXT: subl $360, %esp # imm = 0x168 +; X32ABI-NEXT: andl $-128, %esp +; X32ABI-NEXT: subl $256, %esp # imm = 0x100 ; X32ABI-NEXT: movaps %xmm15, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32ABI-NEXT: movaps %xmm14, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32ABI-NEXT: movaps %xmm13, 
{{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill @@ -253,18 +240,17 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; X32ABI-NEXT: movaps %xmm10, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32ABI-NEXT: movaps %xmm9, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill ; X32ABI-NEXT: movaps %xmm8, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill -; X32ABI-NEXT: movq %r10, {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Spill -; X32ABI-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 # -; X32ABI-NEXT: .cfi_escape 0x10, 0x19, 0x02, 0x76, 0xf0, 0x7e # -; X32ABI-NEXT: .cfi_escape 0x10, 0x1a, 0x02, 0x76, 0x80, 0x7f # -; X32ABI-NEXT: .cfi_escape 0x10, 0x1b, 0x02, 0x76, 0x90, 0x7f # -; X32ABI-NEXT: .cfi_escape 0x10, 0x1c, 0x02, 0x76, 0xa0, 0x7f # -; X32ABI-NEXT: .cfi_escape 0x10, 0x1d, 0x02, 0x76, 0xb0, 0x7f # -; X32ABI-NEXT: .cfi_escape 0x10, 0x1e, 0x02, 0x76, 0x40 # -; X32ABI-NEXT: .cfi_escape 0x10, 0x1f, 0x02, 0x76, 0x50 # -; X32ABI-NEXT: .cfi_escape 0x10, 0x20, 0x02, 0x76, 0x60 # -; X32ABI-NEXT: .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7e, 0x06 # -; X32ABI-NEXT: movl (%r10), %r14d +; X32ABI-NEXT: movl %esp, %ebx +; X32ABI-NEXT: .cfi_offset %rbx, -24 +; X32ABI-NEXT: .cfi_offset %xmm8, -160 +; X32ABI-NEXT: .cfi_offset %xmm9, -144 +; X32ABI-NEXT: .cfi_offset %xmm10, -128 +; X32ABI-NEXT: .cfi_offset %xmm11, -112 +; X32ABI-NEXT: .cfi_offset %xmm12, -96 +; X32ABI-NEXT: .cfi_offset %xmm13, -80 +; X32ABI-NEXT: .cfi_offset %xmm14, -64 +; X32ABI-NEXT: .cfi_offset %xmm15, -48 +; X32ABI-NEXT: movl 16(%ebp), %r14d ; X32ABI-NEXT: callq helper@PLT ; X32ABI-NEXT: # kill: def $eax killed $eax def $rax ; X32ABI-NEXT: leal 31(,%rax,4), %eax @@ -283,7 +269,7 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; X32ABI-NEXT: #NO_APP ; X32ABI-NEXT: movl $8, %edx ; X32ABI-NEXT: #APP -; X32ABI-NEXT: movl %edx, -240(%ebp) +; X32ABI-NEXT: movl %edx, (%ebx) ; X32ABI-NEXT: #NO_APP ; X32ABI-NEXT: movl %r14d, (%ecx,%eax) ; X32ABI-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm8 # 16-byte Reload @@ -294,11 +280,9 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; X32ABI-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm13 # 16-byte Reload ; X32ABI-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm14 # 16-byte Reload ; X32ABI-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm15 # 16-byte Reload -; X32ABI-NEXT: movq {{[-0-9]+}}(%e{{[sb]}}p), %r10 # 8-byte Reload ; X32ABI-NEXT: leal -8(%ebp), %esp ; X32ABI-NEXT: popq %rbx ; X32ABI-NEXT: popq %rbp -; X32ABI-NEXT: leaq -16(%r10), %rsp ; X32ABI-NEXT: .cfi_def_cfa %rsp, 8 ; X32ABI-NEXT: retq entry: @@ -314,6 +298,6 @@ entry: ret void } -attributes #0 = {"frame-pointer"="all"} +attributes #0 = { "frame-pointer"="all"} !llvm.module.flags = !{!0} !0 = !{i32 2, !"override-stack-alignment", i32 32} From 64276eec7ed07e762ef90c248a36cbafdcd7bd81 Mon Sep 17 00:00:00 2001 From: Ilyas Mustafazade Date: Tue, 21 Mar 2023 13:29:05 +0100 Subject: [PATCH 170/691] [clang][NFC] Fix typo in comment Differential Revision: https://reviews.llvm.org/D146422 --- clang/lib/Sema/SemaInit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index ddb2b5cf5cd16..17d8b6c98207b 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -5437,7 +5437,7 @@ static void TryOrBuildParenListInitialization( } // Not all of the args have been processed, so there must've been more args - // then were required to initialize the element. + // than were required to initialize the element. 
+  // than were required to initialize the element.
   if (ArgIndexToProcess < Args.size()) {
     Sequence.SetFailed(InitializationSequence::FK_ParenthesizedListInitFailed);
     if (!VerifyOnly) {

From cdccea8eb3b739ce11c2bab7b9c561b5a80abbf7 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Tue, 21 Mar 2023 12:38:15 +0000
Subject: [PATCH 171/691] [gn build] Port da8260a9b111

---
 llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn
index 7c27f7ca21583..e2351f36ed1b1 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn
@@ -77,7 +77,6 @@ static_library("LLVMX86CodeGen") {
     deps += [ ":X86GenFoldTables" ]
   }
   sources = [
-    "X86ArgumentStackSlotRebase.cpp",
     "X86AsmPrinter.cpp",
     "X86AvoidStoreForwardingBlocks.cpp",
     "X86AvoidTrailingCall.cpp",

From 0e9523efda8a4ad95ecb1d5b5e65e10bcc3711f5 Mon Sep 17 00:00:00 2001
From: Sergio Afonso
Date: Mon, 13 Mar 2023 12:51:18 +0000
Subject: [PATCH 172/691] [mlir] Support lowering of dialect attributes attached to top-level modules

This patch supports the processing of dialect attributes attached to
top-level module-type operations during MLIR-to-LLVMIR lowering. It modifies
the `mlir::translateModuleToLLVMIR()` function to call
`ModuleTranslation::convertOperation()` on the top-level operation after its
body has been lowered. This, in turn, looks up the
`LLVMTranslationDialectInterface` associated with that operation's dialect
and uses it for lowering before processing any dialect attributes attached
to the operation.

Since there are no `LLVMTranslationDialectInterface`s for the builtin and
GPU dialects, which define their own module-type operations, this patch also
adds and registers them. This introduces the requirement to always call
`mlir::registerBuiltinDialectTranslation()` before any translation of MLIR
to LLVM IR in which builtin module operations are present. These new
translation interfaces simply succeed when processing module-type
operations, allowing the lowering process to continue and preventing
failures caused by missing translation interfaces.
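In practice the new requirement amounts to one extra registration call
before translating. A minimal sketch, mirroring the `toyc.cpp` changes in
this patch (the wrapper function itself is illustrative, not part of the
change):

```
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "llvm/IR/LLVMContext.h"
#include <memory>

// Illustrative helper: lower a builtin module all the way to LLVM IR.
std::unique_ptr<llvm::Module> lowerToLLVMIR(mlir::ModuleOp module,
                                            llvm::LLVMContext &llvmContext) {
  // Register the builtin dialect translation interface added by this patch;
  // without it, translating a top-level builtin.module would now fail to
  // find an LLVMTranslationDialectInterface for the module operation.
  mlir::registerBuiltinDialectTranslation(*module->getContext());
  // The LLVM dialect interface was already required to lower llvm.* ops.
  mlir::registerLLVMDialectTranslation(*module->getContext());
  return mlir::translateModuleToLLVMIR(module, llvmContext);
}
```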
Differential Revision: https://reviews.llvm.org/D145932 --- flang/lib/Frontend/CMakeLists.txt | 1 + flang/lib/Optimizer/CodeGen/CMakeLists.txt | 1 + flang/lib/Optimizer/Dialect/CMakeLists.txt | 1 + flang/lib/Optimizer/Support/CMakeLists.txt | 1 + flang/lib/Optimizer/Support/InitFIR.cpp | 5 +- flang/tools/tco/CMakeLists.txt | 1 + mlir/examples/toy/Ch6/CMakeLists.txt | 1 + mlir/examples/toy/Ch6/toyc.cpp | 3 ++ mlir/examples/toy/Ch7/CMakeLists.txt | 1 + mlir/examples/toy/Ch7/toyc.cpp | 3 ++ mlir/include/mlir/Target/LLVMIR/Dialect/All.h | 4 ++ .../Builtin/BuiltinToLLVMIRTranslation.h | 31 +++++++++++++ .../Dialect/GPU/GPUToLLVMIRTranslation.h | 31 +++++++++++++ mlir/lib/CAPI/ExecutionEngine/CMakeLists.txt | 1 + .../CAPI/ExecutionEngine/ExecutionEngine.cpp | 5 +- .../CAPI/RegisterEverything/CMakeLists.txt | 1 + .../RegisterEverything/RegisterEverything.cpp | 5 +- mlir/lib/Dialect/GPU/CMakeLists.txt | 2 + .../GPU/Transforms/SerializeToBlob.cpp | 2 + mlir/lib/ExecutionEngine/CMakeLists.txt | 1 + mlir/lib/Target/LLVMIR/CMakeLists.txt | 2 + .../Builtin/BuiltinToLLVMIRTranslation.cpp | 46 +++++++++++++++++++ .../LLVMIR/Dialect/Builtin/CMakeLists.txt | 12 +++++ mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt | 2 + .../Target/LLVMIR/Dialect/GPU/CMakeLists.txt | 13 ++++++ .../Dialect/GPU/GPUToLLVMIRTranslation.cpp | 45 ++++++++++++++++++ mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 4 ++ mlir/tools/mlir-cpu-runner/CMakeLists.txt | 1 + .../mlir-spirv-cpu-runner/CMakeLists.txt | 1 + mlir/tools/mlir-vulkan-runner/CMakeLists.txt | 1 + mlir/unittests/ExecutionEngine/Invoke.cpp | 7 +++ 31 files changed, 232 insertions(+), 3 deletions(-) create mode 100644 mlir/include/mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h create mode 100644 mlir/include/mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h create mode 100644 mlir/lib/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.cpp create mode 100644 mlir/lib/Target/LLVMIR/Dialect/Builtin/CMakeLists.txt create mode 100644 mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt create mode 100644 mlir/lib/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.cpp diff --git a/flang/lib/Frontend/CMakeLists.txt b/flang/lib/Frontend/CMakeLists.txt index 7a04a2e0b4336..c23a395107018 100644 --- a/flang/lib/Frontend/CMakeLists.txt +++ b/flang/lib/Frontend/CMakeLists.txt @@ -34,6 +34,7 @@ add_flang_library(flangFrontend HLFIRDialect HLFIRTransforms MLIRTransforms + MLIRBuiltinToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation MLIRSCFToControlFlow MLIRTargetLLVMIRImport diff --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt index 1b8fe47084a6c..711bb402b40bd 100644 --- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt +++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt @@ -23,6 +23,7 @@ add_flang_library(FIRCodeGen MLIRMathToLLVM MLIRMathToLibm MLIROpenMPToLLVM + MLIRBuiltinToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation MLIRTargetLLVMIRExport diff --git a/flang/lib/Optimizer/Dialect/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CMakeLists.txt index 342a9d141e72a..fe5edb54a78e9 100644 --- a/flang/lib/Optimizer/Dialect/CMakeLists.txt +++ b/flang/lib/Optimizer/Dialect/CMakeLists.txt @@ -17,6 +17,7 @@ add_flang_library(FIRDialect LINK_LIBS FIRDialectSupport MLIRArithDialect + MLIRBuiltinToLLVMIRTranslation MLIROpenMPToLLVM MLIRLLVMToLLVMIRTranslation MLIRTargetLLVMIRExport diff --git a/flang/lib/Optimizer/Support/CMakeLists.txt b/flang/lib/Optimizer/Support/CMakeLists.txt index 33a9ed4e439d5..2d0e4dd4c9f43 100644 --- 
a/flang/lib/Optimizer/Support/CMakeLists.txt +++ b/flang/lib/Optimizer/Support/CMakeLists.txt @@ -12,6 +12,7 @@ add_flang_library(FIRSupport LINK_LIBS ${dialect_libs} + MLIRBuiltinToLLVMIRTranslation MLIROpenMPToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation MLIRTargetLLVMIRExport diff --git a/flang/lib/Optimizer/Support/InitFIR.cpp b/flang/lib/Optimizer/Support/InitFIR.cpp index baa1336d9ca02..09852b1add372 100644 --- a/flang/lib/Optimizer/Support/InitFIR.cpp +++ b/flang/lib/Optimizer/Support/InitFIR.cpp @@ -7,14 +7,17 @@ //===----------------------------------------------------------------------===// #include "flang/Optimizer/Support/InitFIR.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h" void fir::support::registerLLVMTranslation(mlir::MLIRContext &context) { mlir::DialectRegistry registry; // Register OpenMP dialect interface here as well. - mlir::registerOpenMPDialectTranslation(registry); + registerOpenMPDialectTranslation(registry); // Register LLVM-IR dialect interface. registerLLVMDialectTranslation(registry); + // Register builtin dialect interface. + registerBuiltinDialectTranslation(registry); context.appendDialectRegistry(registry); } diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt index 207e7225276df..cb9b438a06528 100644 --- a/flang/tools/tco/CMakeLists.txt +++ b/flang/tools/tco/CMakeLists.txt @@ -17,6 +17,7 @@ target_link_libraries(tco PRIVATE ${dialect_libs} MLIRIR MLIRLLVMDialect + MLIRBuiltinToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation MLIRTargetLLVMIRExport MLIRPass diff --git a/mlir/examples/toy/Ch6/CMakeLists.txt b/mlir/examples/toy/Ch6/CMakeLists.txt index 13ab002ad6864..2a2b796da0266 100644 --- a/mlir/examples/toy/Ch6/CMakeLists.txt +++ b/mlir/examples/toy/Ch6/CMakeLists.txt @@ -44,6 +44,7 @@ target_link_libraries(toyc-ch6 ${dialect_libs} ${conversion_libs} MLIRAnalysis + MLIRBuiltinToLLVMIRTranslation MLIRCallInterfaces MLIRCastInterfaces MLIRExecutionEngine diff --git a/mlir/examples/toy/Ch6/toyc.cpp b/mlir/examples/toy/Ch6/toyc.cpp index 9397874441f3a..830df1cbb17ef 100644 --- a/mlir/examples/toy/Ch6/toyc.cpp +++ b/mlir/examples/toy/Ch6/toyc.cpp @@ -27,6 +27,7 @@ #include "mlir/Parser/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "mlir/Transforms/Passes.h" @@ -200,6 +201,7 @@ int dumpAST() { int dumpLLVMIR(mlir::ModuleOp module) { // Register the translation to LLVM IR with the MLIR context. + mlir::registerBuiltinDialectTranslation(*module->getContext()); mlir::registerLLVMDialectTranslation(*module->getContext()); // Convert the module to LLVM IR in a new LLVM IR context. @@ -234,6 +236,7 @@ int runJit(mlir::ModuleOp module) { // Register the translation from MLIR to LLVM IR, which must happen before we // can JIT-compile. + mlir::registerBuiltinDialectTranslation(*module->getContext()); mlir::registerLLVMDialectTranslation(*module->getContext()); // An optimization pipeline to use within the execution engine. 
diff --git a/mlir/examples/toy/Ch7/CMakeLists.txt b/mlir/examples/toy/Ch7/CMakeLists.txt index 8c4c3ee06b572..05a56dcec2f85 100644 --- a/mlir/examples/toy/Ch7/CMakeLists.txt +++ b/mlir/examples/toy/Ch7/CMakeLists.txt @@ -43,6 +43,7 @@ target_link_libraries(toyc-ch7 ${dialect_libs} ${conversion_libs} MLIRAnalysis + MLIRBuiltinToLLVMIRTranslation MLIRCallInterfaces MLIRCastInterfaces MLIRExecutionEngine diff --git a/mlir/examples/toy/Ch7/toyc.cpp b/mlir/examples/toy/Ch7/toyc.cpp index 78dcc1abcc45c..056f2a878908f 100644 --- a/mlir/examples/toy/Ch7/toyc.cpp +++ b/mlir/examples/toy/Ch7/toyc.cpp @@ -27,6 +27,7 @@ #include "mlir/Parser/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "mlir/Transforms/Passes.h" @@ -201,6 +202,7 @@ int dumpAST() { int dumpLLVMIR(mlir::ModuleOp module) { // Register the translation to LLVM IR with the MLIR context. + mlir::registerBuiltinDialectTranslation(*module->getContext()); mlir::registerLLVMDialectTranslation(*module->getContext()); // Convert the module to LLVM IR in a new LLVM IR context. @@ -235,6 +237,7 @@ int runJit(mlir::ModuleOp module) { // Register the translation from MLIR to LLVM IR, which must happen before we // can JIT-compile. + mlir::registerBuiltinDialectTranslation(*module->getContext()); mlir::registerLLVMDialectTranslation(*module->getContext()); // An optimization pipeline to use within the execution engine. diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/All.h b/mlir/include/mlir/Target/LLVMIR/Dialect/All.h index b9e52975692cd..cd7f76ff669a4 100644 --- a/mlir/include/mlir/Target/LLVMIR/Dialect/All.h +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/All.h @@ -17,6 +17,8 @@ #include "mlir/Target/LLVMIR/Dialect/AMX/AMXToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/ArmNeon/ArmNeonToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/ArmSVE/ArmSVEToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" +#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMIRToLLVMTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h" @@ -34,6 +36,8 @@ static inline void registerAllToLLVMIRTranslations(DialectRegistry &registry) { registerArmNeonDialectTranslation(registry); registerAMXDialectTranslation(registry); registerArmSVEDialectTranslation(registry); + registerBuiltinDialectTranslation(registry); + registerGPUDialectTranslation(registry); registerLLVMDialectTranslation(registry); registerNVVMDialectTranslation(registry); registerOpenACCDialectTranslation(registry); diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h new file mode 100644 index 0000000000000..cfb262920bd69 --- /dev/null +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h @@ -0,0 +1,31 @@ +//==- BuiltinToLLVMIRTranslation.h - Builtin Dialect to LLVM IR -*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides registration calls for builtin dialect to LLVM IR translation. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVMIR_DIALECT_BUILTIN_BUILTINTOLLVMIRTRANSLATION_H +#define MLIR_TARGET_LLVMIR_DIALECT_BUILTIN_BUILTINTOLLVMIRTRANSLATION_H + +namespace mlir { + +class DialectRegistry; +class MLIRContext; + +/// Register the translation from the builtin dialect to the LLVM IR in the +/// given registry. +void registerBuiltinDialectTranslation(DialectRegistry &registry); + +/// Register the translation from the builtin dialect in the registry associated +/// with the given context. +void registerBuiltinDialectTranslation(MLIRContext &context); + +} // namespace mlir + +#endif // MLIR_TARGET_LLVMIR_DIALECT_BUILTIN_BUILTINTOLLVMIRTRANSLATION_H diff --git a/mlir/include/mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h b/mlir/include/mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h new file mode 100644 index 0000000000000..3ba7a18bed8c6 --- /dev/null +++ b/mlir/include/mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h @@ -0,0 +1,31 @@ +//===- GPUToLLVMIRTranslation.h - GPU Dialect to LLVM IR --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This provides registration calls for GPU dialect to LLVM IR translation. +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TARGET_LLVMIR_DIALECT_GPU_GPUTOLLVMIRTRANSLATION_H +#define MLIR_TARGET_LLVMIR_DIALECT_GPU_GPUTOLLVMIRTRANSLATION_H + +namespace mlir { + +class DialectRegistry; +class MLIRContext; + +/// Register the GPU dialect and the translation from it to the LLVM IR in +/// the given registry. +void registerGPUDialectTranslation(DialectRegistry &registry); + +/// Register the GPU dialect and the translation from it in the registry +/// associated with the given context.
+void registerGPUDialectTranslation(MLIRContext &context); + +} // namespace mlir + +#endif // MLIR_TARGET_LLVMIR_DIALECT_GPU_GPUTOLLVMIRTRANSLATION_H diff --git a/mlir/lib/CAPI/ExecutionEngine/CMakeLists.txt b/mlir/lib/CAPI/ExecutionEngine/CMakeLists.txt index 105ce24dd134f..0be8f2af5dcf4 100644 --- a/mlir/lib/CAPI/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/CAPI/ExecutionEngine/CMakeLists.txt @@ -8,6 +8,7 @@ add_mlir_upstream_c_api_library(MLIRCAPIExecutionEngine ExecutionEngine.cpp LINK_LIBS PUBLIC + MLIRBuiltinToLLVMIRTranslation MLIRExecutionEngine MLIRLLVMToLLVMIRTranslation ) diff --git a/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp index a832119cefba9..a0ea7f4abcfde 100644 --- a/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp +++ b/mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp @@ -11,6 +11,7 @@ #include "mlir/CAPI/IR.h" #include "mlir/CAPI/Support.h" #include "mlir/ExecutionEngine/OptUtils.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "llvm/ExecutionEngine/Orc/Mangling.h" #include "llvm/Support/TargetSelect.h" @@ -29,7 +30,9 @@ mlirExecutionEngineCreate(MlirModule op, int optLevel, int numPaths, }(); (void)initOnce; - mlir::registerLLVMDialectTranslation(*unwrap(op)->getContext()); + auto &ctx = *unwrap(op)->getContext(); + mlir::registerBuiltinDialectTranslation(ctx); + mlir::registerLLVMDialectTranslation(ctx); auto tmBuilderOrError = llvm::orc::JITTargetMachineBuilder::detectHost(); if (!tmBuilderOrError) { diff --git a/mlir/lib/CAPI/RegisterEverything/CMakeLists.txt b/mlir/lib/CAPI/RegisterEverything/CMakeLists.txt index 942bba84e5533..55fe49bcef7b0 100644 --- a/mlir/lib/CAPI/RegisterEverything/CMakeLists.txt +++ b/mlir/lib/CAPI/RegisterEverything/CMakeLists.txt @@ -10,6 +10,7 @@ add_mlir_upstream_c_api_library(MLIRCAPIRegisterEverything ${translation_libs} ${conversion_libs} + MLIRBuiltinToLLVMIRTranslation MLIRCAPIIR MLIRLLVMToLLVMIRTranslation MLIRCAPITransforms diff --git a/mlir/lib/CAPI/RegisterEverything/RegisterEverything.cpp b/mlir/lib/CAPI/RegisterEverything/RegisterEverything.cpp index 25a1a216c4a24..e4a7516437cb8 100644 --- a/mlir/lib/CAPI/RegisterEverything/RegisterEverything.cpp +++ b/mlir/lib/CAPI/RegisterEverything/RegisterEverything.cpp @@ -11,6 +11,7 @@ #include "mlir/CAPI/IR.h" #include "mlir/InitAllDialects.h" #include "mlir/InitAllPasses.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" void mlirRegisterAllDialects(MlirDialectRegistry registry) { @@ -18,7 +19,9 @@ void mlirRegisterAllDialects(MlirDialectRegistry registry) { } void mlirRegisterAllLLVMTranslations(MlirContext context) { - mlir::registerLLVMDialectTranslation(*unwrap(context)); + auto &ctx = *unwrap(context); + mlir::registerBuiltinDialectTranslation(ctx); + mlir::registerLLVMDialectTranslation(ctx); } void mlirRegisterAllPasses() { mlir::registerAllPasses(); } diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt index 94f3ab505f23a..50e49826a7370 100644 --- a/mlir/lib/Dialect/GPU/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/CMakeLists.txt @@ -70,11 +70,13 @@ add_mlir_dialect_library(MLIRGPUTransforms MLIRAffineUtils MLIRArithDialect MLIRAsyncDialect + MLIRBuiltinToLLVMIRTranslation MLIRDataLayoutInterfaces MLIRExecutionEngineUtils MLIRGPUOps MLIRIR MLIRLLVMDialect + 
MLIRGPUToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation MLIRMemRefDialect MLIRPass diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp index 3c4f005e8e123..d82e6ca2ba905 100644 --- a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/GPU/Transforms/Passes.h" #include "mlir/Pass/Pass.h" +#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/IR/LegacyPassManager.h" @@ -108,6 +109,7 @@ gpu::SerializeToBlobPass::optimizeLlvm(llvm::Module &llvmModule, void gpu::SerializeToBlobPass::getDependentDialects( DialectRegistry &registry) const { + registerGPUDialectTranslation(registry); registerLLVMDialectTranslation(registry); OperationPass<gpu::GPUModuleOp>::getDependentDialects(registry); } diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt index a212e1ac20486..5ed7af625aacc 100644 --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -86,6 +86,7 @@ add_mlir_library(MLIRExecutionEngine ${LLVM_JIT_LISTENER_LIB} LINK_LIBS PUBLIC + MLIRBuiltinToLLVMIRTranslation MLIRExecutionEngineUtils MLIRLLVMDialect MLIRLLVMToLLVMIRTranslation diff --git a/mlir/lib/Target/LLVMIR/CMakeLists.txt b/mlir/lib/Target/LLVMIR/CMakeLists.txt index 7411573791b76..da28c36394248 100644 --- a/mlir/lib/Target/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Target/LLVMIR/CMakeLists.txt @@ -47,6 +47,8 @@ add_mlir_translation_library(MLIRToLLVMIRTranslationRegistration MLIRArmNeonToLLVMIRTranslation MLIRArmSVEToLLVMIRTranslation MLIRAMXToLLVMIRTranslation + MLIRBuiltinToLLVMIRTranslation + MLIRGPUToLLVMIRTranslation MLIRX86VectorToLLVMIRTranslation MLIRLLVMToLLVMIRTranslation MLIRNVVMToLLVMIRTranslation diff --git a/mlir/lib/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.cpp new file mode 100644 index 0000000000000..51c304cfbb8e5 --- /dev/null +++ b/mlir/lib/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.cpp @@ -0,0 +1,46 @@ +//===- BuiltinToLLVMIRTranslation.cpp - Translate builtin to LLVM IR ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a translation between the MLIR builtin dialect and LLVM +// IR.
+// +//===----------------------------------------------------------------------===// +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" +#include "mlir/IR/BuiltinDialect.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h" + +using namespace mlir; + +namespace { + +class BuiltinDialectLLVMIRTranslationInterface + : public LLVMTranslationDialectInterface { +public: + using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; + + LogicalResult + convertOperation(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const override { + return success(isa<ModuleOp>(op)); + } +}; + +} // namespace + +void mlir::registerBuiltinDialectTranslation(DialectRegistry &registry) { + registry.addExtension(+[](MLIRContext *ctx, BuiltinDialect *dialect) { + dialect->addInterfaces<BuiltinDialectLLVMIRTranslationInterface>(); + }); +} + +void mlir::registerBuiltinDialectTranslation(MLIRContext &context) { + DialectRegistry registry; + registerBuiltinDialectTranslation(registry); + context.appendDialectRegistry(registry); +} diff --git a/mlir/lib/Target/LLVMIR/Dialect/Builtin/CMakeLists.txt b/mlir/lib/Target/LLVMIR/Dialect/Builtin/CMakeLists.txt new file mode 100644 index 0000000000000..e4639694852d6 --- /dev/null +++ b/mlir/lib/Target/LLVMIR/Dialect/Builtin/CMakeLists.txt @@ -0,0 +1,12 @@ +add_mlir_translation_library(MLIRBuiltinToLLVMIRTranslation + BuiltinToLLVMIRTranslation.cpp + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRIR + MLIRLLVMDialect + MLIRSupport + MLIRTargetLLVMIRExport + ) diff --git a/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt b/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt index 8df5176ad0563..f27810feed824 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt +++ b/mlir/lib/Target/LLVMIR/Dialect/CMakeLists.txt @@ -1,6 +1,8 @@ add_subdirectory(ArmNeon) add_subdirectory(ArmSVE) add_subdirectory(AMX) +add_subdirectory(Builtin) +add_subdirectory(GPU) add_subdirectory(LLVMIR) add_subdirectory(NVVM) add_subdirectory(OpenACC) diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt b/mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt new file mode 100644 index 0000000000000..88e9990e21614 --- /dev/null +++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/CMakeLists.txt @@ -0,0 +1,13 @@ +add_mlir_translation_library(MLIRGPUToLLVMIRTranslation + GPUToLLVMIRTranslation.cpp + + LINK_COMPONENTS + Core + + LINK_LIBS PUBLIC + MLIRIR + MLIRGPUOps + MLIRLLVMDialect + MLIRSupport + MLIRTargetLLVMIRExport + ) diff --git a/mlir/lib/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.cpp new file mode 100644 index 0000000000000..a12316112e095 --- /dev/null +++ b/mlir/lib/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.cpp @@ -0,0 +1,45 @@ +//===- GPUToLLVMIRTranslation.cpp - Translate GPU dialect to LLVM IR ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a translation between the MLIR GPU dialect and LLVM IR.
+// +//===----------------------------------------------------------------------===// +#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h" +#include "mlir/Dialect/GPU/IR/GPUDialect.h" +#include "mlir/Target/LLVMIR/LLVMTranslationInterface.h" + +using namespace mlir; + +namespace { + +class GPUDialectLLVMIRTranslationInterface + : public LLVMTranslationDialectInterface { +public: + using LLVMTranslationDialectInterface::LLVMTranslationDialectInterface; + + LogicalResult + convertOperation(Operation *op, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) const override { + return isa<gpu::GPUModuleOp>(op) ? success() : failure(); + } +}; + +} // namespace + +void mlir::registerGPUDialectTranslation(DialectRegistry &registry) { + registry.insert<gpu::GPUDialect>(); + registry.addExtension(+[](MLIRContext *ctx, gpu::GPUDialect *dialect) { + dialect->addInterfaces<GPUDialectLLVMIRTranslationInterface>(); + }); +} + +void mlir::registerGPUDialectTranslation(MLIRContext &context) { + DialectRegistry registry; + registerGPUDialectTranslation(registry); + context.appendDialectRegistry(registry); +} diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 0ce7d4fe12322..9b0bfa22c73a3 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -1320,6 +1320,10 @@ mlir::translateModuleToLLVMIR(Operation *module, llvm::LLVMContext &llvmContext, } } + // Convert module itself. + if (failed(translator.convertOperation(*module, llvmBuilder))) + return nullptr; + if (llvm::verifyModule(*translator.llvmModule, &llvm::errs())) return nullptr; diff --git a/mlir/tools/mlir-cpu-runner/CMakeLists.txt b/mlir/tools/mlir-cpu-runner/CMakeLists.txt index 092ef1b0b7c70..c2ab62ee93a5e 100644 --- a/mlir/tools/mlir-cpu-runner/CMakeLists.txt +++ b/mlir/tools/mlir-cpu-runner/CMakeLists.txt @@ -11,6 +11,7 @@ add_mlir_tool(mlir-cpu-runner llvm_update_compile_flags(mlir-cpu-runner) target_link_libraries(mlir-cpu-runner PRIVATE MLIRAnalysis + MLIRBuiltinToLLVMIRTranslation MLIRExecutionEngine MLIRIR MLIRJitRunner diff --git a/mlir/tools/mlir-spirv-cpu-runner/CMakeLists.txt b/mlir/tools/mlir-spirv-cpu-runner/CMakeLists.txt index 60951adf8620c..789c94be4452a 100644 --- a/mlir/tools/mlir-spirv-cpu-runner/CMakeLists.txt +++ b/mlir/tools/mlir-spirv-cpu-runner/CMakeLists.txt @@ -17,6 +17,7 @@ if (MLIR_ENABLE_SPIRV_CPU_RUNNER) ${conversion_libs} MLIRAnalysis MLIRArithDialect + MLIRBuiltinToLLVMIRTranslation MLIRExecutionEngine MLIRFuncDialect MLIRGPUOps diff --git a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt index 4f64117c296f9..a24ac080d7462 100644 --- a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt +++ b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt @@ -57,6 +57,7 @@ if (MLIR_ENABLE_VULKAN_RUNNER) ${conversion_libs} MLIRAnalysis MLIRArithDialect + MLIRBuiltinToLLVMIRTranslation MLIRExecutionEngine MLIRFuncDialect MLIRGPUOps diff --git a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp index 0f9212d6bd3c7..e726d93b8831a 100644 --- a/mlir/unittests/ExecutionEngine/Invoke.cpp +++ b/mlir/unittests/ExecutionEngine/Invoke.cpp @@ -23,6 +23,7 @@ #include "mlir/InitAllDialects.h" #include "mlir/Parser/Parser.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" #include "llvm/Support/TargetSelect.h" @@ -69,6 +70,7 @@
TEST(MLIRExecutionEngine, SKIP_WITHOUT_JIT(AddInteger)) { )mlir"; DialectRegistry registry; registerAllDialects(registry); + registerBuiltinDialectTranslation(registry); registerLLVMDialectTranslation(registry); MLIRContext context(registry); OwningOpRef module = @@ -95,6 +97,7 @@ TEST(MLIRExecutionEngine, SKIP_WITHOUT_JIT(SubtractFloat)) { )mlir"; DialectRegistry registry; registerAllDialects(registry); + registerBuiltinDialectTranslation(registry); registerLLVMDialectTranslation(registry); MLIRContext context(registry); OwningOpRef module = @@ -126,6 +129,7 @@ TEST(NativeMemRefJit, SKIP_WITHOUT_JIT(ZeroRankMemref)) { )mlir"; DialectRegistry registry; registerAllDialects(registry); + registerBuiltinDialectTranslation(registry); registerLLVMDialectTranslation(registry); MLIRContext context(registry); auto module = parseSourceString(moduleStr, &context); @@ -161,6 +165,7 @@ TEST(NativeMemRefJit, SKIP_WITHOUT_JIT(RankOneMemref)) { )mlir"; DialectRegistry registry; registerAllDialects(registry); + registerBuiltinDialectTranslation(registry); registerLLVMDialectTranslation(registry); MLIRContext context(registry); auto module = parseSourceString(moduleStr, &context); @@ -215,6 +220,7 @@ TEST(NativeMemRefJit, SKIP_WITHOUT_JIT(BasicMemref)) { )mlir"; DialectRegistry registry; registerAllDialects(registry); + registerBuiltinDialectTranslation(registry); registerLLVMDialectTranslation(registry); MLIRContext context(registry); OwningOpRef module = @@ -264,6 +270,7 @@ TEST(NativeMemRefJit, MAYBE_JITCallback) { )mlir"; DialectRegistry registry; registerAllDialects(registry); + registerBuiltinDialectTranslation(registry); registerLLVMDialectTranslation(registry); MLIRContext context(registry); auto module = parseSourceString(moduleStr, &context); From a6a788bdfb39cbf66e7196a39785d848ce714338 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 21 Mar 2023 12:59:10 +0000 Subject: [PATCH 173/691] [DAG] foldBinOpIntoSelect - use FoldConstantArithmetic instead of getNode() + constant check. This prevents unused nodes from being created if the constant check fails. Noticed while triaging D127115 regressions --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 43cb2fde1fe9f..5a331ceb96d64 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2483,16 +2483,14 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { // constant. Eliminate the binop by pulling the constant math into the // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT + // CBO, CF + CBO - NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT) - : DAG.getNode(BinOpcode, DL, VT, CT, CBO); - if (!NewCT.isUndef() && !isConstantOrConstantVector(NewCT, true) && - !DAG.isConstantFPBuildVectorOrConstantFP(NewCT)) + NewCT = SelOpNo ? DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CT}) + : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CT, CBO}); + if (!NewCT) return SDValue(); - NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF) - : DAG.getNode(BinOpcode, DL, VT, CF, CBO); - if (!NewCF.isUndef() && !isConstantOrConstantVector(NewCF, true) && - !DAG.isConstantFPBuildVectorOrConstantFP(NewCF)) + NewCF = SelOpNo ? 
DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CBO, CF}) + : DAG.FoldConstantArithmetic(BinOpcode, DL, VT, {CF, CBO}); + if (!NewCF) return SDValue(); } From 282cae0b9a602267ad7ef622f770066491332a11 Mon Sep 17 00:00:00 2001 From: Ilya Biryukov Date: Tue, 21 Mar 2023 14:06:45 +0100 Subject: [PATCH 174/691] [Sema] Fix crash on __fp16 parameters in template instantiations Fixes #61441. Currently, Clang stores `nullptr` in the parameter lists inside `FunctionProtoTypeLoc` if `__fp16` is used without pointer qualifiers. Any code path that calls `Declarator::setInvalidType()` before `GetFullTypeForDeclarator` will lead to the same problem downstream. The relevant code is: ```cpp if (D.isInvalidType()) return Context.getTrivialTypeSourceInfo(T); return GetTypeSourceInfoForDeclarator(state, T, TInfo); ``` `GetTypeSourceInfoForDeclarator` sets the parameter `Decl`, but we can't call it when `isInvalidType() == true` as this causes other assertion failures that seem harder to fix. Reviewed By: kadircet Differential Revision: https://reviews.llvm.org/D146426 --- clang/lib/Sema/SemaChecking.cpp | 4 ++++ clang/lib/Sema/SemaDeclCXX.cpp | 4 ++++ clang/lib/Sema/SemaLambda.cpp | 5 ++++- clang/lib/Sema/SemaTemplateInstantiate.cpp | 2 +- clang/test/SemaCXX/GH61441.cpp | 12 ++++++++++++ 5 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 clang/test/SemaCXX/GH61441.cpp diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 02d1cb010db50..a3f2452b53d0c 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -15879,6 +15879,10 @@ bool Sema::CheckParmsForFunctionDef(ArrayRef Parameters, bool CheckParameterNames) { bool HasInvalidParm = false; for (ParmVarDecl *Param : Parameters) { + if (!Param) { + HasInvalidParm = true; + continue; + } // C99 6.7.5.3p4: the parameters in a parameter type list in a // function declarator that is part of a function definition of // that function shall not have incomplete type. diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 16e1071366b30..3dcc08f797811 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -1712,6 +1712,8 @@ static bool CheckConstexprDestructorSubobjects(Sema &SemaRef, /// Check whether a function's parameter types are all literal types. If so, /// return true. If not, produce a suitable diagnostic and return false. +/// If any ParamDecl is null, return false without producing a diagnostic. +/// The code creating null parameters is responsible for producing a diagnostic. 
static bool CheckConstexprParameterTypes(Sema &SemaRef, const FunctionDecl *FD, Sema::CheckConstexprKind Kind) { @@ -1721,6 +1723,8 @@ static bool CheckConstexprParameterTypes(Sema &SemaRef, e = FT->param_type_end(); i != e; ++i, ++ArgIndex) { const ParmVarDecl *PD = FD->getParamDecl(ArgIndex); + if (!PD) + return false; SourceLocation ParamLoc = PD->getLocation(); if (CheckLiteralType(SemaRef, Kind, ParamLoc, *i, diag::err_constexpr_non_literal_param, ArgIndex + 1, diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp index 6a79a03e02fda..3a82c7b3e8285 100644 --- a/clang/lib/Sema/SemaLambda.cpp +++ b/clang/lib/Sema/SemaLambda.cpp @@ -966,8 +966,11 @@ void Sema::CompleteLambdaCallOperator( if (!Params.empty()) { CheckParmsForFunctionDef(Params, /*CheckParameterNames=*/false); Method->setParams(Params); - for (auto P : Method->parameters()) + for (auto P : Method->parameters()) { + if (!P) + continue; P->setOwningFunction(Method); + } } buildLambdaScopeReturnType(*this, LSI, Method, HasExplicitResultType); diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index da66ffea46f49..b4649ce4c413c 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1343,7 +1343,7 @@ namespace { CXXMethodDecl *MD = Result.getAs()->getCallOperator(); for (ParmVarDecl *PVD : MD->parameters()) { - if (!PVD->hasDefaultArg()) + if (!PVD || !PVD->hasDefaultArg()) continue; Expr *UninstExpr = PVD->getUninstantiatedDefaultArg(); // FIXME: Obtain the source location for the '=' token. diff --git a/clang/test/SemaCXX/GH61441.cpp b/clang/test/SemaCXX/GH61441.cpp new file mode 100644 index 0000000000000..73ff77515a7c4 --- /dev/null +++ b/clang/test/SemaCXX/GH61441.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -fsyntax-only -verify -cl-std=clc++ -fblocks %s +// Checks Clang does not crash. We run in OpenCL mode to trigger block pointer +// crash. The __fp16 crash happens in standard mode too. 
+ +template +int foo() { + auto x = [&](__fp16) { return 0; }; // expected-error {{not allowed}} + auto y = [&](void(^)(int)) { return 0; }; // expected-error {{not allowed}} + return 0; +} + +int bar() { return foo(); } From 20294bf6c14808a77fdced9814ea8e8077bee913 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 21 Mar 2023 01:33:51 -0700 Subject: [PATCH 175/691] [mlir][Transform] NFC - Add more advanced debug spew to help hunt down potential misuses Differential Revision: https://reviews.llvm.org/D146509 --- .../Transform/IR/TransformInterfaces.cpp | 49 +++++++++++++++++-- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp index 7afbed33c30a8..4002e59dd607d 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp @@ -17,8 +17,10 @@ #include "llvm/Support/ErrorHandling.h" #define DEBUG_TYPE "transform-dialect" +#define DEBUG_TYPE_FULL "transform-dialect-full" #define DEBUG_PRINT_AFTER_ALL "transform-dialect-print-top-level-after-all" #define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "] ") +#define LDBG(X) LLVM_DEBUG(DBGS() << (X)) using namespace mlir; @@ -396,7 +398,17 @@ void transform::TransformState::recordOpHandleInvalidationOne( if (invalidatedHandles.count(otherHandle)) return; + LDBG("--recordOpHandleInvalidationOne\n"); + LLVM_DEBUG(llvm::interleaveComma(potentialAncestors, + DBGS() << "--ancestors: ", + [](Operation *op) { llvm::dbgs() << *op; }); + llvm::dbgs() << "\n"); for (Operation *ancestor : potentialAncestors) { + LLVM_DEBUG(DBGS() << "----handle one ancestor: " << *ancestor << "\n"); + LLVM_DEBUG(DBGS() << "----of payload with name: " + << payloadOp->getName().getIdentifier() << "\n"); + DEBUG_WITH_TYPE(DEBUG_TYPE_FULL, + { (DBGS() << "----of payload: " << *payloadOp << "\n"); }); if (!ancestor->isAncestor(payloadOp)) continue; @@ -562,6 +574,7 @@ void transform::TransformState::recordValueHandleInvalidation( LogicalResult transform::TransformState::checkAndRecordHandleInvalidation( TransformOpInterface transform) { + LDBG("--Start checkAndRecordHandleInvalidation\n"); auto memoryEffectsIface = cast(transform.getOperation()); SmallVector effects; @@ -569,11 +582,15 @@ LogicalResult transform::TransformState::checkAndRecordHandleInvalidation( transform::TransformMappingResource::get(), effects); for (OpOperand &target : transform->getOpOperands()) { + LLVM_DEBUG(DBGS() << "----iterate on handle: " << target.get() << "\n"); // If the operand uses an invalidated handle, report it. auto it = invalidatedHandles.find(target.get()); if (!transform.allowsRepeatedHandleOperands() && - it != invalidatedHandles.end()) + it != invalidatedHandles.end()) { + LLVM_DEBUG( + DBGS() << "--End checkAndRecordHandleInvalidation -> FAILURE\n"); return it->getSecond()(transform->getLoc()), failure(); + } // Invalidate handles pointing to the operations nested in the operation // associated with the handle consumed by this operation. 
@@ -582,17 +599,25 @@ LogicalResult transform::TransformState::checkAndRecordHandleInvalidation( effect.getValue() == target.get(); }; if (llvm::any_of(effects, consumesTarget)) { + LLVM_DEBUG(DBGS() << "----found consume effect -> SKIP\n"); if (target.get().getType().isa()) { + LDBG("----recordOpHandleInvalidation\n"); ArrayRef payloadOps = getPayloadOps(target.get()); recordOpHandleInvalidation(target, payloadOps); } else if (target.get() .getType() .isa()) { + LDBG("----recordValueHandleInvalidation\n"); recordValueHandleInvalidation(target); + } else { + LDBG("----not a TransformHandle -> SKIP AND DROP ON THE FLOOR\n"); } + } else { + LLVM_DEBUG(DBGS() << "----no consume effect -> SKIP\n"); } } + LDBG("--End checkAndRecordHandleInvalidation -> SUCCESS\n"); return success(); } @@ -621,7 +646,7 @@ checkRepeatedConsumptionInOperand(ArrayRef payload, DiagnosedSilenceableFailure transform::TransformState::applyTransform(TransformOpInterface transform) { - LLVM_DEBUG(DBGS() << "applying: " << transform << "\n"); + LLVM_DEBUG(DBGS() << "\n"; DBGS() << "applying: " << transform << "\n"); auto printOnFailureRAII = llvm::make_scope_exit([this] { (void)this; DEBUG_WITH_TYPE(DEBUG_PRINT_AFTER_ALL, { @@ -631,28 +656,42 @@ transform::TransformState::applyTransform(TransformOpInterface transform) { }); }); if (options.getExpensiveChecksEnabled()) { + LDBG("ExpensiveChecksEnabled\n"); if (failed(checkAndRecordHandleInvalidation(transform))) return DiagnosedSilenceableFailure::definiteFailure(); for (OpOperand &operand : transform->getOpOperands()) { - if (!isHandleConsumed(operand.get(), transform)) + LLVM_DEBUG(DBGS() << "iterate on handle: " << operand.get() << "\n"); + if (!isHandleConsumed(operand.get(), transform)) { + LDBG("--handle not consumed -> SKIP\n"); continue; + } + LDBG("--handle is consumed\n"); Type operandType = operand.get().getType(); if (operandType.isa()) { + LLVM_DEBUG( + DBGS() << "--checkRepeatedConsumptionInOperand for Operation*\n"); DiagnosedSilenceableFailure check = checkRepeatedConsumptionInOperand( getPayloadOps(operand.get()), transform, operand.getOperandNumber()); - if (!check.succeeded()) + if (!check.succeeded()) { + LDBG("----FAILED\n"); return check; + } } else if (operandType.isa()) { + LDBG("--checkRepeatedConsumptionInOperand For Value\n"); DiagnosedSilenceableFailure check = checkRepeatedConsumptionInOperand( getPayloadValues(operand.get()), transform, operand.getOperandNumber()); - if (!check.succeeded()) + if (!check.succeeded()) { + LDBG("----FAILED\n"); return check; + } + } else { + LDBG("--not a TransformHandle -> SKIP AND DROP ON THE FLOOR\n"); } } } From 8022ff5f197bf3b2e259126e8a7b806504f91151 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 21 Mar 2023 13:24:47 +0000 Subject: [PATCH 176/691] [X86] Add verify-machineinstrs checks to baseptr tests Help catch regressions from D145650 that were only noticed on EXPENSIVE_CHECKS builds --- llvm/test/CodeGen/X86/i386-baseptr.ll | 2 +- llvm/test/CodeGen/X86/x86-64-baseptr.ll | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll index 8baa16c076bac..baae07ef435ed 100644 --- a/llvm/test/CodeGen/X86/i386-baseptr.ll +++ b/llvm/test/CodeGen/X86/i386-baseptr.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=i386-pc-linux -stackrealign < %s | FileCheck %s +; RUN: llc -mtriple=i386-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s 
declare i32 @helper() nounwind define void @base() #0 { diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll index 914a5a4796247..dbd04f22a5c09 100644 --- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll +++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-pc-linux -stackrealign < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -stackrealign < %s | FileCheck -check-prefix=X32ABI %s +; RUN: llc -mtriple=x86_64-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-pc-linux-gnux32 -stackrealign -verify-machineinstrs < %s | FileCheck -check-prefix=X32ABI %s ; This should run with NaCl as well ( -mtriple=x86_64-pc-nacl ) but currently doesn't due to PR22655 From 382eb7c2c71648fe15fe568ba67f93d3b53beb08 Mon Sep 17 00:00:00 2001 From: Jan Sjodin Date: Mon, 20 Mar 2023 16:54:52 -0400 Subject: [PATCH 177/691] [mlir] Add alloca address space handling to the data layout subsystem This patch adds alloca address space information to the data layout interface and implementation in the DLTI dialect. This is needed for targets that use separate address spaces for local/stack data. Reviewed By: ftynse, krzysz00 Differential Revision: https://reviews.llvm.org/D144657 --- mlir/include/mlir/Dialect/DLTI/DLTI.h | 3 ++ mlir/include/mlir/Dialect/DLTI/DLTIBase.td | 5 +++ .../mlir/Interfaces/DataLayoutInterfaces.h | 10 ++++++ .../mlir/Interfaces/DataLayoutInterfaces.td | 18 ++++++++++ mlir/lib/Dialect/DLTI/DLTI.cpp | 10 ++++++ mlir/lib/Interfaces/DataLayoutInterfaces.cpp | 34 +++++++++++++++++-- mlir/lib/Target/LLVMIR/ModuleImport.cpp | 16 ++++++++- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 9 +++++ mlir/test/Dialect/LLVMIR/layout.mlir | 24 ++++++++++--- .../lib/Dialect/DLTI/TestDataLayoutQuery.cpp | 9 ++++- .../Interfaces/DataLayoutInterfacesTest.cpp | 26 +++++++++++++- 11 files changed, 155 insertions(+), 9 deletions(-) diff --git a/mlir/include/mlir/Dialect/DLTI/DLTI.h b/mlir/include/mlir/Dialect/DLTI/DLTI.h index 1aa9ab5e250ab..77a036754f966 100644 --- a/mlir/include/mlir/Dialect/DLTI/DLTI.h +++ b/mlir/include/mlir/Dialect/DLTI/DLTI.h @@ -98,6 +98,9 @@ class DataLayoutSpecAttr /// Returns the list of entries. DataLayoutEntryListRef getEntries() const; + /// Returns the alloca memory space identifier. + StringAttr getAllocaMemorySpaceIdentifier(MLIRContext *context) const; + /// Parses an instance of this attribute. static DataLayoutSpecAttr parse(AsmParser &parser); diff --git a/mlir/include/mlir/Dialect/DLTI/DLTIBase.td b/mlir/include/mlir/Dialect/DLTI/DLTIBase.td index 866cc934e05cb..a1f9eb361d717 100644 --- a/mlir/include/mlir/Dialect/DLTI/DLTIBase.td +++ b/mlir/include/mlir/Dialect/DLTI/DLTIBase.td @@ -23,9 +23,11 @@ def DLTI_Dialect : Dialect { }]; let extraClassDeclaration = [{ + // Top level attribute name. constexpr const static ::llvm::StringLiteral kDataLayoutAttrName = "dlti.dl_spec"; + // Constants used in entries. 
constexpr const static ::llvm::StringLiteral kDataLayoutEndiannessKey = "dlti.endianness"; @@ -34,6 +36,9 @@ def DLTI_Dialect : Dialect { constexpr const static ::llvm::StringLiteral kDataLayoutEndiannessLittle = "little"; + + constexpr const static ::llvm::StringLiteral + kDataLayoutAllocaMemorySpaceKey = "dlti.alloca_memory_space"; }]; let useDefaultAttributePrinterParser = 1; diff --git a/mlir/include/mlir/Interfaces/DataLayoutInterfaces.h b/mlir/include/mlir/Interfaces/DataLayoutInterfaces.h index 5cc80a06f3632..3d4d3796ecca0 100644 --- a/mlir/include/mlir/Interfaces/DataLayoutInterfaces.h +++ b/mlir/include/mlir/Interfaces/DataLayoutInterfaces.h @@ -56,6 +56,10 @@ unsigned getDefaultPreferredAlignment(Type type, const DataLayout &dataLayout, ArrayRef params); +/// Default handler for alloca memory space request. Dispatches to the +/// DataLayoutInterface if specified, otherwise returns the default. +Attribute getDefaultAllocaMemorySpace(DataLayoutEntryInterface entry); + /// Given a list of data layout entries, returns a new list containing the /// entries with keys having the given type ID, i.e. belonging to the same type /// class. @@ -159,6 +163,9 @@ class DataLayout { /// Returns the preferred of the given type in the current scope. unsigned getTypePreferredAlignment(Type t) const; + /// Returns the memory space used for AllocaOps. + Attribute getAllocaMemorySpace() const; + private: /// Combined layout spec at the given scope. const DataLayoutSpecInterface originalLayout; @@ -180,6 +187,9 @@ class DataLayout { mutable DenseMap bitsizes; mutable DenseMap abiAlignments; mutable DenseMap preferredAlignments; + + /// Cache for alloca memory space. + mutable std::optional allocaMemorySpace; }; } // namespace mlir diff --git a/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td b/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td index 0ca0f9e22191e..2d9d6acbedae5 100644 --- a/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td +++ b/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td @@ -106,6 +106,12 @@ def DataLayoutSpecInterface : AttrInterface<"DataLayoutSpecInterface"> { /*methodName=*/"getEntries", /*args=*/(ins) >, + InterfaceMethod< + /*description=*/"Returns the alloca memory space identifier.", + /*retTy=*/"::mlir::StringAttr", + /*methodName=*/"getAllocaMemorySpaceIdentifier", + /*args=*/(ins "::mlir::MLIRContext *":$context) + >, // Implementations may override this if they have an efficient lookup // mechanism. InterfaceMethod< @@ -256,6 +262,18 @@ def DataLayoutOpInterface : OpInterface<"DataLayoutOpInterface"> { params); }] >, + StaticInterfaceMethod< + /*description=*/"Returns the memory space used by the ABI computed " + "using the relevant entries. 
The data layout object " "can be used for recursive queries.", /*retTy=*/"::mlir::Attribute", /*methodName=*/"getAllocaMemorySpace", /*args=*/(ins "::mlir::DataLayoutEntryInterface":$entry), /*methodBody=*/"", /*defaultImplementation=*/[{ return ::mlir::detail::getDefaultAllocaMemorySpace(entry); }] >, ]; let verify = [{ return ::mlir::detail::verifyDataLayoutOp($_op); }]; diff --git a/mlir/lib/Dialect/DLTI/DLTI.cpp b/mlir/lib/Dialect/DLTI/DLTI.cpp index eaf6f1e619a01..86cc3ea766031 100644 --- a/mlir/lib/Dialect/DLTI/DLTI.cpp +++ b/mlir/lib/Dialect/DLTI/DLTI.cpp @@ -106,6 +106,8 @@ void DataLayoutEntryAttr::print(AsmPrinter &os) const { //===----------------------------------------------------------------------===// // constexpr const StringLiteral mlir::DataLayoutSpecAttr::kAttrKeyword; +constexpr const StringLiteral + mlir::DLTIDialect::kDataLayoutAllocaMemorySpaceKey; namespace mlir { namespace impl { @@ -273,6 +275,12 @@ DataLayoutEntryListRef DataLayoutSpecAttr::getEntries() const { return getImpl()->entries; } +StringAttr +DataLayoutSpecAttr::getAllocaMemorySpaceIdentifier(MLIRContext *context) const { + return Builder(context).getStringAttr( + DLTIDialect::kDataLayoutAllocaMemorySpaceKey); +} + /// Parses an attribute with syntax /// attr ::= `#target.` `dl_spec` `<` attr-list? `>` /// attr-list ::= attr @@ -329,6 +337,8 @@ class TargetDataLayoutInterface : public DataLayoutDialectInterface { << DLTIDialect::kDataLayoutEndiannessBig << "' or '" << DLTIDialect::kDataLayoutEndiannessLittle << "'"; } + if (entryName == DLTIDialect::kDataLayoutAllocaMemorySpaceKey) + return success(); return emitError(loc) << "unknown data layout entry name: " << entryName; } }; diff --git a/mlir/lib/Interfaces/DataLayoutInterfaces.cpp b/mlir/lib/Interfaces/DataLayoutInterfaces.cpp index 141ec9e441660..ba6a321360fa5 100644 --- a/mlir/lib/Interfaces/DataLayoutInterfaces.cpp +++ b/mlir/lib/Interfaces/DataLayoutInterfaces.cpp @@ -213,6 +213,18 @@ unsigned mlir::detail::getDefaultPreferredAlignment( reportMissingDataLayout(type); } +// Returns the memory space used for alloca operations if specified in the +// given entry. If the entry is empty, the default memory space represented by +// an empty attribute is returned.
+Attribute +mlir::detail::getDefaultAllocaMemorySpace(DataLayoutEntryInterface entry) { + if (entry == DataLayoutEntryInterface()) { + return Attribute(); + } + + return entry.getValue(); +} + DataLayoutEntryList mlir::detail::filterEntriesForType(DataLayoutEntryListRef entries, TypeID typeID) { @@ -346,7 +358,8 @@ void checkMissingLayout(DataLayoutSpecInterface originalLayout, OpTy op) { mlir::DataLayout::DataLayout() : DataLayout(ModuleOp()) {} mlir::DataLayout::DataLayout(DataLayoutOpInterface op) - : originalLayout(getCombinedDataLayout(op)), scope(op) { + : originalLayout(getCombinedDataLayout(op)), scope(op), + allocaMemorySpace(std::nullopt) { #if LLVM_ENABLE_ABI_BREAKING_CHECKS checkMissingLayout(originalLayout, op); collectParentLayouts(op, layoutStack); @@ -354,7 +367,8 @@ mlir::DataLayout::DataLayout(DataLayoutOpInterface op) } mlir::DataLayout::DataLayout(ModuleOp op) - : originalLayout(getCombinedDataLayout(op)), scope(op) { + : originalLayout(getCombinedDataLayout(op)), scope(op), + allocaMemorySpace(std::nullopt) { #if LLVM_ENABLE_ABI_BREAKING_CHECKS checkMissingLayout(originalLayout, op); collectParentLayouts(op, layoutStack); @@ -456,6 +470,22 @@ unsigned mlir::DataLayout::getTypePreferredAlignment(Type t) const { }); } +mlir::Attribute mlir::DataLayout::getAllocaMemorySpace() const { + checkValid(); + MLIRContext *context = scope->getContext(); + if (allocaMemorySpace) + return *allocaMemorySpace; + DataLayoutEntryInterface entry; + if (originalLayout) + entry = originalLayout.getSpecForIdentifier( + originalLayout.getAllocaMemorySpaceIdentifier(context)); + if (auto iface = dyn_cast_or_null(scope)) + allocaMemorySpace = iface.getAllocaMemorySpace(entry); + else + allocaMemorySpace = detail::getDefaultAllocaMemorySpace(entry); + return *allocaMemorySpace; +} + //===----------------------------------------------------------------------===// // DataLayoutSpecInterface //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index eecb855275134..a45768726bc43 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -20,6 +20,7 @@ #include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Builders.h" #include "mlir/IR/Matchers.h" #include "mlir/Interfaces/DataLayoutInterfaces.h" #include "mlir/Tools/mlir-translate/Translation.h" @@ -179,9 +180,10 @@ mlir::translateDataLayout(const llvm::DataLayout &dataLayout, // Remaining unhandled default layout defaults // e (little endian if not set) // p[n]:64:64:64 (non zero address spaces have 64-bit properties) + // Alloca address space defaults to 0. std::string append = "p:64:64:64-S0-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f16:16:16-f64:" - "64:64-f128:128:128-v64:64:64-v128:128:128-a:0:64"; + "64:64-f128:128:128-v64:64:64-v128:128:128-a:0:64-A0"; if (layoutstr.empty()) layoutstr = append; else @@ -227,6 +229,18 @@ mlir::translateDataLayout(const llvm::DataLayout &dataLayout, StringAttr::get(context, DLTIDialect::kDataLayoutEndiannessKey), value); entries.emplace_back(entry); + } else if (symbol == 'A') { + unsigned addressSpace; + if (parameter.getAsInteger(/*Radix=*/10, addressSpace)) + return nullptr; + // Skip storing if generic address space is defined. 
+ if (addressSpace != 0) { + auto entry = DataLayoutEntryAttr::get( + StringAttr::get(context, + DLTIDialect::kDataLayoutAllocaMemorySpaceKey), + mlir::Builder(context).getUI32IntegerAttr(addressSpace)); + entries.emplace_back(entry); + } } } diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 9b0bfa22c73a3..ca5fb7dda3841 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -79,6 +79,15 @@ translateDataLayout(DataLayoutSpecInterface attribute, layoutStream.flush(); continue; } + if (key.getValue() == DLTIDialect::kDataLayoutAllocaMemorySpaceKey) { + auto value = entry.getValue().cast(); + if (value != 0) { + // Only emit non-default address space. + layoutStream << "A" << value; + layoutStream.flush(); + } + continue; + } emitError(*loc) << "unsupported data layout key " << key; return failure(); } diff --git a/mlir/test/Dialect/LLVMIR/layout.mlir b/mlir/test/Dialect/LLVMIR/layout.mlir index c2f162dd61750..e5c8c0bd86db1 100644 --- a/mlir/test/Dialect/LLVMIR/layout.mlir +++ b/mlir/test/Dialect/LLVMIR/layout.mlir @@ -4,37 +4,44 @@ module { // CHECK: @no_spec func.func @no_spec() { // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 "test.data_layout_query"() : () -> !llvm.ptr> // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 8 - // CHECK: bitsize = 64 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 "test.data_layout_query"() : () -> !llvm.ptr<5> @@ -47,47 +54,56 @@ module { module attributes { dlti.dl_spec = #dlti.dl_spec< #dlti.dl_entry, dense<[32, 32, 64]> : vector<3xi32>>, #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi32>>, - #dlti.dl_entry, dense<[32, 64, 64]> : vector<3xi32>> + #dlti.dl_entry, dense<[32, 64, 64]> : vector<3xi32>>, + #dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32> >} { // CHECK: @spec func.func @spec() { // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 // CHECK: preferred = 8 // CHECK: size = 4 "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 // CHECK: preferred = 8 // CHECK: size = 4 "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 // CHECK: preferred = 8 // CHECK: size = 4 "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 // CHECK: preferred = 8 // CHECK: size = 4 "test.data_layout_query"() : () -> !llvm.ptr> // CHECK: alignment = 4 + // 
CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 // CHECK: preferred = 8 // CHECK: size = 4 "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 8 // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 4 - // CHECK: bitsize = 32 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 // CHECK: preferred = 8 // CHECK: size = 4 "test.data_layout_query"() : () -> !llvm.ptr<3> // CHECK: alignment = 8 - // CHECK: bitsize = 32 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 // CHECK: preferred = 8 // CHECK: size = 4 "test.data_layout_query"() : () -> !llvm.ptr<4> diff --git a/mlir/test/lib/Dialect/DLTI/TestDataLayoutQuery.cpp b/mlir/test/lib/Dialect/DLTI/TestDataLayoutQuery.cpp index a7fde171dd854..9e71bd4e999f7 100644 --- a/mlir/test/lib/Dialect/DLTI/TestDataLayoutQuery.cpp +++ b/mlir/test/lib/Dialect/DLTI/TestDataLayoutQuery.cpp @@ -40,11 +40,18 @@ struct TestDataLayoutQuery unsigned bitsize = layout.getTypeSizeInBits(op.getType()); unsigned alignment = layout.getTypeABIAlignment(op.getType()); unsigned preferred = layout.getTypePreferredAlignment(op.getType()); + Attribute allocaMemorySpace = layout.getAllocaMemorySpace(); op->setAttrs( {builder.getNamedAttr("size", builder.getIndexAttr(size)), builder.getNamedAttr("bitsize", builder.getIndexAttr(bitsize)), builder.getNamedAttr("alignment", builder.getIndexAttr(alignment)), - builder.getNamedAttr("preferred", builder.getIndexAttr(preferred))}); + builder.getNamedAttr("preferred", builder.getIndexAttr(preferred)), + builder.getNamedAttr("alloca_memory_space", + allocaMemorySpace == Attribute() + ? builder.getUI32IntegerAttr(0) + : allocaMemorySpace) + + }); }); } }; diff --git a/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp b/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp index 31176aa07d2bf..c35e176035227 100644 --- a/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp +++ b/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp @@ -22,6 +22,8 @@ using namespace mlir; namespace { constexpr static llvm::StringLiteral kAttrName = "dltest.layout"; +constexpr static llvm::StringLiteral kAllocaKeyName = + "dltest.alloca_memory_space"; /// Trivial array storage for the custom data layout spec attribute, just a list /// of entries. @@ -62,6 +64,9 @@ struct CustomDataLayoutSpec } DataLayoutEntryListRef getEntries() const { return getImpl()->entries; } LogicalResult verifySpec(Location loc) { return success(); } + StringAttr getAllocaMemorySpaceIdentifier(MLIRContext *context) const { + return Builder(context).getStringAttr(kAllocaKeyName); + } }; /// A type subject to data layout that exits the program if it is queried more @@ -104,6 +109,15 @@ struct SingleQueryType executed = true; return 4; } + + Attribute getAllocaMemorySpace(DataLayoutEntryInterface entry) { + static bool executed = false; + if (executed) + llvm::report_fatal_error("repeated call"); + + executed = true; + return Attribute(); + } }; /// A type that is not subject to data layout.
@@ -260,6 +274,8 @@ module {} EXPECT_EQ(layout.getTypeABIAlignment(Float16Type::get(&ctx)), 2u); EXPECT_EQ(layout.getTypePreferredAlignment(IntegerType::get(&ctx, 42)), 8u); EXPECT_EQ(layout.getTypePreferredAlignment(Float16Type::get(&ctx)), 2u); + + EXPECT_EQ(layout.getAllocaMemorySpace(), Attribute()); } TEST(DataLayout, NullSpec) { @@ -275,6 +291,7 @@ TEST(DataLayout, NullSpec) { auto op = cast(module->getBody()->getOperations().front()); DataLayout layout(op); + EXPECT_EQ(layout.getTypeSize(IntegerType::get(&ctx, 42)), 42u); EXPECT_EQ(layout.getTypeSize(Float16Type::get(&ctx)), 16u); EXPECT_EQ(layout.getTypeSizeInBits(IntegerType::get(&ctx, 42)), 8u * 42u); @@ -283,6 +300,8 @@ TEST(DataLayout, NullSpec) { EXPECT_EQ(layout.getTypeABIAlignment(Float16Type::get(&ctx)), 16u); EXPECT_EQ(layout.getTypePreferredAlignment(IntegerType::get(&ctx, 42)), 128u); EXPECT_EQ(layout.getTypePreferredAlignment(Float16Type::get(&ctx)), 32u); + + EXPECT_EQ(layout.getAllocaMemorySpace(), Attribute()); } TEST(DataLayout, EmptySpec) { @@ -306,13 +325,16 @@ TEST(DataLayout, EmptySpec) { EXPECT_EQ(layout.getTypeABIAlignment(Float16Type::get(&ctx)), 16u); EXPECT_EQ(layout.getTypePreferredAlignment(IntegerType::get(&ctx, 42)), 128u); EXPECT_EQ(layout.getTypePreferredAlignment(Float16Type::get(&ctx)), 32u); + + EXPECT_EQ(layout.getAllocaMemorySpace(), Attribute()); } TEST(DataLayout, SpecWithEntries) { const char *ir = R"MLIR( "dltest.op_with_layout"() { dltest.layout = #dltest.spec< #dlti.dl_entry, - #dlti.dl_entry + #dlti.dl_entry, + #dlti.dl_entry<"dltest.alloca_memory_space", 5 : i32> > } : () -> () )MLIR"; @@ -341,6 +363,8 @@ TEST(DataLayout, SpecWithEntries) { EXPECT_EQ(layout.getTypeABIAlignment(Float32Type::get(&ctx)), 32u); EXPECT_EQ(layout.getTypePreferredAlignment(IntegerType::get(&ctx, 32)), 64u); EXPECT_EQ(layout.getTypePreferredAlignment(Float32Type::get(&ctx)), 64u); + + EXPECT_EQ(layout.getAllocaMemorySpace(), Builder(&ctx).getI32IntegerAttr(5)); } TEST(DataLayout, Caching) { From 155abd0678ab8c9f7d11ee175a3ce7a33d107c67 Mon Sep 17 00:00:00 2001 From: Maryam Moghadas Date: Tue, 7 Mar 2023 09:27:02 -0600 Subject: [PATCH 178/691] [PowerPC] Emit warn_deprecated_lax_vec_conv_all warning only for PPC This patch isolates the lax vector conversions warning to PPC only; the reason is that SystemZ wants different logic in terms of vector bool compatibility. Reviewed By: lei Differential Revision: https://reviews.llvm.org/D145506 --- clang/lib/Sema/SemaExpr.cpp | 13 ++++++++----- clang/lib/Sema/SemaOverload.cpp | 3 ++- clang/test/CodeGen/SystemZ/zvector.c | 2 +- clang/test/CodeGen/SystemZ/zvector2.c | 2 +- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 963bd43b6e128..40c2a993b8ffe 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -9856,7 +9856,8 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, // The default for lax vector conversions with Altivec vectors will // change, so if we are converting between vector types where // at least one is an Altivec vector, emit a warning.
- if (anyAltivecTypes(RHSType, LHSType) && + if (Context.getTargetInfo().getTriple().isPPC() && + anyAltivecTypes(RHSType, LHSType) && !Context.areCompatibleVectorTypes(RHSType, LHSType)) Diag(RHS.get()->getExprLoc(), diag::warn_deprecated_lax_vec_conv_all) << RHSType << LHSType; @@ -9873,9 +9874,10 @@ Sema::CheckAssignmentConstraints(QualType LHSType, ExprResult &RHS, const VectorType *VecType = RHSType->getAs<VectorType>(); if (VecType && VecType->getNumElements() == 1 && isLaxVectorConversion(RHSType, LHSType)) { - if (VecType->getVectorKind() == VectorType::AltiVecVector || - VecType->getVectorKind() == VectorType::AltiVecBool || - VecType->getVectorKind() == VectorType::AltiVecPixel) + if (Context.getTargetInfo().getTriple().isPPC() && + (VecType->getVectorKind() == VectorType::AltiVecVector || + VecType->getVectorKind() == VectorType::AltiVecBool || + VecType->getVectorKind() == VectorType::AltiVecPixel)) Diag(RHS.get()->getExprLoc(), diag::warn_deprecated_lax_vec_conv_all) << RHSType << LHSType; ExprResult *VecExpr = &RHS; @@ -10836,7 +10838,8 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS, QualType OtherType = LHSVecType ? RHSType : LHSType; ExprResult *OtherExpr = LHSVecType ? &RHS : &LHS; if (isLaxVectorConversion(OtherType, VecType)) { - if (anyAltivecTypes(RHSType, LHSType) && + if (Context.getTargetInfo().getTriple().isPPC() && + anyAltivecTypes(RHSType, LHSType) && !Context.areCompatibleVectorTypes(RHSType, LHSType)) Diag(Loc, diag::warn_deprecated_lax_vec_conv_all) << RHSType << LHSType; // If we're allowing lax vector conversions, only the total (data) size diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 3f1bb3f571222..18b356c2f9f7b 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -1775,7 +1775,8 @@ static bool IsVectorConversion(Sema &S, QualType FromType, QualType ToType, if (S.Context.areCompatibleVectorTypes(FromType, ToType) || (S.isLaxVectorConversion(FromType, ToType) && !ToType->hasAttr(attr::ArmMveStrictPolymorphism))) { - if (S.isLaxVectorConversion(FromType, ToType) && + if (S.getASTContext().getTargetInfo().getTriple().isPPC() && + S.isLaxVectorConversion(FromType, ToType) && S.anyAltivecTypes(FromType, ToType) && !S.Context.areCompatibleVectorTypes(FromType, ToType) && !InOverloadResolution && !CStyle) { diff --git a/clang/test/CodeGen/SystemZ/zvector.c b/clang/test/CodeGen/SystemZ/zvector.c index 33fde545b9d4e..2720770624fb6 100644 --- a/clang/test/CodeGen/SystemZ/zvector.c +++ b/clang/test/CodeGen/SystemZ/zvector.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -triple s390x-linux-gnu -target-cpu z13 -fzvector \ -// RUN: -emit-llvm -o - -W -Wall -Werror -Wno-error=deprecate-lax-vec-conv-all \ +// RUN: -emit-llvm -o - -W -Wall -Werror \ // RUN: %s | opt -S -passes=mem2reg | FileCheck %s volatile vector signed char sc, sc2; diff --git a/clang/test/CodeGen/SystemZ/zvector2.c b/clang/test/CodeGen/SystemZ/zvector2.c index 93fb1f5434305..36cbf228feac8 100644 --- a/clang/test/CodeGen/SystemZ/zvector2.c +++ b/clang/test/CodeGen/SystemZ/zvector2.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -triple s390x-linux-gnu -target-cpu z14 -fzvector \ -// RUN: -O -emit-llvm -o - -W -Wall -Werror -Wno-error=deprecate-lax-vec-conv-all %s | FileCheck %s +// RUN: -O -emit-llvm -o - -W -Wall -Werror %s | FileCheck %s volatile vector float ff, ff2; volatile vector bool int bi; From 16b192f497c8c25d1bba15d09c3b452db0219da2 Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 21 Mar 2023 15:00:10 +0100 Subject: [PATCH
179/691] [mlir][Bazel] Adjust BUILD files to 0e9523efda8a4ad95ecb1d5b5e65e10bcc3711f5 --- .../llvm-project-overlay/mlir/BUILD.bazel | 35 +++++++++++++++++++ .../mlir/examples/toy/Ch6/BUILD.bazel | 1 + .../mlir/examples/toy/Ch7/BUILD.bazel | 1 + .../mlir/unittests/BUILD.bazel | 1 + 4 files changed, 38 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 10eb6b82f8f25..3bef7dd570562 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -616,6 +616,7 @@ mlir_c_api_cc_library( ], includes = ["include"], deps = [ + ":BuiltinToLLVMIRTranslation", ":ExecutionEngineUtils", ":LLVMToLLVMIRTranslation", "//llvm:OrcJIT", @@ -650,6 +651,7 @@ mlir_c_api_cc_library( includes = ["include"], deps = [ ":AllPassesAndDialects", + ":BuiltinToLLVMIRTranslation", ":LLVMToLLVMIRTranslation", ], ) @@ -4057,6 +4059,7 @@ cc_library( deps = [ ":ArithDialect", ":AsmParser", + ":GPUToLLVMIRTranslation", ":AsyncDialect", ":ControlFlowDialect", ":DLTIDialect", @@ -6737,6 +6740,33 @@ cc_library( ], ) +cc_library( + name = "GPUToLLVMIRTranslation", + srcs = glob(["lib/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.cpp"]), + hdrs = glob(["include/mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"]), + includes = ["include"], + deps = [ + ":GPUDialect", + ":IR", + ":ToLLVMIRTranslation", + "//llvm:Core", + "//llvm:Support", + ], +) + +cc_library( + name = "BuiltinToLLVMIRTranslation", + srcs = glob(["lib/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.cpp"]), + hdrs = glob(["include/mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h"]), + includes = ["include"], + deps = [ + ":IR", + ":ToLLVMIRTranslation", + "//llvm:Core", + "//llvm:Support", + ], +) + cc_library( name = "LLVMToLLVMIRTranslation", srcs = ["lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp"], @@ -6827,6 +6857,8 @@ cc_library( ":AMXToLLVMIRTranslation", ":ArmNeonToLLVMIRTranslation", ":ArmSVEToLLVMIRTranslation", + ":BuiltinToLLVMIRTranslation", + ":GPUToLLVMIRTranslation", ":LLVMIRToLLVMTranslation", ":LLVMToLLVMIRTranslation", ":NVVMToLLVMIRTranslation", @@ -7360,6 +7392,7 @@ cc_binary( srcs = ["tools/mlir-cpu-runner/mlir-cpu-runner.cpp"], deps = [ ":AllToLLVMIRTranslations", + ":BuiltinToLLVMIRTranslation", ":ExecutionEngineUtils", ":IR", ":LLVMDialect", @@ -7466,6 +7499,7 @@ cc_binary( srcs = ["tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp"], deps = [ ":ArithDialect", + ":BuiltinToLLVMIRTranslation", ":ExecutionEngineUtils", ":FuncDialect", ":FuncToLLVM", @@ -7498,6 +7532,7 @@ cc_binary( srcs = ["tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp"], deps = [ ":ArithDialect", + ":BuiltinToLLVMIRTranslation", ":ExecutionEngineUtils", ":FuncDialect", ":FuncToLLVM", diff --git a/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch6/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch6/BUILD.bazel index c207a6a8b9f82..7b48716aacded 100644 --- a/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch6/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch6/BUILD.bazel @@ -105,6 +105,7 @@ cc_binary( "//mlir:Analysis", "//mlir:ArithDialect", "//mlir:ArithToLLVM", + "//mlir:BuiltinToLLVMIRTranslation", "//mlir:CastOpInterfaces", "//mlir:ControlFlowToLLVM", "//mlir:ExecutionEngine", diff --git a/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch7/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch7/BUILD.bazel index 
8f017686dc63e..7ddc237542181 100644 --- a/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch7/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/examples/toy/Ch7/BUILD.bazel @@ -105,6 +105,7 @@ cc_binary( "//mlir:Analysis", "//mlir:ArithDialect", "//mlir:ArithToLLVM", + "//mlir:BuiltinToLLVMIRTranslation", "//mlir:CastOpInterfaces", "//mlir:ControlFlowToLLVM", "//mlir:ExecutionEngine", diff --git a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel index 9091f856c8874..73ead99d7863f 100644 --- a/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/unittests/BUILD.bazel @@ -391,6 +391,7 @@ cc_test( "//mlir:AllPassesAndDialects", "//mlir:Analysis", "//mlir:ArithToLLVM", + "//mlir:BuiltinToLLVMIRTranslation", "//mlir:ExecutionEngine", "//mlir:FuncDialect", "//mlir:FuncToLLVM", From dc9d2e588dd2b48d0c7777f90c90e1d862854c5b Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Tue, 21 Mar 2023 15:15:36 +0100 Subject: [PATCH 180/691] [mlir] Add missing registrations to runners. --- mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp | 2 ++ mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp | 2 ++ 2 files changed, 4 insertions(+) diff --git a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp index ba9683ca9d7b1..e732e5dd9bedf 100644 --- a/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp +++ b/mlir/tools/mlir-spirv-cpu-runner/mlir-spirv-cpu-runner.cpp @@ -28,6 +28,7 @@ #include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" @@ -111,6 +112,7 @@ int main(int argc, char **argv) { registry.insert(); + mlir::registerBuiltinDialectTranslation(registry); mlir::registerLLVMDialectTranslation(registry); return mlir::JitRunnerMain(argc, argv, registry, jitRunnerConfig); diff --git a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp index d196902eb169f..ccf6c3775239b 100644 --- a/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp +++ b/mlir/tools/mlir-vulkan-runner/mlir-vulkan-runner.cpp @@ -34,6 +34,7 @@ #include "mlir/ExecutionEngine/JitRunner.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" +#include "mlir/Target/LLVMIR/Dialect/Builtin/BuiltinToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/TargetSelect.h" @@ -114,6 +115,7 @@ int main(int argc, char **argv) { mlir::gpu::GPUDialect, mlir::spirv::SPIRVDialect, mlir::func::FuncDialect, mlir::memref::MemRefDialect, mlir::vector::VectorDialect>(); + mlir::registerBuiltinDialectTranslation(registry); mlir::registerLLVMDialectTranslation(registry); return mlir::JitRunnerMain(argc, argv, registry, jitRunnerConfig); From 88e5206f2c96a34e23a4d63f0a38afb2db044f0a Mon Sep 17 00:00:00 2001 From: Pavel Kosov Date: Tue, 21 Mar 2023 14:05:19 +0300 Subject: [PATCH 181/691] Reland [clang][ASTImport] Add support for import of empty records Patch represents the clang part of changes in D143347 Reviewed By: balazske Differential Revision: https://reviews.llvm.org/D145057 ~~ Huawei RRI, OS Lab --- clang/include/clang/AST/ASTImporter.h | 1 + 
clang/include/clang/AST/DeclCXX.h | 4 ++++ clang/lib/AST/ASTImporter.cpp | 30 +++++++++++++++++-------- clang/unittests/AST/ASTImporterTest.cpp | 23 +++++++++++++++++++ 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/AST/ASTImporter.h b/clang/include/clang/AST/ASTImporter.h index f851decd0965c..4ffd913846575 100644 --- a/clang/include/clang/AST/ASTImporter.h +++ b/clang/include/clang/AST/ASTImporter.h @@ -258,6 +258,7 @@ class TypeSourceInfo; FoundDeclsTy findDeclsInToCtx(DeclContext *DC, DeclarationName Name); void AddToLookupTable(Decl *ToD); + llvm::Error ImportAttrs(Decl *ToD, Decl *FromD); protected: /// Can be overwritten by subclasses to implement their own import logic. diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h index ff8f8a1bb12d6..dd35ef4adfd70 100644 --- a/clang/include/clang/AST/DeclCXX.h +++ b/clang/include/clang/AST/DeclCXX.h @@ -1165,6 +1165,10 @@ class CXXRecordDecl : public RecordDecl { /// /// \note This does NOT include a check for union-ness. bool isEmpty() const { return data().Empty; } + /// Marks this record as empty. This is used by DWARFASTParserClang + /// when parsing records with empty fields having the [[no_unique_address]] + /// attribute. + void markEmpty() { data().Empty = true; } void setInitMethod(bool Val) { data().HasInitMethod = Val; } bool hasInitMethod() const { return data().HasInitMethod; } diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index bd055082778df..d0da2dae3aa23 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -3895,6 +3895,12 @@ ExpectedDecl ASTNodeImporter::VisitFieldDecl(FieldDecl *D) { D->getInClassInitStyle())) return ToField; + // We need [[no_unique_address]] attributes to be added to the FieldDecl + // before we add fields in CXXRecordDecl::addedMember, otherwise the record + // will be marked as having non-zero size. + Err = Importer.ImportAttrs(ToField, D); + if (Err) + return std::move(Err); ToField->setAccess(D->getAccess()); ToField->setLexicalDeclContext(LexicalDC); if (ToInitializer) @@ -8981,6 +8987,19 @@ TranslationUnitDecl *ASTImporter::GetFromTU(Decl *ToD) { return FromDPos->second->getTranslationUnitDecl(); } +Error ASTImporter::ImportAttrs(Decl *ToD, Decl *FromD) { + if (!FromD->hasAttrs() || ToD->hasAttrs()) + return Error::success(); + for (const Attr *FromAttr : FromD->getAttrs()) { + auto ToAttrOrErr = Import(FromAttr); + if (ToAttrOrErr) + ToD->addAttr(*ToAttrOrErr); + else + return ToAttrOrErr.takeError(); + } + return Error::success(); +} + Expected<Decl *> ASTImporter::Import(Decl *FromD) { if (!FromD) return nullptr; @@ -9115,15 +9134,8 @@ Expected<Decl *> ASTImporter::Import(Decl *FromD) { // Make sure that ImportImpl registered the imported decl. assert(ImportedDecls.count(FromD) != 0 && "Missing call to MapImported?"); - - if (FromD->hasAttrs()) - for (const Attr *FromAttr : FromD->getAttrs()) { - auto ToAttrOrErr = Import(FromAttr); - if (ToAttrOrErr) - ToD->addAttr(*ToAttrOrErr); - else - return ToAttrOrErr.takeError(); - } + if (auto Error = ImportAttrs(ToD, FromD)) + return std::move(Error); // Notify subclasses.
Imported(FromD, ToD); diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 6300551ca4469..95ff850860300 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -8478,6 +8478,29 @@ TEST_P(ASTImporterOptionSpecificTestBase, VaListCpp) { ToVaList->getUnderlyingType(), ToBuiltinVaList->getUnderlyingType())); } +TEST_P(ASTImporterOptionSpecificTestBase, + ImportDefinitionOfEmptyClassWithNoUniqueAddressField) { + Decl *FromTU = getTuDecl( + R"( + struct B {}; + struct A { B b; }; + )", + Lang_CXX20); + + CXXRecordDecl *FromD = FirstDeclMatcher<CXXRecordDecl>().match( + FromTU, cxxRecordDecl(hasName("A"))); + + for (auto *FD : FromD->fields()) + FD->addAttr(clang::NoUniqueAddressAttr::Create(FromD->getASTContext(), + clang::SourceRange())); + FromD->markEmpty(); + + CXXRecordDecl *ToD = Import(FromD, Lang_CXX20); + EXPECT_TRUE(ToD->isEmpty()); + for (auto *FD : ToD->fields()) + EXPECT_EQ(true, FD->hasAttr<NoUniqueAddressAttr>()); +} + INSTANTIATE_TEST_SUITE_P(ParameterizedTests, ASTImporterLookupTableTest, DefaultTestValuesForRunOptions); From 392d9eb03af5a1adac66a86939351b22b3e73495 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Fri, 17 Mar 2023 11:04:38 +0000 Subject: [PATCH 182/691] [lldb] For native compiles, check signal numbers are correct when adding codes Reviewed By: arichardson, emaste Differential Revision: https://reviews.llvm.org/D146285 --- .../Platform/FreeBSD/FreeBSDSignals.cpp | 63 ++++++++++--------- .../Plugins/Platform/Linux/LinuxSignals.cpp | 63 ++++++++++--------- .../Plugins/Platform/NetBSD/NetBSDSignals.cpp | 57 +++++++++-------- 3 files changed, 96 insertions(+), 87 deletions(-) diff --git a/lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.cpp b/lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.cpp index f597bed80ddec..f436de0d033e5 100644 --- a/lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.cpp +++ b/lldb/source/Plugins/Platform/FreeBSD/FreeBSDSignals.cpp @@ -11,13 +11,16 @@ #ifdef __FreeBSD__ #include -#define ADD_SIGCODE(signal, name, value, ...) \ - static_assert(name == value, "Value mismatch for signal code " #name); \ - AddSignalCode(signal, value, __VA_ARGS__) +#define ADD_SIGCODE(signal_name, signal_value, code_name, code_value, ...) \ + static_assert(signal_name == signal_value, \ + "Value mismatch for signal number " #signal_name); \ + static_assert(code_name == code_value, \ + "Value mismatch for signal code " #code_name); \ + AddSignalCode(signal_value, code_value, __VA_ARGS__) #else -#define ADD_SIGCODE(signal, name, value, ...)
\ + AddSignalCode(signal_value, code_value, __VA_ARGS__) +#endif /* ifdef __FreeBSD__ */ using namespace lldb_private; @@ -28,38 +31,38 @@ void FreeBSDSignals::Reset() { // clang-format off // SIGILL - ADD_SIGCODE(4, ILL_ILLOPC, 1, "illegal opcode"); - ADD_SIGCODE(4, ILL_ILLOPN, 2, "illegal operand"); - ADD_SIGCODE(4, ILL_ILLADR, 3, "illegal addressing mode"); - ADD_SIGCODE(4, ILL_ILLTRP, 4, "illegal trap"); - ADD_SIGCODE(4, ILL_PRVOPC, 5, "privileged opcode"); - ADD_SIGCODE(4, ILL_PRVREG, 6, "privileged register"); - ADD_SIGCODE(4, ILL_COPROC, 7, "coprocessor error"); - ADD_SIGCODE(4, ILL_BADSTK, 8, "internal stack error"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLOPC, 1, "illegal opcode"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLOPN, 2, "illegal operand"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLADR, 3, "illegal addressing mode"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLTRP, 4, "illegal trap"); + ADD_SIGCODE(SIGILL, 4, ILL_PRVOPC, 5, "privileged opcode"); + ADD_SIGCODE(SIGILL, 4, ILL_PRVREG, 6, "privileged register"); + ADD_SIGCODE(SIGILL, 4, ILL_COPROC, 7, "coprocessor error"); + ADD_SIGCODE(SIGILL, 4, ILL_BADSTK, 8, "internal stack error"); // SIGFPE - ADD_SIGCODE(8, FPE_INTOVF, 1, "integer overflow"); - ADD_SIGCODE(8, FPE_INTDIV, 2, "integer divide by zero"); - ADD_SIGCODE(8, FPE_FLTDIV, 3, "floating point divide by zero"); - ADD_SIGCODE(8, FPE_FLTOVF, 4, "floating point overflow"); - ADD_SIGCODE(8, FPE_FLTUND, 5, "floating point underflow"); - ADD_SIGCODE(8, FPE_FLTRES, 6, "floating point inexact result"); - ADD_SIGCODE(8, FPE_FLTINV, 7, "invalid floating point operation"); - ADD_SIGCODE(8, FPE_FLTSUB, 8, "subscript out of range"); - ADD_SIGCODE(8, FPE_FLTIDO, 9, "input denormal operation"); + ADD_SIGCODE(SIGFPE, 8, FPE_INTOVF, 1, "integer overflow"); + ADD_SIGCODE(SIGFPE, 8, FPE_INTDIV, 2, "integer divide by zero"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTDIV, 3, "floating point divide by zero"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTOVF, 4, "floating point overflow"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTUND, 5, "floating point underflow"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTRES, 6, "floating point inexact result"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTINV, 7, "invalid floating point operation"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTSUB, 8, "subscript out of range"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTIDO, 9, "input denormal operation"); // SIGBUS - ADD_SIGCODE(10, BUS_ADRALN, 1, "invalid address alignment"); - ADD_SIGCODE(10, BUS_ADRERR, 2, "nonexistent physical address"); - ADD_SIGCODE(10, BUS_OBJERR, 3, "object-specific hardware error"); - ADD_SIGCODE(10, BUS_OOMERR, 100, "no memory"); + ADD_SIGCODE(SIGBUS, 10, BUS_ADRALN, 1, "invalid address alignment"); + ADD_SIGCODE(SIGBUS, 10, BUS_ADRERR, 2, "nonexistent physical address"); + ADD_SIGCODE(SIGBUS, 10, BUS_OBJERR, 3, "object-specific hardware error"); + ADD_SIGCODE(SIGBUS, 10, BUS_OOMERR, 100, "no memory"); // SIGSEGV - ADD_SIGCODE(11, SEGV_MAPERR, 1, "address not mapped to object", SignalCodePrintOption::Address); - ADD_SIGCODE(11, SEGV_ACCERR, 2, "invalid permissions for mapped object", SignalCodePrintOption::Address); - ADD_SIGCODE(11, SEGV_PKUERR, 100, "PKU violation", SignalCodePrintOption::Address); + ADD_SIGCODE(SIGSEGV, 11, SEGV_MAPERR, 1, "address not mapped to object", SignalCodePrintOption::Address); + ADD_SIGCODE(SIGSEGV, 11, SEGV_ACCERR, 2, "invalid permissions for mapped object", SignalCodePrintOption::Address); + ADD_SIGCODE(SIGSEGV, 11, SEGV_PKUERR, 100, "PKU violation", SignalCodePrintOption::Address); // SIGNO NAME SUPPRESS STOP NOTIFY DESCRIPTION diff --git a/lldb/source/Plugins/Platform/Linux/LinuxSignals.cpp b/lldb/source/Plugins/Platform/Linux/LinuxSignals.cpp index
834a558e4d22c..3f25dbc6abbbe 100644 --- a/lldb/source/Plugins/Platform/Linux/LinuxSignals.cpp +++ b/lldb/source/Plugins/Platform/Linux/LinuxSignals.cpp @@ -21,12 +21,15 @@ #define SEGV_MTESERR 9 #endif -#define ADD_SIGCODE(signal, name, value, ...) \ - static_assert(name == value, "Value mismatch for signal code " #name); \ - AddSignalCode(signal, value, __VA_ARGS__) +#define ADD_SIGCODE(signal_name, signal_value, code_name, code_value, ...) \ + static_assert(signal_name == signal_value, \ + "Value mismatch for signal number " #signal_name); \ + static_assert(code_name == code_value, \ + "Value mismatch for signal code " #code_name); \ + AddSignalCode(signal_value, code_value, __VA_ARGS__) #else -#define ADD_SIGCODE(signal, name, value, ...) \ - AddSignalCode(signal, value, __VA_ARGS__) +#define ADD_SIGCODE(signal_name, signal_value, code_name, code_value, ...) \ + AddSignalCode(signal_value, code_value, __VA_ARGS__) #endif /* ifdef __linux__ */ using namespace lldb_private; @@ -43,45 +46,45 @@ void LinuxSignals::Reset() { AddSignal(3, "SIGQUIT", false, true, true, "quit"); AddSignal(4, "SIGILL", false, true, true, "illegal instruction"); - ADD_SIGCODE(4, ILL_ILLOPC, 1, "illegal opcode"); - ADD_SIGCODE(4, ILL_ILLOPN, 2, "illegal operand"); - ADD_SIGCODE(4, ILL_ILLADR, 3, "illegal addressing mode"); - ADD_SIGCODE(4, ILL_ILLTRP, 4, "illegal trap"); - ADD_SIGCODE(4, ILL_PRVOPC, 5, "privileged opcode"); - ADD_SIGCODE(4, ILL_PRVREG, 6, "privileged register"); - ADD_SIGCODE(4, ILL_COPROC, 7, "coprocessor error"); - ADD_SIGCODE(4, ILL_BADSTK, 8, "internal stack error"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLOPC, 1, "illegal opcode"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLOPN, 2, "illegal operand"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLADR, 3, "illegal addressing mode"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLTRP, 4, "illegal trap"); + ADD_SIGCODE(SIGILL, 4, ILL_PRVOPC, 5, "privileged opcode"); + ADD_SIGCODE(SIGILL, 4, ILL_PRVREG, 6, "privileged register"); + ADD_SIGCODE(SIGILL, 4, ILL_COPROC, 7, "coprocessor error"); + ADD_SIGCODE(SIGILL, 4, ILL_BADSTK, 8, "internal stack error"); AddSignal(5, "SIGTRAP", true, true, true, "trace trap (not reset when caught)"); AddSignal(6, "SIGABRT", false, true, true, "abort()/IOT trap", "SIGIOT"); AddSignal(7, "SIGBUS", false, true, true, "bus error"); - ADD_SIGCODE(7, BUS_ADRALN, 1, "illegal alignment"); - ADD_SIGCODE(7, BUS_ADRERR, 2, "illegal address"); - ADD_SIGCODE(7, BUS_OBJERR, 3, "hardware error"); + ADD_SIGCODE(SIGBUS, 7, BUS_ADRALN, 1, "illegal alignment"); + ADD_SIGCODE(SIGBUS, 7, BUS_ADRERR, 2, "illegal address"); + ADD_SIGCODE(SIGBUS, 7, BUS_OBJERR, 3, "hardware error"); AddSignal(8, "SIGFPE", false, true, true, "floating point exception"); - ADD_SIGCODE(8, FPE_INTDIV, 1, "integer divide by zero"); - ADD_SIGCODE(8, FPE_INTOVF, 2, "integer overflow"); - ADD_SIGCODE(8, FPE_FLTDIV, 3, "floating point divide by zero"); - ADD_SIGCODE(8, FPE_FLTOVF, 4, "floating point overflow"); - ADD_SIGCODE(8, FPE_FLTUND, 5, "floating point underflow"); - ADD_SIGCODE(8, FPE_FLTRES, 6, "floating point inexact result"); - ADD_SIGCODE(8, FPE_FLTINV, 7, "floating point invalid operation"); - ADD_SIGCODE(8, FPE_FLTSUB, 8, "subscript out of range"); + ADD_SIGCODE(SIGFPE, 8, FPE_INTDIV, 1, "integer divide by zero"); + ADD_SIGCODE(SIGFPE, 8, FPE_INTOVF, 2, "integer overflow"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTDIV, 3, "floating point divide by zero"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTOVF, 4, "floating point overflow"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTUND, 5, "floating point underflow"); + 
ADD_SIGCODE(SIGFPE, 8, FPE_FLTRES, 6, "floating point inexact result"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTINV, 7, "floating point invalid operation"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTSUB, 8, "subscript out of range"); AddSignal(9, "SIGKILL", false, true, true, "kill"); AddSignal(10, "SIGUSR1", false, true, true, "user defined signal 1"); AddSignal(11, "SIGSEGV", false, true, true, "segmentation violation"); - ADD_SIGCODE(11, SEGV_MAPERR, 1, "address not mapped to object", SignalCodePrintOption::Address); - ADD_SIGCODE(11, SEGV_ACCERR, 2, "invalid permissions for mapped object", SignalCodePrintOption::Address); - ADD_SIGCODE(11, SEGV_BNDERR, 3, "failed address bounds checks", SignalCodePrintOption::Bounds); - ADD_SIGCODE(11, SEGV_MTEAERR, 8, "async tag check fault"); - ADD_SIGCODE(11, SEGV_MTESERR, 9, "sync tag check fault", SignalCodePrintOption::Address); + ADD_SIGCODE(SIGSEGV, 11, SEGV_MAPERR, 1, "address not mapped to object", SignalCodePrintOption::Address); + ADD_SIGCODE(SIGSEGV, 11, SEGV_ACCERR, 2, "invalid permissions for mapped object", SignalCodePrintOption::Address); + ADD_SIGCODE(SIGSEGV, 11, SEGV_BNDERR, 3, "failed address bounds checks", SignalCodePrintOption::Bounds); + ADD_SIGCODE(SIGSEGV, 11, SEGV_MTEAERR, 8, "async tag check fault"); + ADD_SIGCODE(SIGSEGV, 11, SEGV_MTESERR, 9, "sync tag check fault", SignalCodePrintOption::Address); // Some platforms will occasionally send nonstandard spurious SI_KERNEL // codes. One way to get this is via unaligned SIMD loads. Treat it as invalid address. - ADD_SIGCODE(11, SI_KERNEL, 0x80, "invalid address", SignalCodePrintOption::Address); + ADD_SIGCODE(SIGSEGV, 11, SI_KERNEL, 0x80, "invalid address", SignalCodePrintOption::Address); AddSignal(12, "SIGUSR2", false, true, true, "user defined signal 2"); AddSignal(13, "SIGPIPE", false, true, true, "write to pipe with reading end closed"); diff --git a/lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.cpp b/lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.cpp index 285808e10c95f..6e4e5038566b3 100644 --- a/lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.cpp +++ b/lldb/source/Plugins/Platform/NetBSD/NetBSDSignals.cpp @@ -11,13 +11,16 @@ #ifdef __NetBSD__ #include -#define ADD_SIGCODE(signal, name, value, ...) \ - static_assert(name == value, "Value mismatch for signal code " #name); \ - AddSignalCode(signal, value, __VA_ARGS__) +#define ADD_SIGCODE(signal_name, signal_value, code_name, code_value, ...) \ + static_assert(signal_name == signal_value, \ + "Value mismatch for signal number " #signal_name); \ + static_assert(code_name == code_value, \ + "Value mismatch for signal code " #code_name); \ + AddSignalCode(signal_value, code_value, __VA_ARGS__) #else -#define ADD_SIGCODE(signal, name, value, ...) \ - AddSignalCode(signal, value, __VA_ARGS__) -#endif /* ifdef __NetBSD__ */ +#define ADD_SIGCODE(signal_name, signal_value, code_name, code_value, ...) 
\ + AddSignalCode(signal_value, code_value, __VA_ARGS__) +#endif /* ifdef __NetBSD__ */ using namespace lldb_private; @@ -28,34 +31,34 @@ void NetBSDSignals::Reset() { // clang-format off // SIGILL - ADD_SIGCODE(4, ILL_ILLOPC, 1, "illegal opcode"); - ADD_SIGCODE(4, ILL_ILLOPN, 2, "illegal operand"); - ADD_SIGCODE(4, ILL_ILLADR, 3, "illegal addressing mode"); - ADD_SIGCODE(4, ILL_ILLTRP, 4, "illegal trap"); - ADD_SIGCODE(4, ILL_PRVOPC, 5, "privileged opcode"); - ADD_SIGCODE(4, ILL_PRVREG, 6, "privileged register"); - ADD_SIGCODE(4, ILL_COPROC, 7, "coprocessor error"); - ADD_SIGCODE(4, ILL_BADSTK, 8, "internal stack error"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLOPC, 1, "illegal opcode"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLOPN, 2, "illegal operand"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLADR, 3, "illegal addressing mode"); + ADD_SIGCODE(SIGILL, 4, ILL_ILLTRP, 4, "illegal trap"); + ADD_SIGCODE(SIGILL, 4, ILL_PRVOPC, 5, "privileged opcode"); + ADD_SIGCODE(SIGILL, 4, ILL_PRVREG, 6, "privileged register"); + ADD_SIGCODE(SIGILL, 4, ILL_COPROC, 7, "coprocessor error"); + ADD_SIGCODE(SIGILL, 4, ILL_BADSTK, 8, "internal stack error"); // SIGFPE - ADD_SIGCODE(8, FPE_INTDIV, 1, "integer divide by zero"); - ADD_SIGCODE(8, FPE_INTOVF, 2, "integer overflow"); - ADD_SIGCODE(8, FPE_FLTDIV, 3, "floating point divide by zero"); - ADD_SIGCODE(8, FPE_FLTOVF, 4, "floating point overflow"); - ADD_SIGCODE(8, FPE_FLTUND, 5, "floating point underflow"); - ADD_SIGCODE(8, FPE_FLTRES, 6, "floating point inexact result"); - ADD_SIGCODE(8, FPE_FLTINV, 7, "invalid floating point operation"); - ADD_SIGCODE(8, FPE_FLTSUB, 8, "subscript out of range"); + ADD_SIGCODE(SIGFPE, 8, FPE_INTDIV, 1, "integer divide by zero"); + ADD_SIGCODE(SIGFPE, 8, FPE_INTOVF, 2, "integer overflow"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTDIV, 3, "floating point divide by zero"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTOVF, 4, "floating point overflow"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTUND, 5, "floating point underflow"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTRES, 6, "floating point inexact result"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTINV, 7, "invalid floating point operation"); + ADD_SIGCODE(SIGFPE, 8, FPE_FLTSUB, 8, "subscript out of range"); // SIGBUS - ADD_SIGCODE(10, BUS_ADRALN, 1, "invalid address alignment"); - ADD_SIGCODE(10, BUS_ADRERR, 2, "non-existent physical address"); - ADD_SIGCODE(10, BUS_OBJERR, 3, "object specific hardware error"); + ADD_SIGCODE(SIGBUS, 10, BUS_ADRALN, 1, "invalid address alignment"); + ADD_SIGCODE(SIGBUS, 10, BUS_ADRERR, 2, "non-existent physical address"); + ADD_SIGCODE(SIGBUS, 10, BUS_OBJERR, 3, "object specific hardware error"); // SIGSEGV - ADD_SIGCODE(11, SEGV_MAPERR, 1, "address not mapped to object", + ADD_SIGCODE(SIGSEGV, 11, SEGV_MAPERR, 1, "address not mapped to object", SignalCodePrintOption::Address); - ADD_SIGCODE(11, SEGV_ACCERR, 2, "invalid permissions for mapped object", + ADD_SIGCODE(SIGSEGV, 11, SEGV_ACCERR, 2, "invalid permissions for mapped object", SignalCodePrintOption::Address); // SIGNO NAME SUPPRESS STOP NOTIFY DESCRIPTION From 042783f556639cd5c3b2f4db67c6eeacea53459e Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 21 Mar 2023 07:18:27 -0700 Subject: [PATCH 183/691] [LFTR] Assert and simplify under the assumption exit counts are integers [nfc] This invariant was introduced in 8f3d16905d75b07a933d01dc29677fe5867c1b3e.
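For illustration only (this sketch is not part of the commit): under that invariant, even a loop whose counter is a pointer gets an integer-typed exit count from SCEV. A hypothetical input:

  define void @walk(ptr %begin, ptr %end) {
  entry:
    br label %loop
  loop:
    %p = phi ptr [ %begin, %entry ], [ %p.next, %loop ]
    %p.next = getelementptr i8, ptr %p, i64 1
    %done = icmp eq ptr %p.next, %end
    br i1 %done, label %exit, label %loop
  exit:
    ret void
  }

Here %p is a pointer induction variable, yet the exit count SCEV reports is an integer expression over %begin and %end, which is what the new assert checks; the pointer-typed exit count cases removed below are therefore dead.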
Differential Revision: https://reviews.llvm.org/D146470 --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 0f784a8715edb..2b19842050a5f 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -912,6 +912,7 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, const SCEV *ExitCount, bool UsePostInc, Loop *L, SCEVExpander &Rewriter, ScalarEvolution *SE) { assert(isLoopCounter(IndVar, L, SE)); + assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer"); const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); const SCEV *IVInit = AR->getStart(); assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); @@ -920,8 +921,7 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, // finds a valid pointer IV. Sign extend ExitCount in order to materialize a // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing // the existing GEPs whenever possible. - if (IndVar->getType()->isPointerTy() && - !ExitCount->getType()->isPointerTy()) { + if (IndVar->getType()->isPointerTy()) { // IVOffset will be the new GEP offset that is interpreted by GEP as a // signed value. ExitCount on the other hand represents the loop trip count, // which is an unsigned value. FindLoopCounter only allows induction @@ -979,8 +979,7 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, // Ensure that we generate the same type as IndVar, or a smaller integer // type. In the presence of null pointer values, we have an integer type // SCEV expression (IVInit) for a pointer type IV value (IndVar). - Type *LimitTy = ExitCount->getType()->isPointerTy() ? - IndVar->getType() : ExitCount->getType(); + Type *LimitTy = ExitCount->getType(); BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator()); return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); } From 83dc4734ba69da95d8c879bb174a1169b3f9e0b8 Mon Sep 17 00:00:00 2001 From: Daniel Kiss Date: Tue, 21 Mar 2023 10:46:57 +0100 Subject: [PATCH 184/691] Fix -fsplit-lto-unit with ifuncs ifuncs can't take part in whole-program devirtualization, so they don't need to be copied to the merged module. The corresponding resolver function was also kept out, which caused the crash.
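For reference, a minimal sketch of the shape of module that used to crash; the names here are made up, and the new test ifunc_splitlto.ll added below is the authoritative reproducer:

  @dispatch = ifunc void (), ptr @pick_impl  ; the ifunc itself
  define ptr @pick_impl() {                  ; its resolver
    ret ptr null
  }

With -fsplit-lto-unit, the resolver could be left out of the module that still referenced the ifunc, leaving a dangling resolver reference.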
Fixes #60962 #57870 Reviewed By: tejohnson Differential Revision: https://reviews.llvm.org/D144982 --- .../Transforms/IPO/ThinLTOBitcodeWriter.cpp | 7 +++++ llvm/test/ThinLTO/X86/ifunc_splitlto.ll | 31 +++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 llvm/test/ThinLTO/X86/ifunc_splitlto.ll diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 6700970100859..d3384c86c390f 100644 --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -196,6 +196,13 @@ void simplifyExternals(Module &M) { F.eraseFromParent(); } + for (GlobalIFunc &I : llvm::make_early_inc_range(M.ifuncs())) { + if (I.use_empty()) + I.eraseFromParent(); + else + assert(I.getResolverFunction() && "ifunc misses its resolver function"); + } + for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) { if (GV.isDeclaration() && GV.use_empty()) { GV.eraseFromParent(); diff --git a/llvm/test/ThinLTO/X86/ifunc_splitlto.ll b/llvm/test/ThinLTO/X86/ifunc_splitlto.ll new file mode 100644 index 0000000000000..360eaa10086b2 --- /dev/null +++ b/llvm/test/ThinLTO/X86/ifunc_splitlto.ll @@ -0,0 +1,31 @@ +; regression test for https://github.com/llvm/llvm-project/issues/60962 +; RUN: opt -thinlto-bc -thinlto-split-lto-unit -o %t %s +; RUN: llvm-modextract -b -n 0 -o - %t | llvm-dis | FileCheck --check-prefix=M0 %s +; RUN: llvm-modextract -b -n 1 -o - %t | llvm-dis | FileCheck --check-prefix=M1 %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@i = ifunc ptr (ptr, i64), ptr @hoge + +@g = constant i8 1, !type !0 +@assoc = private constant i8 2, !associated !1 + +define ptr @hoge() !type !2 { +bb: + ret ptr null +} + +; M0: @g = external constant +; M0: @i = ifunc ptr (ptr, i64), ptr @hoge +; M0: define ptr @hoge() +; M0-NOT: @assoc +; M1: @g = constant i8 1 +; M1: @assoc = private constant i8 2 +; M1-NOT: @i = ifunc ptr (ptr, i64), ptr @hoge +; M1-NOT: define ptr @hoge() + +!0 = !{i32 0, !"typeid"} +!1 = !{ptr @g} +!2 = !{i64 0, !3} +!3 = distinct !{} From 270a6a2824e3b42bd87acc986732a8b8f0765be7 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 21 Mar 2023 10:49:54 -0400 Subject: [PATCH 185/691] No longer issue pedantic warning about pre-c++2b compat We were accidentally issuing "overloaded 'operator[]' with more than one parameter is a C++2b extension" with -pedantic because it was an ExtWarn diagnostic rather than a Warning. This corrects the diagnostic category and adds some test coverage. Fixes #61582 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Basic/DiagnosticSemaKinds.td | 5 +++-- .../SemaCXX/cxx2b-overloaded-operator-pedantic.cpp | 11 +++++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 clang/test/SemaCXX/cxx2b-overloaded-operator-pedantic.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e7688b09f68e6..e2e4d6f51d81a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -238,6 +238,9 @@ Bug Fixes to C++ Support (`#58674 `_) - Fix incorrect deletion of the default constructor of unions in some cases. (`#48416 `_) +- No longer issue a pre-C++2b compatibility warning in ``-pedantic`` mode + regarding overloaded `operator[]` with more than one parameter.
(`#61582 + `_) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index bbab5752c9bfe..613e4a5006561 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -9140,8 +9140,9 @@ def err_operator_overload_static : Error< def err_operator_overload_default_arg : Error< "parameter of overloaded %0 cannot have a default argument">; -def ext_subscript_overload : ExtWarn< - "overloaded %0 with %select{no|a defaulted|more than one}1 parameter is a C++2b extension">, InGroup, DefaultIgnore; +def ext_subscript_overload : Warning< + "overloaded %0 with %select{no|a defaulted|more than one}1 parameter is a " + "C++2b extension">, InGroup, DefaultIgnore; def error_subscript_overload : Error< "overloaded %0 cannot have %select{no|a defaulted|more than one}1 parameter before C++2b">; diff --git a/clang/test/SemaCXX/cxx2b-overloaded-operator-pedantic.cpp b/clang/test/SemaCXX/cxx2b-overloaded-operator-pedantic.cpp new file mode 100644 index 0000000000000..53782c61c1c53 --- /dev/null +++ b/clang/test/SemaCXX/cxx2b-overloaded-operator-pedantic.cpp @@ -0,0 +1,11 @@ +// RUN: %clang_cc1 -verify -std=c++2b -pedantic %s +// RUN: %clang_cc1 -verify=compat -std=c++2b -Wpre-c++2b-compat %s + +// expected-no-diagnostics + +struct GH61582 { + // We accidentally would issue this diagnostic in pedantic mode; show that we + // only issue it when enabling the compat warnings now. + void operator[](int, int); // compat-warning {{overloaded 'operator[]' with more than one parameter is a C++2b extension}} +}; + From fb409a2822df90e3c63d5e674966f18c8638cdaf Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Mon, 20 Mar 2023 16:18:35 +0000 Subject: [PATCH 186/691] [mlir] Transform dialect: add named sequences Named sequences introduce an additional abstraction and reuse capability to the transform dialect. They can be thought of as macros parameterized with handles that can be invoked in places where a transform dialect operation is expected. Such reuse was previously not possible in the dialect and required dynamic construction of the transform IR from the client language. Named sequences are intentionally restricted to disallow recursion, as it could make the dialect accidentally Turing-complete, which isn't desired at this point.
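A small sketch of the intended usage (syntax assumed from the op definitions in this patch; the tests added to test-interpreter.mlir are authoritative):

  module attributes {transform.with_named_sequence} {
    // Reusable named sequence: yields its operand back to the caller.
    transform.named_sequence @forward(%arg0: !transform.any_op) -> !transform.any_op {
      transform.yield %arg0 : !transform.any_op
    }

    transform.sequence failures(propagate) {
    ^bb0(%root: !transform.any_op):
      // Dispatches to @forward; the include's results are the values
      // yielded by the named sequence.
      %same = transform.include @forward failures(propagate) (%root)
          : (!transform.any_op) -> !transform.any_op
      transform.yield
    }
  }
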
Reviewed By: springerm Differential Revision: https://reviews.llvm.org/D146433 --- .../Dialect/Transform/IR/TransformDialect.td | 11 + .../Transform/IR/TransformInterfaces.h | 60 ++- .../mlir/Dialect/Transform/IR/TransformOps.h | 2 + .../mlir/Dialect/Transform/IR/TransformOps.td | 138 ++++++- .../Dialect/Transform/IR/TransformDialect.cpp | 51 +++ .../Transform/IR/TransformInterfaces.cpp | 110 ++++-- .../lib/Dialect/Transform/IR/TransformOps.cpp | 350 ++++++++++++++---- .../TransformInterpreterPassBase.cpp | 3 + mlir/test/Dialect/Transform/ops-invalid.mlir | 181 +++++++++ .../Dialect/Transform/test-interpreter.mlir | 79 ++++ .../llvm-project-overlay/mlir/BUILD.bazel | 6 +- 11 files changed, 883 insertions(+), 108 deletions(-) diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformDialect.td b/mlir/include/mlir/Dialect/Transform/IR/TransformDialect.td index 0c8c0b6ae55a3..639a7c70db3ef 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformDialect.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformDialect.td @@ -23,8 +23,19 @@ def Transform_Dialect : Dialect { "::mlir::pdl_interp::PDLInterpDialect", ]; + let hasOperationAttrVerify = 1; let extraClassDeclaration = [{ + /// Name of the attribute attachable to the symbol table operation + /// containing named sequences. This is used to trigger verification. + constexpr const static llvm::StringLiteral + kWithNamedSequenceAttrName = "transform.with_named_sequence"; + + /// Names of the attribute attachable to an operation so it can be + /// identified as root by the default interpreter pass. + constexpr const static llvm::StringLiteral + kTargetTagAttrName = "transform.target_tag"; + /// Returns the named PDL constraint functions available in the dialect /// as a map from their name to the function. const ::llvm::StringMap<::mlir::PDLConstraintFunction> & diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h b/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h index b2332c83cf35e..78a812e0afa92 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformInterfaces.h @@ -192,6 +192,12 @@ class TransformState { // class body to comply with visibility and full-declaration requirements. inline RegionScope make_region_scope(Region ®ion); + /// Creates a new region scope for the given isolated-from-above region. + /// Unlike the non-isolated counterpart, there is no nesting expectation. + // Implementation note: this method is inline but implemented outside of the + // class body to comply with visibility and full-declaration requirements + inline RegionScope make_isolated_region_scope(Region ®ion); + /// A RAII object maintaining a "stack frame" for a transform IR region. When /// applying a transform IR operation that contains a region, the caller is /// expected to create a RegionScope before applying the ops contained in the @@ -201,17 +207,23 @@ class TransformState { class RegionScope { public: /// Forgets the mapping from or to values defined in the associated - /// transform IR region. + /// transform IR region, and restores the mapping that existed before + /// entering this scope. ~RegionScope() { state.mappings.erase(region); + if (storedMappings.has_value()) + state.mappings.swap(*storedMappings); #if LLVM_ENABLE_ABI_BREAKING_CHECKS state.regionStack.pop_back(); #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS } private: + /// Tag structure for differentiating the constructor for isolated regions. 
+ struct Isolated {}; + /// Creates a new scope for mappings between values defined in the given - /// transform IR region and payload IR operations. + /// transform IR region and payload IR objects. RegionScope(TransformState &state, Region ®ion) : state(state), region(®ion) { auto res = state.mappings.try_emplace(this->region); @@ -225,13 +237,33 @@ class TransformState { #endif // LLVM_ENABLE_ABI_BREAKING_CHECKS } + /// Creates a new scope for mappings between values defined in the given + /// isolated-from-above transform IR region and payload IR objects. + RegionScope(TransformState &state, Region ®ion, Isolated) + : state(state), region(®ion) { + // Store the previous mapping stack locally. + storedMappings = llvm::SmallDenseMap(); + storedMappings->swap(state.mappings); + state.mappings.try_emplace(this->region); +#if LLVM_ENABLE_ABI_BREAKING_CHECKS + state.regionStack.push_back(this->region); +#endif // LLVM_ENABLE_ABI_BREAKING_CHECKS + } + /// Back-reference to the transform state. TransformState &state; /// The region this scope is associated with. Region *region; + /// Local copy of the mappings that existed before entering the current + /// region. Used only when the current region is isolated so we don't + /// accidentally look up the values defined outside the isolated region. + std::optional> storedMappings = + std::nullopt; + friend RegionScope TransformState::make_region_scope(Region &); + friend RegionScope TransformState::make_isolated_region_scope(Region &); }; friend class RegionScope; @@ -551,6 +583,13 @@ class TransformResults { /// TransformValueHandleTypeInterface. void setValues(OpResult handle, ValueRange values); + /// Indicates that the result of the transform IR op at the given position + /// corresponds to the given range of mapped values. All mapped values are + /// expected to be compatible with the type of the result, e.g., if the result + /// is an operation handle, all mapped values are expected to be payload + /// operations. + void setMappedValues(OpResult handle, ArrayRef values); + private: /// Creates an instance of TransformResults that expects mappings for /// `numSegments` values, which may be associated with payload operations or @@ -597,10 +636,21 @@ class TransformResults { RaggedArray values; }; +/// Creates a RAII object the lifetime of which corresponds to the new mapping +/// for transform IR values defined in the given region. Values defined in +/// surrounding regions remain accessible. TransformState::RegionScope TransformState::make_region_scope(Region ®ion) { return RegionScope(*this, region); } +/// Creates a RAII object the lifetime of which corresponds to the new mapping +/// for transform IR values defined in the given isolated-from-above region. +/// Values defined in surrounding regions cannot be accessed. +TransformState::RegionScope +TransformState::make_isolated_region_scope(Region ®ion) { + return RegionScope(*this, region, RegionScope::Isolated()); +} + namespace detail { /// Maps the only block argument of the op with PossibleTopLevelTransformOpTrait /// to either the list of operations associated with its operand or the root of @@ -614,6 +664,12 @@ LogicalResult verifyPossibleTopLevelTransformOpTrait(Operation *op); /// Verification hook for TransformOpInterface. LogicalResult verifyTransformOpInterface(Operation *op); + +/// Populates `mappings` with mapped values associated with the given transform +/// IR values in the given `state`. 
+void prepareValueMappings( + SmallVectorImpl> &mappings, + ValueRange values, const transform::TransformState &state); } // namespace detail /// This trait is supposed to be attached to Transform dialect operations that diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.h b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.h index 7eb9a01fc0f07..2424b161917f4 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.h +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.h @@ -12,9 +12,11 @@ #include "mlir/Dialect/PDL/IR/PDLTypes.h" #include "mlir/Dialect/Transform/IR/TransformInterfaces.h" #include "mlir/Dialect/Transform/IR/TransformTypes.h" +#include "mlir/IR/FunctionInterfaces.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/SymbolTable.h" +#include "mlir/Interfaces/CallInterfaces.h" #include "mlir/Interfaces/CastInterfaces.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td index 886586513dc85..3ffc3f71433cc 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td @@ -9,10 +9,12 @@ #ifndef MLIR_DIALECT_TRANSFORM_IR_TRANSFORMOPS #define MLIR_DIALECT_TRANSFORM_IR_TRANSFORMOPS +include "mlir/Interfaces/CallInterfaces.td" include "mlir/Interfaces/CastInterfaces.td" include "mlir/Interfaces/ControlFlowInterfaces.td" include "mlir/Interfaces/InferTypeOpInterface.td" include "mlir/Interfaces/SideEffectInterfaces.td" +include "mlir/IR/FunctionInterfaces.td" include "mlir/IR/OpAsmInterface.td" include "mlir/IR/SymbolInterfaces.td" include "mlir/Dialect/Transform/IR/TransformAttrs.td" @@ -266,6 +268,51 @@ def GetResultOp : TransformDialectOp<"get_result", "functional-type(operands, results)"; } +def IncludeOp : TransformDialectOp<"include", + [CallOpInterface, + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods]> { + let summary = "Includes a named transform sequence"; + let description = [{ + The application of this transform operation is equivalent to applying the + operations contained in the named transform sequence with operands being + remapped to block arguments. The behavior of the operation when a + transformation in the included named sequence produces a silenceable error + is controlled by the `failure_propagation_mode` attribute. When set to + `propagate`, the failure of any nested transformation in the sequence + implies immediate failure of the entire sequence with a silenceable error, + and no further transformation is attempted. When set to `suppress`, + silenceable errors in nested operations are ignored and further + transformations are applied. Beware that even silenceable errors may leave + the payload IR in a state unsuitable for further transformations. It is the + responsibility of the user to ensure the following transformations are + robust enough when errors are suppressed. Definite errors are propagated + immediately regardless of the mode. The objects associated with the results + of this operation are the same as those associated with the operands of the + `transform.yield` in the referenced named sequence. 
+ }]; + + let arguments = (ins SymbolRefAttr:$target, + FailurePropagationMode:$failure_propagation_mode, + Variadic:$operands); + let results = (outs Variadic:$results); + + let assemblyFormat = + "$target `failures` `(` $failure_propagation_mode `)`" + "`(` $operands `)` attr-dict `:` functional-type($operands, $results)"; + + let extraClassDeclaration = [{ + ::mlir::CallInterfaceCallable getCallableForCallee() { + return getTarget(); + } + + ::mlir::Operation::operand_range getArgOperands() { + return getOperands(); + } + }]; +} + def MergeHandlesOp : TransformDialectOp<"merge_handles", [DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, @@ -289,6 +336,67 @@ def MergeHandlesOp : TransformDialectOp<"merge_handles", let hasFolder = 1; } +def NamedSequenceOp : TransformDialectOp<"named_sequence", + [CallableOpInterface, + FunctionOpInterface, + IsolatedFromAbove, + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods]> { + let summary = "Named transform sequence that can be included elsewhere"; + let description = [{ + Defines a named (callable, function-like) sequence of other Transform + dialect operations that can be included using `transform.include` as part of + another Transform dialect construct. This sequence is not processed + immediately but rather dispatched to when the inclusion is processed. The + arguments and results can be used to communicate a subset of mapping into + the named sequence. The sequence must consist of a single block and end with + a `transform.yield` terminator. The operands of the terminator become the + results of the `transform.include`. + + When dispatched to, the operations in the named sequence are executed one by + one, similarly to the regular unnamed sequence. The failure propagation mode + is specified on the `transform.include`. Different inclusions may use + different failure propagation modes. This transform operation always + succeeds by itself, but the inclusion may fail if any of the operations + fail. + + Named sequences can only appear at the top-level of the Transform dialect + nesting structure. That is, they cannot be nested in other Transform dialect + operations. Furthermore, one of the ancestors must have the `SymbolTable` + trait and have the `transform.with_named_sequence` attribute attached. + + Named sequences may include other named sequences via `transform.include`, + but recursion is *not* allowed. 
+ }]; + + let arguments = (ins + SymbolNameAttr:$sym_name, + TypeAttrBase<"::mlir::FunctionType", + "function type attribute">:$function_type, + OptionalAttr:$arg_attrs, + OptionalAttr:$res_attrs); + let regions = (region SizedRegion<1>:$body); + + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; + + let extraClassDeclaration = [{ + ::llvm::ArrayRef<::mlir::Type> getArgumentTypes() { + return getFunctionType().getInputs(); + } + ::llvm::ArrayRef<::mlir::Type> getResultTypes() { + return getFunctionType().getResults(); + } + + ::mlir::Region *getCallableRegion() { + return &getBody(); + } + ::llvm::ArrayRef<::mlir::Type> getCallableResults() { + return getFunctionType().getResults(); + } + }]; +} + def SplitHandlesOp : TransformDialectOp<"split_handles", [FunctionalStyleTransformOpTrait, DeclareOpInterfaceMethods, @@ -376,7 +484,6 @@ def PrintOp : TransformDialectOp<"print", let assemblyFormat = "$target attr-dict (`:` type($target)^)?"; } - def ReplicateOp : TransformDialectOp<"replicate", [DeclareOpInterfaceMethods, DeclareOpInterfaceMethods, @@ -426,21 +533,21 @@ def SequenceOp : TransformDialectOp<"sequence", let description = [{ The transformations indicated by the sequence are applied in order of their appearance. Each value produced by a transformation within the sequence - corresponds to an operation or a group of operations in the payload IR. - The behavior of the operation when a nested transformation produces a - silenceable error is controlled by the `failure_propagation_mode` attribute. - When set to `propagate`, the failure of any nested transformation in the - sequence implies immediate failure of the entire sequence with a silenceable - error, and no further transformation is attempted. When set to `suppress`, + corresponds to a group of operations or values in the payload IR, or to a + group of parameters, depending on the type of the value. The behavior of the + operation when a nested transformation produces a silenceable error is + controlled by the `failure_propagation_mode` attribute. When set to + `propagate`, the failure of any nested transformation in the sequence + implies immediate failure of the entire sequence with a silenceable error, + and no further transformation is attempted. When set to `suppress`, silenceable errors in nested operations are ignored and further transformations are applied. Beware that even silenceable errors may leave - the payload IR in a state unsuitable for further transformations. It is - the responsibility of the caller to ensure the following transformations - are robust enough when errors are suppressed. Definite errors reported by - nested transformations abort the sequence regardless of the propagation - mode. The set of modes may be extended in the future, e.g., to collect - silenceable errors and report them after attempting all transformations in - the sequence. + the payload IR in a state unsuitable for further transformations. It is the + responsibility of the caller to ensure the following transformations are + robust enough when errors are suppressed. Definite errors reported by nested + transformations abort the sequence regardless of the propagation mode. The + set of modes may be extended in the future, e.g., to collect silenceable + errors and report them after attempting all transformations in the sequence. 
The entry block of this operation has a single argument that maps to either the operand if provided or the top-level container operation of the payload @@ -565,7 +672,8 @@ def YieldOp : TransformDialectOp<"yield", }]; let arguments = (ins - Arg, "Operation handles yielded back to the parent" + Arg, + "Transform values yielded back to the parent" >:$operands); let assemblyFormat = "operands attr-dict (`:` type($operands)^)?"; diff --git a/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp b/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp index 1f61ecd971628..99ff80e08ebdd 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformDialect.cpp @@ -7,12 +7,14 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Transform/IR/TransformDialect.h" +#include "mlir/Analysis/CallGraph.h" #include "mlir/Dialect/PDL/IR/PDL.h" #include "mlir/Dialect/PDLInterp/IR/PDLInterp.h" #include "mlir/Dialect/Transform/IR/TransformInterfaces.h" #include "mlir/Dialect/Transform/IR/TransformOps.h" #include "mlir/Dialect/Transform/IR/TransformTypes.h" #include "mlir/IR/DialectImplementation.h" +#include "llvm/ADT/SCCIterator.h" using namespace mlir; @@ -128,4 +130,53 @@ void transform::TransformDialect::reportDuplicateOpRegistration( llvm::report_fatal_error(StringRef(buffer)); } +LogicalResult transform::TransformDialect::verifyOperationAttribute( + Operation *op, NamedAttribute attribute) { + if (attribute.getName().getValue() == kWithNamedSequenceAttrName) { + if (!op->hasTrait()) { + return emitError(op->getLoc()) << attribute.getName() + << " attribute can only be attached to " + "operations with symbol tables"; + } + + const mlir::CallGraph callgraph(op); + for (auto scc = llvm::scc_begin(&callgraph); !scc.isAtEnd(); ++scc) { + if (!scc.hasCycle()) + continue; + + // Need to check this here additionally because this verification may run + // before we check the nested operations. + if ((*scc->begin())->isExternal()) + return op->emitOpError() << "contains a call to an external operation, " + "which is not allowed"; + + Operation *first = (*scc->begin())->getCallableRegion()->getParentOp(); + InFlightDiagnostic diag = emitError(first->getLoc()) + << "recursion not allowed in named sequences"; + for (auto it = std::next(scc->begin()); it != scc->end(); ++it) { + // Need to check this here additionally because this verification may + // run before we check the nested operations. 
+ if ((*it)->isExternal()) { + return op->emitOpError() << "contains a call to an external " + "operation, which is not allowed"; + } + + Operation *current = (*it)->getCallableRegion()->getParentOp(); + diag.attachNote(current->getLoc()) << "operation on recursion stack"; + } + return diag; + } + return success(); + } + if (attribute.getName().getValue() == kTargetTagAttrName) { + if (!attribute.getValue().isa()) { + return op->emitError() + << attribute.getName() << " attribute must be a string"; + } + return success(); + } + return emitError(op->getLoc()) + << "unknown attribute: " << attribute.getName(); +} + #include "mlir/Dialect/Transform/IR/TransformDialectEnums.cpp.inc" diff --git a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp index 4002e59dd607d..c9045c50c9bac 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformInterfaces.cpp @@ -104,50 +104,77 @@ LogicalResult transform::TransformState::getHandlesForPayloadValue( return success(found); } -LogicalResult -transform::TransformState::mapBlockArgument(BlockArgument argument, - ArrayRef values) { - if (argument.getType().isa()) { +/// Given a list of MappedValues, cast them to the value kind implied by the +/// interface of the handle type, and dispatch to one of the callbacks. +static DiagnosedSilenceableFailure dispatchMappedValues( + Value handle, ArrayRef values, + function_ref)> operationsFn, + function_ref)> paramsFn, + function_ref valuesFn) { + if (handle.getType().isa()) { SmallVector operations; operations.reserve(values.size()); - for (MappedValue value : values) { + for (transform::MappedValue value : values) { if (auto *op = value.dyn_cast()) { operations.push_back(op); continue; } - return emitError(argument.getLoc()) + return emitSilenceableFailure(handle.getLoc()) << "wrong kind of value provided for top-level operation handle"; } - return setPayloadOps(argument, operations); + if (failed(operationsFn(operations))) + return DiagnosedSilenceableFailure::definiteFailure(); + return DiagnosedSilenceableFailure::success(); } - if (argument.getType().isa()) { + if (handle.getType().isa()) { SmallVector payloadValues; payloadValues.reserve(values.size()); - for (MappedValue value : values) { + for (transform::MappedValue value : values) { if (auto v = value.dyn_cast()) { payloadValues.push_back(v); continue; } - return emitError(argument.getLoc()) + return emitSilenceableFailure(handle.getLoc()) << "wrong kind of value provided for the top-level value handle"; } - return setPayloadValues(argument, payloadValues); + if (failed(valuesFn(payloadValues))) + return DiagnosedSilenceableFailure::definiteFailure(); + return DiagnosedSilenceableFailure::success(); } - assert(argument.getType().isa() && + assert(handle.getType().isa() && "unsupported kind of block argument"); - SmallVector parameters; + SmallVector parameters; parameters.reserve(values.size()); - for (MappedValue value : values) { + for (transform::MappedValue value : values) { if (auto attr = value.dyn_cast()) { parameters.push_back(attr); continue; } - return emitError(argument.getLoc()) + return emitSilenceableFailure(handle.getLoc()) << "wrong kind of value provided for top-level parameter"; } - return setParams(argument, parameters); + if (failed(paramsFn(parameters))) + return DiagnosedSilenceableFailure::definiteFailure(); + return DiagnosedSilenceableFailure::success(); +} + +LogicalResult 
+transform::TransformState::mapBlockArgument(BlockArgument argument, + ArrayRef values) { + return dispatchMappedValues( + argument, values, + [&](ArrayRef operations) { + return setPayloadOps(argument, operations); + }, + [&](ArrayRef params) { + return setParams(argument, params); + }, + [&](ValueRange payloadValues) { + return setPayloadValues(argument, payloadValues); + }) + .checkAndReport(); } LogicalResult @@ -887,6 +914,27 @@ void transform::TransformResults::setValues(OpResult handle, this->values.replace(position, values); } +void transform::TransformResults::setMappedValues( + OpResult handle, ArrayRef values) { + DiagnosedSilenceableFailure diag = dispatchMappedValues( + handle, values, + [&](ArrayRef operations) { + return set(handle, operations), success(); + }, + [&](ArrayRef params) { + return setParams(handle, params), success(); + }, + [&](ValueRange payloadValues) { + return setValues(handle, payloadValues), success(); + }); +#ifndef NDEBUG + if (!diag.succeeded()) + llvm::dbgs() << diag.getStatusString() << "\n"; + assert(diag.succeeded() && "incorrect mapping"); +#endif // NDEBUG + (void)diag.silence(); +} + ArrayRef transform::TransformResults::get(unsigned resultNumber) const { assert(resultNumber < operations.size() && @@ -1029,24 +1077,30 @@ void transform::detail::setApplyToOneResults( // Utilities for PossibleTopLevelTransformOpTrait. //===----------------------------------------------------------------------===// +void transform::detail::prepareValueMappings( + SmallVectorImpl> &mappings, + ValueRange values, const transform::TransformState &state) { + for (Value operand : values) { + SmallVector &mapped = mappings.emplace_back(); + if (operand.getType().isa()) { + llvm::append_range(mapped, state.getPayloadOps(operand)); + } else if (operand.getType().isa()) { + llvm::append_range(mapped, state.getPayloadValues(operand)); + } else { + assert(operand.getType().isa() && + "unsupported kind of transform dialect value"); + llvm::append_range(mapped, state.getParams(operand)); + } + } +} + LogicalResult transform::detail::mapPossibleTopLevelTransformOpBlockArguments( TransformState &state, Operation *op, Region ®ion) { SmallVector targets; SmallVector> extraMappings; if (op->getNumOperands() != 0) { llvm::append_range(targets, state.getPayloadOps(op->getOperand(0))); - for (Value operand : op->getOperands().drop_front()) { - SmallVector &mapped = extraMappings.emplace_back(); - if (operand.getType().isa()) { - llvm::append_range(mapped, state.getPayloadOps(operand)); - } else if (operand.getType().isa()) { - llvm::append_range(mapped, state.getPayloadValues(operand)); - } else { - assert(operand.getType().isa() && - "unsupported kind of transform dialect value"); - llvm::append_range(mapped, state.getParams(operand)); - } - } + prepareValueMappings(extraMappings, op->getOperands().drop_front(), state); } else { if (state.getNumTopLevelMappings() != region.front().getNumArguments() - 1) { diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index cc4382ac1e0a1..6051007749e35 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -11,6 +11,7 @@ #include "mlir/Dialect/Transform/IR/TransformDialect.h" #include "mlir/Dialect/Transform/IR/TransformInterfaces.h" #include "mlir/Dialect/Transform/IR/TransformTypes.h" +#include "mlir/IR/FunctionImplementation.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Interfaces/ControlFlowInterfaces.h" #include 
"mlir/Rewrite/FrozenRewritePatternSet.h" @@ -175,11 +176,19 @@ static void forwardEmptyOperands(Block *block, transform::TransformState &state, static void forwardTerminatorOperands(Block *block, transform::TransformState &state, transform::TransformResults &results) { - for (const auto &pair : llvm::zip(block->getTerminator()->getOperands(), - block->getParentOp()->getOpResults())) { - Value terminatorOperand = std::get<0>(pair); - OpResult result = std::get<1>(pair); - results.set(result, state.getPayloadOps(terminatorOperand)); + for (auto &&[terminatorOperand, result] : + llvm::zip(block->getTerminator()->getOperands(), + block->getParentOp()->getOpResults())) { + if (result.getType().isa()) { + results.set(result, state.getPayloadOps(terminatorOperand)); + } else if (result.getType() + .isa()) { + results.setValues(result, state.getPayloadValues(terminatorOperand)); + } else { + assert(result.getType().isa() && + "unhandled transform type interface"); + results.setParams(result, state.getParams(terminatorOperand)); + } } } @@ -524,6 +533,177 @@ transform::GetResultOp::apply(transform::TransformResults &results, return DiagnosedSilenceableFailure::success(); } +//===----------------------------------------------------------------------===// +// IncludeOp +//===----------------------------------------------------------------------===// + +/// Applies the transform ops contained in `block`. Maps `results` to the same +/// values as the operands of the block terminator. +static DiagnosedSilenceableFailure +applySequenceBlock(Block &block, transform::FailurePropagationMode mode, + transform::TransformState &state, + transform::TransformResults &results) { + // Apply the sequenced ops one by one. + for (Operation &transform : block.without_terminator()) { + DiagnosedSilenceableFailure result = + state.applyTransform(cast(transform)); + if (result.isDefiniteFailure()) + return result; + + if (result.isSilenceableFailure()) { + if (mode == transform::FailurePropagationMode::Propagate) { + // Propagate empty results in case of early exit. + forwardEmptyOperands(&block, state, results); + return result; + } + (void)result.silence(); + } + } + + // Forward the operation mapping for values yielded from the sequence to the + // values produced by the sequence op. + forwardTerminatorOperands(&block, state, results); + return DiagnosedSilenceableFailure::success(); +} + +DiagnosedSilenceableFailure +transform::IncludeOp::apply(transform::TransformResults &results, + transform::TransformState &state) { + auto callee = SymbolTable::lookupNearestSymbolFrom( + getOperation(), getTarget()); + assert(callee && "unverified reference to unknown symbol"); + + // Map operands to block arguments. 
+ SmallVector> mappings; + detail::prepareValueMappings(mappings, getOperands(), state); + auto scope = state.make_isolated_region_scope(callee.getBody()); + for (auto &&[arg, map] : + llvm::zip_equal(callee.getBody().front().getArguments(), mappings)) { + if (failed(state.mapBlockArgument(arg, map))) + return DiagnosedSilenceableFailure::definiteFailure(); + } + + DiagnosedSilenceableFailure result = applySequenceBlock( + callee.getBody().front(), getFailurePropagationMode(), state, results); + mappings.clear(); + detail::prepareValueMappings( + mappings, callee.getBody().front().getTerminator()->getOperands(), state); + for (auto &&[result, mapping] : llvm::zip_equal(getResults(), mappings)) + results.setMappedValues(result, mapping); + return result; +} + +/// Appends to `effects` the memory effect instances on `target` with the same +/// resource and effect as the ones the operation `iface` having on `source`. +static void +remapEffects(MemoryEffectOpInterface iface, BlockArgument source, Value target, + SmallVectorImpl &effects) { + SmallVector nestedEffects; + iface.getEffectsOnValue(source, nestedEffects); + for (const auto &effect : nestedEffects) + effects.emplace_back(effect.getEffect(), target, effect.getResource()); +} + +/// Appends to `effects` the same effects as the operations of `block` have on +/// block arguments but associated with `operands.` +static void +remapArgumentEffects(Block &block, ValueRange operands, + SmallVectorImpl &effects) { + for (Operation &op : block) { + auto iface = dyn_cast(&op); + if (!iface) + continue; + + for (auto &&[source, target] : llvm::zip(block.getArguments(), operands)) { + remapEffects(iface, source, target, effects); + } + + SmallVector nestedEffects; + iface.getEffectsOnResource(transform::PayloadIRResource::get(), + nestedEffects); + llvm::append_range(effects, nestedEffects); + } +} + +static DiagnosedSilenceableFailure +verifyNamedSequenceOp(transform::NamedSequenceOp op); + +void transform::IncludeOp::getEffects( + SmallVectorImpl &effects) { + // Bail if the callee is unknown. This may run as part of the verification + // process before we verified the validity of the callee or of this op. + auto target = + getOperation()->getAttrOfType(getTargetAttrName()); + if (!target) + return; + auto callee = SymbolTable::lookupNearestSymbolFrom( + getOperation(), getTarget()); + if (!callee) + return; + DiagnosedSilenceableFailure earlyVerifierResult = + verifyNamedSequenceOp(callee); + if (!earlyVerifierResult.succeeded()) { + (void)earlyVerifierResult.silence(); + return; + } + + // Carry over effects from the callee. + remapArgumentEffects(callee.getBody().front(), getOperands(), effects); + + // Proper effects. + onlyReadsHandle(getOperands(), effects); + producesHandle(getResults(), effects); +} + +template +static bool implementSameInterface(Type t1, Type t2) { + return ((isa(t1) && isa(t2)) || ... || false); +} + +LogicalResult +transform::IncludeOp::verifySymbolUses(SymbolTableCollection &symbolTable) { + // Access through indirection and do additional checking because this may be + // running before the main op verifier. 
+ auto targetAttr = getOperation()->getAttrOfType("target"); + if (!targetAttr) + return emitOpError() << "expects a 'target' symbol reference attribute"; + + auto target = symbolTable.lookupNearestSymbolFrom( + *this, targetAttr); + if (!target) + return emitOpError() << "does not reference a named transform sequence"; + + FunctionType fnType = target.getFunctionType(); + if (fnType.getNumInputs() != getNumOperands()) + return emitError("incorrect number of operands for callee"); + + for (unsigned i = 0, e = fnType.getNumInputs(); i != e; ++i) { + if (getOperand(i).getType() != fnType.getInput(i)) { + return emitOpError("operand type mismatch: expected operand type ") + << fnType.getInput(i) << ", but provided " + << getOperand(i).getType() << " for operand number " << i; + } + } + + if (fnType.getNumResults() != getNumResults()) + return emitError("incorrect number of results for callee"); + + for (unsigned i = 0, e = fnType.getNumResults(); i != e; ++i) { + Type resultType = getResult(i).getType(); + Type funcType = fnType.getResult(i); + if (!implementSameInterface(resultType, + funcType)) { + return emitOpError() << "type of result #" << i + << " must implement the same transform dialect " + "interface as the corresponding callee result"; + } + } + + return success(); +} + //===----------------------------------------------------------------------===// // MergeHandlesOp //===----------------------------------------------------------------------===// @@ -567,6 +747,105 @@ OpFoldResult transform::MergeHandlesOp::fold(FoldAdaptor adaptor) { return getHandles().front(); } +//===----------------------------------------------------------------------===// +// NamedSequenceOp +//===----------------------------------------------------------------------===// + +DiagnosedSilenceableFailure +transform::NamedSequenceOp::apply(transform::TransformResults &results, + transform::TransformState &state) { + // Nothing to do here. + return DiagnosedSilenceableFailure::success(); +} + +void transform::NamedSequenceOp::getEffects( + SmallVectorImpl &effects) {} + +ParseResult transform::NamedSequenceOp::parse(OpAsmParser &parser, + OperationState &result) { + return function_interface_impl::parseFunctionOp( + parser, result, /*allowVariadic=*/false, + getFunctionTypeAttrName(result.name), + [](Builder &builder, ArrayRef inputs, ArrayRef results, + function_interface_impl::VariadicFlag, + std::string &) { return builder.getFunctionType(inputs, results); }, + getArgAttrsAttrName(result.name), getResAttrsAttrName(result.name)); +} + +void transform::NamedSequenceOp::print(OpAsmPrinter &printer) { + function_interface_impl::printFunctionOp( + printer, cast(getOperation()), /*isVariadic=*/false, + getFunctionTypeAttrName().getValue(), getArgAttrsAttrName(), + getResAttrsAttrName()); +} + +/// Verification of a NamedSequenceOp. This does not report the error +/// immediately, so it can be used to check for op's well-formedness before the +/// verifier runs, e.g., during trait verification. 
+static DiagnosedSilenceableFailure +verifyNamedSequenceOp(transform::NamedSequenceOp op) { + if (op.isExternal()) + return emitSilenceableFailure(op) << "cannot be empty"; + + if (Operation *parent = op->getParentWithTrait()) { + if (!parent->getAttr( + transform::TransformDialect::kWithNamedSequenceAttrName)) { + DiagnosedSilenceableFailure diag = + emitSilenceableFailure(op) + << "expects the parent symbol table to have the '" + << transform::TransformDialect::kWithNamedSequenceAttrName + << "' attribute"; + diag.attachNote(parent->getLoc()) << "symbol table operation"; + return diag; + } + } + + if (auto parent = op->getParentOfType()) { + DiagnosedSilenceableFailure diag = + emitSilenceableFailure(op) + << "cannot be defined inside another transform op"; + diag.attachNote(parent.getLoc()) << "ancestor transform op"; + return diag; + } + + if (op.getBody().front().empty()) + return emitSilenceableFailure(op) << "expected a non-empty body block"; + + Operation *terminator = &op.getBody().front().back(); + if (!isa(terminator)) { + DiagnosedSilenceableFailure diag = emitSilenceableFailure(op) + << "expected '" + << transform::YieldOp::getOperationName() + << "' as terminator"; + diag.attachNote(terminator->getLoc()) << "terminator"; + return diag; + } + + if (terminator->getNumOperands() != op.getFunctionType().getNumResults()) { + return emitSilenceableFailure(terminator) + << "expected terminator to have as many operands as the parent op " + "has results"; + } + for (auto [i, operandType, resultType] : + llvm::zip_equal(llvm::seq(0, terminator->getNumOperands()), + terminator->getOperands().getType(), + op.getFunctionType().getResults())) { + if (operandType == resultType) + continue; + return emitSilenceableFailure(terminator) + << "the type of the terminator operand #" << i + << " must match the type of the corresponding parent op result (" + << operandType << " vs " << resultType << ")"; + } + + return DiagnosedSilenceableFailure::success(); +} + +LogicalResult transform::NamedSequenceOp::verify() { + // Actual verification happens in a separate function for reusability. + return verifyNamedSequenceOp(*this).checkAndReport(); +} + //===----------------------------------------------------------------------===// // SplitHandlesOp //===----------------------------------------------------------------------===// @@ -692,27 +971,8 @@ transform::SequenceOp::apply(transform::TransformResults &results, if (failed(mapBlockArguments(state))) return DiagnosedSilenceableFailure::definiteFailure(); - // Apply the sequenced ops one by one. - for (Operation &transform : getBodyBlock()->without_terminator()) { - DiagnosedSilenceableFailure result = - state.applyTransform(cast(transform)); - if (result.isDefiniteFailure()) - return result; - - if (result.isSilenceableFailure()) { - if (getFailurePropagationMode() == FailurePropagationMode::Propagate) { - // Propagate empty results in case of early exit. - forwardEmptyOperands(getBodyBlock(), state, results); - return result; - } - (void)result.silence(); - } - } - - // Forward the operation mapping for values yielded from the sequence to the - // values produced by the sequence op. 
- forwardTerminatorOperands(getBodyBlock(), state, results); - return DiagnosedSilenceableFailure::success(); + return applySequenceBlock(*getBodyBlock(), getFailurePropagationMode(), state, + results); } static ParseResult parseSequenceOpOperands( @@ -871,22 +1131,6 @@ LogicalResult transform::SequenceOp::verify() { return success(); } -/// Appends to `effects` the memory effect instances on `target` with the same -/// resource and effect as the ones the operation `iface` having on `source`. -static void -remapEffects(MemoryEffectOpInterface iface, BlockArgument source, Value target, - SmallVectorImpl &effects) { - SmallVector nestedEffects; - iface.getEffectsOnValue(source, nestedEffects); - for (const auto &effect : nestedEffects) - effects.emplace_back(effect.getEffect(), target, effect.getResource()); -} - -namespace { -template -using has_get_extra_bindings = decltype(std::declval().getExtraBindings()); -} // namespace - /// Populate `effects` with transform dialect memory effects for the potential /// top-level operation. Such operations have recursive effects from nested /// operations. When they have an operand, we can additionally remap effects on @@ -911,26 +1155,8 @@ static void getPotentialTopLevelEffects( // Carry over all effects on arguments of the entry block as those on the // operands, this is the same value just remapped. - for (Operation &op : *operation.getBodyBlock()) { - auto iface = dyn_cast(&op); - if (!iface) - continue; - - remapEffects(iface, operation.getBodyBlock()->getArgument(0), - operation.getRoot(), effects); - if constexpr (llvm::is_detected::value) { - for (auto [source, target] : - llvm::zip(operation.getBodyBlock()->getArguments().drop_front(), - operation.getExtraBindings())) { - remapEffects(iface, source, target, effects); - } - } - - SmallVector nestedEffects; - iface.getEffectsOnResource(transform::PayloadIRResource::get(), - nestedEffects); - llvm::append_range(effects, nestedEffects); - } + remapArgumentEffects(*operation.getBodyBlock(), operation->getOperands(), + effects); } void transform::SequenceOp::getEffects( diff --git a/mlir/lib/Dialect/Transform/Transforms/TransformInterpreterPassBase.cpp b/mlir/lib/Dialect/Transform/Transforms/TransformInterpreterPassBase.cpp index 3d6ee21478e5a..40624e6ce54e4 100644 --- a/mlir/lib/Dialect/Transform/Transforms/TransformInterpreterPassBase.cpp +++ b/mlir/lib/Dialect/Transform/Transforms/TransformInterpreterPassBase.cpp @@ -83,6 +83,9 @@ static Operation *findTopLevelTransform(Operation *root, ::mlir::transform::TransformOpInterface topLevelTransform = nullptr; WalkResult walkResult = root->walk( [&](::mlir::transform::TransformOpInterface transformOp) { + if (!transformOp + ->hasTrait()) + return WalkResult::skip(); if (!topLevelTransform) { topLevelTransform = transformOp; return WalkResult::skip(); diff --git a/mlir/test/Dialect/Transform/ops-invalid.mlir b/mlir/test/Dialect/Transform/ops-invalid.mlir index 4abaa233a5d1f..ee03d9e3eca92 100644 --- a/mlir/test/Dialect/Transform/ops-invalid.mlir +++ b/mlir/test/Dialect/Transform/ops-invalid.mlir @@ -284,3 +284,184 @@ transform.sequence failures(suppress) { // expected-note @below {{no 'allocate' effect specified for result #0}} transform.test_required_memory_effects %arg0 {has_operand_effect, modifies_payload} : (!transform.any_op) -> !transform.any_op } + +// ----- + +// expected-error @below {{attribute can only be attached to operations with symbol tables}} +"test.unknown_container"() { transform.with_named_sequence } : () -> () + +// ----- + 
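+// For reference, a minimal well-formed module under the new rules (a
+// sketch mirroring the valid cases in test-interpreter.mlir further down
+// in this patch) looks like:
+//   module attributes { transform.with_named_sequence } {
+//     transform.named_sequence @foo(%arg0: !transform.any_op) -> () {
+//       transform.yield
+//     }
+//     transform.sequence failures(propagate) {
+//     ^bb0(%arg0: !transform.any_op):
+//       transform.include @foo failures(propagate) (%arg0) : (!transform.any_op) -> ()
+//     }
+//   }
+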
+module attributes { transform.with_named_sequence } { + // expected-error @below {{failed to verify constraint: region with 1 blocks}} + "transform.named_sequence"() ({}) { sym_name = "external_named_sequence", function_type = () -> () } : () -> () + + transform.sequence failures(propagate) { + ^bb0(%arg0: !transform.any_op): + transform.include @external_named_sequence failures(propagate) () : () -> () + } +} + +// ----- + +module attributes { transform.with_named_sequence } { + // expected-error @below {{recursion not allowed in named sequences}} + transform.named_sequence @self_recursion() -> () { + transform.include @self_recursion failures(suppress) () : () -> () + } +} + +// ----- + +module @mutual_recursion attributes { transform.with_named_sequence } { + // expected-note @below {{operation on recursion stack}} + transform.named_sequence @foo(%arg0: !transform.any_op) -> () { + transform.include @bar failures(suppress) (%arg0) : (!transform.any_op) -> () + transform.yield + } + + // expected-error @below {{recursion not allowed in named sequences}} + transform.named_sequence @bar(%arg0: !transform.any_op) -> () { + transform.include @foo failures(propagate) (%arg0) : (!transform.any_op) -> () + transform.yield + } +} + +// ----- + +// expected-error @below {{unknown attribute: "transform.unknown_container"}} +module @unknown_attribute attributes { transform.unknown_container } {} + +// ----- + +module { + transform.sequence failures(suppress) { + ^bb0(%arg0: !transform.any_op): + // expected-error @below {{op does not reference a named transform sequence}} + transform.include @non_existent failures(propagate) () : () -> () + } +} + +// ----- + +module attributes { transform.with_named_sequence } { + transform.sequence failures(suppress) { + ^bb0(%arg0: !transform.any_op): + // expected-error @below {{requires attribute 'target'}} + "transform.include"() {failure_propagation_mode = 0} : () -> () + } +} + +// ----- + +module attributes { transform.with_named_sequence } { + transform.named_sequence @foo(%arg0: !transform.any_op) -> () { + transform.yield + } + + transform.sequence failures(suppress) { + ^bb0(%arg1: !transform.any_op): + // expected-error @below {{incorrect number of operands for callee}} + transform.include @foo failures(suppress) () : () -> () + } +} + +// ----- + +module attributes { transform.with_named_sequence } { + transform.named_sequence @foo(%arg0: !transform.any_op) -> () { + transform.yield + } + + transform.sequence failures(suppress) { + ^bb0(%arg1: !transform.op<"builtin.module">): + // expected-error @below {{operand type mismatch: expected operand type '!transform.any_op', but provided '!transform.op<"builtin.module">' for operand number 0}} + transform.include @foo failures(suppress) (%arg1) : (!transform.op<"builtin.module">) -> () + } +} + +// ----- + +module attributes { transform.with_named_sequence } { + transform.named_sequence @foo(%arg0: !transform.any_op) -> (!transform.any_op) { + transform.yield %arg0 : !transform.any_op + } + + transform.sequence failures(suppress) { + ^bb0(%arg1: !transform.any_op): + // expected-error @below {{incorrect number of results for callee}} + transform.include @foo failures(suppress) (%arg1) : (!transform.any_op) -> () + } +} + +// ----- + +module attributes { transform.with_named_sequence } { + transform.named_sequence @foo(%arg0: !transform.any_op) -> (!transform.any_op) { + transform.yield %arg0 : !transform.any_op + } + + transform.sequence failures(suppress) { + ^bb0(%arg1: !transform.any_op): + // 
expected-error @below {{type of result #0 must implement the same transform dialect interface as the corresponding callee result}} + transform.include @foo failures(suppress) (%arg1) : (!transform.any_op) -> (!transform.any_value) + } +} + +// ----- + +// expected-note @below {{symbol table operation}} +module { + // expected-error @below {{expects the parent symbol table to have the 'transform.with_named_sequence' attribute}} + transform.named_sequence @parent_has_no_attributes() { + transform.yield + } +} + +// ----- + +module attributes { transform.with_named_sequence} { + // expected-note @below {{ancestor transform op}} + transform.sequence failures(suppress) { + ^bb0(%arg0: !transform.any_op): + // expected-error @below {{cannot be defined inside another transform op}} + transform.named_sequence @nested() { + transform.yield + } + } +} + +// ----- + +module attributes { transform.with_named_sequence} { + func.func private @foo() + + // expected-error @below {{expected 'transform.yield' as terminator}} + transform.named_sequence @nested() { + // expected-note @below {{terminator}} + func.call @foo() : () -> () + } +} + + +// ----- + +module attributes { transform.with_named_sequence} { + func.func private @foo() + + transform.named_sequence @nested(%arg0: !transform.any_op) { + // expected-error @below {{expected terminator to have as many operands as the parent op has results}} + transform.yield %arg0 : !transform.any_op + } +} + +// ----- + +module attributes { transform.with_named_sequence} { + func.func private @foo() + + transform.named_sequence @nested(%arg0: !transform.any_op) -> !transform.op<"builtin.module"> { + // expected-error @below {{the type of the terminator operand #0 must match the type of the corresponding parent op result}} + transform.yield %arg0 : !transform.any_op + } +} diff --git a/mlir/test/Dialect/Transform/test-interpreter.mlir b/mlir/test/Dialect/Transform/test-interpreter.mlir index 3a7f42015f38b..6b2b0dd3157c5 100644 --- a/mlir/test/Dialect/Transform/test-interpreter.mlir +++ b/mlir/test/Dialect/Transform/test-interpreter.mlir @@ -1255,3 +1255,82 @@ transform.sequence failures(propagate) { %op = transform.get_defining_op %bbarg : (!transform.any_value) -> !transform.any_op transform.test_print_remark_at_operand %op, "matched" : !transform.any_op } + +// ----- + +module @named_inclusion attributes { transform.with_named_sequence } { + + transform.named_sequence @foo(%arg0: !transform.any_op) -> () { + // expected-remark @below {{applying transformation "a"}} + transform.test_transform_op "a" + transform.yield + } + + transform.sequence failures(propagate) { + ^bb0(%arg0: !transform.any_op): + include @foo failures(propagate) (%arg0) : (!transform.any_op) -> () + } +} + +// ----- + +module @named_inclusion_in_named attributes { transform.with_named_sequence } { + + transform.named_sequence @foo(%arg0: !transform.any_op) -> () { + // expected-remark @below {{applying transformation "a"}} + transform.test_transform_op "a" + transform.yield + } + + transform.named_sequence @bar(%arg0: !transform.any_op) -> () { + // expected-remark @below {{applying transformation "b"}} + transform.test_transform_op "b" + transform.include @foo failures(propagate) (%arg0) : (!transform.any_op) -> () + transform.yield + } + + transform.sequence failures(propagate) { + ^bb0(%arg0: !transform.any_op): + transform.include @bar failures(suppress) (%arg0) : (!transform.any_op) -> () + } +} + +// ----- + +// expected-remark @below {{operation}} +module @named_operands attributes { 
transform.with_named_sequence } { + + transform.named_sequence @foo(%arg0: !transform.any_op, %arg1: !transform.any_value) -> () { + transform.test_print_remark_at_operand %arg0, "operation" : !transform.any_op + transform.test_print_remark_at_operand_value %arg1, "value" : !transform.any_value + transform.yield + } + + transform.sequence failures(propagate) { + // expected-remark @below {{value}} + // expected-note @below {{value handle points to a block argument #0 in block #0 in region #0}} + ^bb0(%arg0: !transform.any_op): + %0 = transform.test_produce_value_handle_to_self_operand %arg0 : (!transform.any_op) -> !transform.any_value + include @foo failures(propagate) (%arg0, %0) : (!transform.any_op, !transform.any_value) -> () + } +} + +// ----- + +// expected-remark @below {{operation}} +module @named_return attributes { transform.with_named_sequence } { + + // expected-remark @below {{value}} + // expected-note @below {{value handle points to a block argument #0 in block #0 in region #0}} + transform.named_sequence @foo(%arg0: !transform.any_op) -> (!transform.any_op, !transform.any_value) { + %0 = transform.test_produce_value_handle_to_self_operand %arg0 : (!transform.any_op) -> !transform.any_value + transform.yield %arg0, %0 : !transform.any_op, !transform.any_value + } + + transform.sequence failures(propagate) { + ^bb0(%arg0: !transform.any_op): + %0:2 = include @foo failures(propagate) (%arg0) : (!transform.any_op) -> (!transform.any_op, !transform.any_value) + transform.test_print_remark_at_operand %0#0, "operation" : !transform.any_op + transform.test_print_remark_at_operand_value %0#1, "value" : !transform.any_value + } +} diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 3bef7dd570562..99a8653d81478 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -9316,7 +9316,10 @@ gentbl_cc_library( ], tblgen = ":mlir-tblgen", td_file = "include/mlir/Dialect/Transform/IR/TransformOps.td", - deps = [":TransformDialectTdFiles"], + deps = [ + ":CallInterfacesTdFiles", + ":TransformDialectTdFiles" + ], ) gentbl_cc_library( @@ -9342,6 +9345,7 @@ cc_library( srcs = glob(["lib/Dialect/Transform/IR/*.cpp"]), hdrs = glob(["include/mlir/Dialect/Transform/IR/*.h"]), deps = [ + ":CallInterfaces", ":ControlFlowInterfaces", ":IR", ":PDLDialect", From fd004a4986eb3ecc14f03a4ff4eef9bc06c78059 Mon Sep 17 00:00:00 2001 From: Maya Amrami Date: Thu, 9 Mar 2023 13:51:27 +0200 Subject: [PATCH 187/691] [mlir] tosa.concat - Add InferTensorType interface When this interface is used, a call to inferReturnTypeComponents() is generated on creation and verification of the op. A few changes were required in inferReturnTypeComponents(): - Emit error when it fails. The verifier calls this method now, and it is preferable to indicate what caused the failure. - Fix the inferred return shapes so they have a type too. 
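For illustration (shapes taken from the tests below):

  // Non-axis dimensions agree, so both the shape and the element type of
  // the result are inferred:
  %0 = "tosa.concat"(%arg0, %arg1) {axis = 1 : i64}
         : (tensor<2x1xf32>, tensor<2x2xf32>) -> tensor<2x3xf32>
  // With axis = 0 the non-axis dimension 1 differs (1 vs. 2), which now
  // fails verification with "Cannot concat tensors with different sizes
  // on the non-axis dimension 1".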
Reviewed By: rsuderman Differential Revision: https://reviews.llvm.org/D146132 --- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 9 +++++++-- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 17 +++++++++++++---- mlir/test/Dialect/Tosa/invalid.mlir | 6 ++++++ mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir | 10 ---------- 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index be5720caeb0de..7c8018ad64606 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -1419,8 +1419,7 @@ def Tosa_ReduceSumOp : Tosa_Op<"reduce_sum", [ // Operator: concat //===----------------------------------------------------------------------===// def Tosa_ConcatOp : Tosa_Op<"concat", [ - DeclareOpInterfaceMethods, + InferTensorType, Pure]> { let summary = "Concatenates tensors along one dimension."; @@ -1439,6 +1438,12 @@ def Tosa_ConcatOp : Tosa_Op<"concat", [ ); let hasCanonicalizer = 1; + + let extraClassDeclaration = [{ + /// Returns true when two result types are compatible for this op; + /// Method used by InferTypeOpInterface. + static bool isCompatibleReturnTypes(TypeRange l, TypeRange r); + }]; } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index d7bb6d0bddbf6..0a09cdd19e2d8 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -422,6 +422,12 @@ LogicalResult tosa::FFT2dOp::inferReturnTypeComponents( return success(); } +bool tosa::ConcatOp::isCompatibleReturnTypes(TypeRange l, TypeRange r) { + if (l.size() != r.size() || l.size() != 1) + return false; + return succeeded(verifyCompatibleShape(l[0], r[0])); +} + LogicalResult tosa::ConcatOp::inferReturnTypeComponents( MLIRContext *context, ::std::optional location, ValueShapeRange operands, DictionaryAttr attributes, RegionRange regions, @@ -447,14 +453,17 @@ LogicalResult tosa::ConcatOp::inferReturnTypeComponents( if (outputShape[i] == ShapedType::kDynamic) outputShape[i] = operandShape.getDimSize(i); if (outputShape[i] != operandShape.getDimSize(i)) - return failure(); + return emitOptionalError(location, + "Cannot concat tensors with different sizes" + " on the non-axis dimension ", + i); } hasRankedInput = true; } - + Type inputType = operands.getType()[0].cast().getElementType(); if (!hasRankedInput) { - inferredReturnShapes.push_back(ShapedTypeComponents()); + inferredReturnShapes.push_back(ShapedTypeComponents(inputType)); return success(); } @@ -475,7 +484,7 @@ LogicalResult tosa::ConcatOp::inferReturnTypeComponents( outputShape[axis] = concatDimSize; - inferredReturnShapes.push_back(ShapedTypeComponents(outputShape)); + inferredReturnShapes.push_back(ShapedTypeComponents(outputShape, inputType)); return success(); } diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index c81b19639cd64..9f9c6ca6ce641 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -36,4 +36,10 @@ func.func @test_conv2d(%arg0: tensor<1x29x29x4xi8>, %arg1: tensor<16x3x3x4xi8>, return %0 : tensor<1x27x27x16xi8> } +// ----- +func.func @test_concat(%arg0 : tensor<2x1xf32>, %arg1 : tensor<2x2xf32>) -> tensor { + // expected-error@+1 {{Cannot concat tensors with different sizes on the non-axis dimension 1}} + %0 = "tosa.concat"(%arg0, %arg1) {axis = 0 : i64} : (tensor<2x1xf32>, tensor<2x2xf32>) 
-> tensor + return %0 : tensor +} diff --git a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir index 94eea3b36eae2..505350786d08d 100644 --- a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir +++ b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir @@ -491,16 +491,6 @@ func.func @test_concat_axis_1(%arg0 : tensor<2x1xf32>, %arg1 : tensor<2x2xf32>) // ----- -// CHECK-LABEL: @test_concat_failure -func.func @test_concat_failure(%arg0 : tensor<2x1xf32>, %arg1 : tensor<2x2xf32>) -> () { - // CHECK: "tosa.concat"(%arg0, %arg1) {axis = 0 : i64} : (tensor<2x1xf32>, tensor<2x2xf32>) -> tensor - %0 = "tosa.concat"(%arg0, %arg1) {axis = 0 : i64} : (tensor<2x1xf32>, tensor<2x2xf32>) -> tensor - - return -} - -// ----- - // CHECK-LABEL: @test_padding_no_const func.func @test_padding_no_const(%arg0 : tensor<1x2xf32>, %arg1 : tensor<2x2xi32>) -> () { // CHECK: "tosa.pad"(%arg0, %arg1) : (tensor<1x2xf32>, tensor<2x2xi32>) -> tensor From faa1043144b3f6c3362fe1d9f43e55e741b40386 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Tue, 21 Mar 2023 16:07:25 +0100 Subject: [PATCH 188/691] [flang] Carry over dynamic type information when creating an unlimited polymorphic temp The dynamic type must be carried over in a PolymorphicValue when the address is loaded from an unlimited polymorphic allocatable. Reviewed By: PeteSteinfeld Differential Revision: https://reviews.llvm.org/D146525 --- flang/lib/Lower/ConvertExpr.cpp | 6 ++--- flang/lib/Optimizer/Builder/FIRBuilder.cpp | 9 ++++++-- flang/test/Lower/polymorphic.f90 | 26 ++++++++++++++++++++++ 3 files changed, 35 insertions(+), 6 deletions(-) diff --git a/flang/lib/Lower/ConvertExpr.cpp b/flang/lib/Lower/ConvertExpr.cpp index 03b803e8271b1..3138d24fc5322 100644 --- a/flang/lib/Lower/ConvertExpr.cpp +++ b/flang/lib/Lower/ConvertExpr.cpp @@ -416,7 +416,8 @@ static fir::ExtendedValue genLoad(fir::FirOpBuilder &builder, if (fir::unwrapRefType(fir::getBase(p).getType()) .isa()) return p; - return builder.create(loc, fir::getBase(p)); + mlir::Value load = builder.create(loc, fir::getBase(p)); + return fir::PolymorphicValue(load, p.getSourceBox()); }, [&](const fir::UnboxedValue &v) -> fir::ExtendedValue { if (fir::unwrapRefType(fir::getBase(v).getType()) @@ -429,9 +430,6 @@ static fir::ExtendedValue genLoad(fir::FirOpBuilder &builder, fir::factory::genMutableBoxRead(builder, loc, box)); }, [&](const fir::BoxValue &box) -> fir::ExtendedValue { - if (box.isUnlimitedPolymorphic()) - fir::emitFatalError( - loc, "attempting to load an unlimited polymorphic entity"); return genLoad(builder, loc, fir::factory::readBoxValue(builder, loc, box)); }, diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index 244f972ca5a34..020d6c7b27eb1 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -816,7 +816,7 @@ fir::factory::getExtents(mlir::Location loc, fir::FirOpBuilder &builder, fir::ExtendedValue fir::factory::readBoxValue(fir::FirOpBuilder &builder, mlir::Location loc, const fir::BoxValue &box) { - assert(!box.isUnlimitedPolymorphic() && !box.hasAssumedRank() && + assert(!box.hasAssumedRank() && "cannot read unlimited polymorphic or assumed rank fir.box"); auto addr = builder.create(loc, box.getMemTy(), box.getAddr()); @@ -830,10 +830,15 @@ fir::ExtendedValue fir::factory::readBoxValue(fir::FirOpBuilder &builder, } if (box.isDerivedWithLenParameters()) TODO(loc, "read fir.box with length parameters"); mlir::Value sourceBox; + if 
(box.isPolymorphic()) + sourceBox = box.getAddr(); + if (box.isPolymorphic() && box.rank() == 0) + return fir::PolymorphicValue(addr, sourceBox); if (box.rank() == 0) return addr; return fir::ArrayBoxValue(addr, fir::factory::readExtents(builder, loc, box), - box.getLBounds()); + box.getLBounds(), sourceBox); } llvm::SmallVector diff --git a/flang/test/Lower/polymorphic.f90 b/flang/test/Lower/polymorphic.f90 index 67699bd32495f..ccc3d86998611 100644 --- a/flang/test/Lower/polymorphic.f90 +++ b/flang/test/Lower/polymorphic.f90 @@ -53,6 +53,10 @@ module polymorphic_test class(p1), allocatable :: a(:) end type + type :: p5 + class(*), allocatable :: up + end type + contains elemental subroutine assign_p1_int(lhs, rhs) @@ -1138,6 +1142,28 @@ subroutine class_array_with_entry(a) ! CHECK-SAME: %[[B:.*]]: !fir.class>> {fir.bindc_name = "b"}) { ! CHECK: %[[A:.*]] = fir.alloca !fir.class>>> + subroutine pass_up(up) + class(*), intent(in) :: up + end subroutine + + subroutine parenthesized_up(a) + type(p5) :: a + call pass_up((a%up)) + end subroutine + +! CHECK-LABEL: func.func @_QMpolymorphic_testPparenthesized_up( +! CHECK-SAME: %[[ARG0:.*]]: !fir.ref>}>> {fir.bindc_name = "a"}) { +! CHECK: %[[ALLOCA:.*]] = fir.alloca +! CHECK: %[[FIELD_UP:.*]] = fir.field_index up, !fir.type<_QMpolymorphic_testTp5{up:!fir.class>}> +! CHECK: %[[COORD:.*]] = fir.coordinate_of %[[ARG0]], %[[FIELD_UP]] : (!fir.ref>}>>, !fir.field) -> !fir.ref>> +! CHECK: %[[LOAD:.*]] = fir.load %[[COORD]] : !fir.ref>> +! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[LOAD]] : (!fir.class>) -> !fir.heap +! CHECK: %[[LOAD_ADDR:.*]] = fir.load %[[BOX_ADDR]] : !fir.heap +! CHECK: %[[NO_REASSOC:.*]] = fir.no_reassoc %[[LOAD_ADDR]] : none +! CHECK: fir.store %[[NO_REASSOC]] to %[[ALLOCA]] : !fir.ref +! CHECK: %[[EMBOX:.*]] = fir.embox %[[ALLOCA]] source_box %[[LOAD]] : (!fir.ref, !fir.class>) -> !fir.class +! CHECK: fir.call @_QMpolymorphic_testPpass_up(%[[EMBOX]]) fastmath : (!fir.class) -> () + end module program test From b7af34c303ca3dc53ee5945b0c692b3b10ebae94 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 21 Mar 2023 07:45:30 -0700 Subject: [PATCH 189/691] [LSR] Add a test case for (another) miscompile in lsr-term-fold Derived from an observation by @nikic on D146457. --- .../LoopStrengthReduce/lsr-term-fold.ll | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll index bb6b74ea8c387..83c4f64b041b9 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll @@ -70,6 +70,41 @@ for.end: ; preds = %for.body ret void } +; In this case, the i8 IVs increment *isn't* nsw. As a result, a N of 0 +; is well defined, and thus the post-inc starts at 255. 
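+; For example, with %N == 0 the first iteration computes
+; %lsr.iv.next = add i8 0, -1, which wraps to 255 because the add carries
+; no nsw/nuw, so the loop executes 256 iterations before %lsr.iv.next
+; reaches 0 again.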
+; FIXME: miscompile +define void @wrap_around(ptr %a, i8 %N) { +; CHECK-LABEL: @wrap_around( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[N:%.*]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] +; CHECK-NEXT: store i8 1, ptr [[LSR_IV1]], align 4 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] +; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %a, %entry ] + %lsr.iv = phi i8 [ %lsr.iv.next, %for.body ], [ %N, %entry ] + store i8 1, ptr %lsr.iv1, align 4 + %lsr.iv.next = add i8 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4 + %exitcond.not = icmp eq i8 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + ; The replacing AddRec IV is a complicated AddRec. This tests whether ; the fold terminating condition transformation is writing new terminating ; condition in the correct type. From b33f5e7ed3cd04797b721e32b982b2cf6e06e192 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 21 Mar 2023 08:02:32 -0700 Subject: [PATCH 190/691] [LSR] Use evaluateAtIteration in lsr-term-fold This is a follow up to one of the side discussions on D146429. There are two semantic changes contained here. The motivation for the change to the legality condition introduced in D146429 comes from the fact that we only check the post-inc form. As such, as long as the values of the post-inc variable don't self wrap, it's actually okay if we wrap past the starting value of the pre-inc IV. Second, Nikic noticed during review that the test changes changed behavior for TC=0 (i.e. N=0 in the tests). On more careful inspection, it became apparent that the previous manual expansion code was incorrect in the case where the primary IV could wrap without poison, and started with the limit value (i.e. i8 post-inc starts at 255 for 0 exit test, implying pre-inc starts with 0). See @wrap_around test for an example of the (previous) miscompile. Differential Revision: https://reviews.llvm.org/D146457 --- .../Transforms/Scalar/LoopStrengthReduce.cpp | 35 +++++++--------- .../LoopStrengthReduce/lsr-term-fold.ll | 42 +++++++++++-------- 2 files changed, 38 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 0a4d815e67206..7c6cea0dd1497 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6763,33 +6763,26 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, } // Check that we can compute the value of AddRec on the exiting iteration - // without soundness problems. There are two cases to be worried about: - // 1) BECount could be 255 with type i8. Simply adding one would be - // incorrect. We may need one extra bit to represent the unsigned - // trip count. - // 2) The multiplication of stride by TC may wrap around. 
This is subtle - // because computing the result accounting for wrap is insufficient. - // In order to use the result in an exit test, we must also know that - // AddRec doesn't take the same value on any previous iteration. - // The simplest case to consider is a candidate IV which is narrower - // than the trip count (and thus original IV), but this can also - // happen due to non-unit strides on the candidate IVs. + // without soundness problems. evaluateAtIteration internally needs + // to multiply the stride of the iteration number - which may wrap around. + // The issue here is subtle because computing the result accounting for + // wrap is insufficient. In order to use the result in an exit test, we + // must also know that AddRec doesn't take the same value on any previous + // iteration. The simplest case to consider is a candidate IV which is + // narrower than the trip count (and thus original IV), but this can + // also happen due to non-unit strides on the candidate IVs. + // TODO: This check should be replaceable with PostInc->hasNoSelfWrap(), + // but in practice we appear to be missing inference for cases we should + // be able to catch. ConstantRange StepCR = SE.getSignedRange(AddRec->getStepRecurrence(SE)); ConstantRange BECountCR = SE.getUnsignedRange(BECount); - unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + 1 + StepCR.getMinSignedBits(); + unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + StepCR.getMinSignedBits(); unsigned ARBitWidth = SE.getTypeSizeInBits(AddRec->getType()); if (NoOverflowBitWidth > ARBitWidth) continue; - const SCEV *TermValueSLocal = SE.getAddExpr( - AddRec->getOperand(0), - SE.getTruncateOrZeroExtend( - SE.getMulExpr( - AddRec->getOperand(1), - SE.getTruncateOrZeroExtend( - SE.getAddExpr(BECount, SE.getOne(BECount->getType())), - AddRec->getOperand(1)->getType())), - AddRec->getOperand(0)->getType())); + const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE); + const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE); if (!Expander.isSafeToExpand(TermValueSLocal)) { LLVM_DEBUG( dbgs() << "Is not safe to expand terminating value for phi node" << PN diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll index 83c4f64b041b9..b7f55b2172ea6 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll @@ -39,10 +39,11 @@ define void @runtime_tripcount(ptr %a, i32 %N) { ; CHECK-LABEL: @runtime_tripcount( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 84 -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[N:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 84 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP0:%.*]] = add nsw i32 [[N:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 88 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY:%.*]] ] @@ -72,13 +73,14 @@ for.end: ; preds = %for.body ; In this case, the i8 IVs increment *isn't* nsw. As a result, a N of 0 ; is well defined, and thus the post-inc starts at 255. 
-; FIXME: miscompile define void @wrap_around(ptr %a, i8 %N) { ; CHECK-LABEL: @wrap_around( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = zext i8 [[N:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[N:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i8 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 4 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[TMP3]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] @@ -112,14 +114,16 @@ define void @ptr_of_ptr_addrec(ptr %ptrptr, i32 %length) { ; CHECK-LABEL: @ptr_of_ptr_addrec( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[START_PTRPTR:%.*]] = getelementptr ptr, ptr [[PTRPTR:%.*]] -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH:%.*]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 8 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[START_PTRPTR]], i64 [[TMP3]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IT_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START_PTRPTR]], [[ENTRY:%.*]] ] -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[IT_04]], align 8 -; CHECK-NEXT: tail call void @foo(ptr [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[IT_04]], align 8 +; CHECK-NEXT: tail call void @foo(ptr [[TMP4]]) ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[IT_04]], i64 1 ; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] @@ -152,9 +156,11 @@ define void @iv_start_non_preheader(ptr %mark, i32 signext %length) { ; CHECK-NEXT: [[TOBOOL_NOT3:%.*]] = icmp eq i32 [[LENGTH:%.*]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_NOT3]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[LENGTH]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 3 -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LENGTH]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 3 +; CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 8 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[MARK:%.*]], i64 [[TMP3]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.cond.cleanup.loopexit: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] @@ -162,8 +168,8 @@ define void @iv_start_non_preheader(ptr %mark, i32 signext %length) { ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[MARK]], [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DST_04]], align 8 -; CHECK-NEXT: [[TMP3:%.*]] = call ptr @foo(ptr [[TMP2]]) +; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DST_04]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = 
call ptr @foo(ptr [[TMP4]]) ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1 ; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] From 514e4359a543ea778c7fee6908a9c6eb10d0ccd9 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Fri, 17 Mar 2023 09:54:39 -0700 Subject: [PATCH 191/691] inline stmt attribute diagnosing in templates D146089's author discovered that our diagnostics for always/no inline would null-dereference when used in a template. He fixed that by skipping in the dependent case. This patch makes sure we diagnose these after a template instantiation. It also adds infrastructure for other statement attributes to add checking/transformation. Differential Revision: https://reviews.llvm.org/D146323 --- clang/docs/ReleaseNotes.rst | 2 + clang/include/clang/Sema/Sema.h | 5 ++ clang/lib/Sema/SemaStmtAttr.cpp | 80 +++++++++++++++------- clang/lib/Sema/SemaTemplateInstantiate.cpp | 20 ++++++ clang/lib/Sema/TreeTransform.h | 74 +++++++++++++++----- clang/test/Sema/attr-alwaysinline.cpp | 46 +++++++++++-- clang/test/Sema/attr-noinline.cpp | 45 ++++++++++-- 7 files changed, 220 insertions(+), 52 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e2e4d6f51d81a..6ae71683804d4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -181,6 +181,8 @@ Improvements to Clang's diagnostics - Clang now avoids duplicate warnings on unreachable ``[[fallthrough]];`` statements previously issued from ``-Wunreachable-code`` and ``-Wunreachable-code-fallthrough`` by prioritizing ``-Wunreachable-code-fallthrough``. +- Clang now correctly diagnoses statement attributes ``[[clang::always_inline]]`` and + ``[[clang::noinline]]`` when used on a statement with dependent call expressions. Bug Fixes in This Version ------------------------- diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 7ff10a9d52e56..63ee0f0ed7fb6 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -4715,6 +4715,11 @@ class Sema final { void CheckAlignasUnderalignment(Decl *D); + bool CheckNoInlineAttr(const Stmt *OrigSt, const Stmt *CurSt, + const AttributeCommonInfo &A); + bool CheckAlwaysInlineAttr(const Stmt *OrigSt, const Stmt *CurSt, + const AttributeCommonInfo &A); + /// Adjust the calling convention of a method to be the ABI default if it /// wasn't specified explicitly. This handles method types formed from /// function type typedefs and typename template arguments. diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index eeef85373ccb1..50cb5b50aa0de 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -215,6 +215,59 @@ static Attr *handleNoMergeAttr(Sema &S, Stmt *St, const ParsedAttr &A, return ::new (S.Context) NoMergeAttr(S.Context, A); } +template +static bool CheckStmtInlineAttr(Sema &SemaRef, const Stmt *OrigSt, + const Stmt *CurSt, + const AttributeCommonInfo &A) { + CallExprFinder OrigCEF(SemaRef, OrigSt); + CallExprFinder CEF(SemaRef, CurSt); + + // If the call expression lists are equal in size, we can skip + // previously emitted diagnostics. However, if the statement has a pack + // expansion, we have no way of telling which CallExpr is the instantiated + // version of the other. 
In this case, we will end up re-diagnosing in the + // instantiation. + // ie: [[clang::always_inline]] non_dependent(), (other_call()...) + // will diagnose nondependent again. + bool CanSuppressDiag = + OrigSt && CEF.getCallExprs().size() == OrigCEF.getCallExprs().size(); + + if (!CEF.foundCallExpr()) { + return SemaRef.Diag(CurSt->getBeginLoc(), + diag::warn_attribute_ignored_no_calls_in_stmt) + << A; + } + + for (auto Tup : + llvm::zip_longest(OrigCEF.getCallExprs(), CEF.getCallExprs())) { + // If the original call expression already had a callee, we already + // diagnosed this, so skip it here. We can't skip if there isn't a 1:1 + // relationship between the two lists of call expressions. + if (!CanSuppressDiag || !(*std::get<0>(Tup))->getCalleeDecl()) { + const Decl *Callee = (*std::get<1>(Tup))->getCalleeDecl(); + if (Callee && + (Callee->hasAttr() || Callee->hasAttr())) { + SemaRef.Diag(CurSt->getBeginLoc(), + diag::warn_function_stmt_attribute_precedence) + << A << (Callee->hasAttr() ? DiagIdx : 1); + SemaRef.Diag(Callee->getBeginLoc(), diag::note_conflicting_attribute); + } + } + } + + return false; +} + +bool Sema::CheckNoInlineAttr(const Stmt *OrigSt, const Stmt *CurSt, + const AttributeCommonInfo &A) { + return CheckStmtInlineAttr(*this, OrigSt, CurSt, A); +} + +bool Sema::CheckAlwaysInlineAttr(const Stmt *OrigSt, const Stmt *CurSt, + const AttributeCommonInfo &A) { + return CheckStmtInlineAttr(*this, OrigSt, CurSt, A); +} + static Attr *handleNoInlineAttr(Sema &S, Stmt *St, const ParsedAttr &A, SourceRange Range) { NoInlineAttr NIA(S.Context, A); @@ -224,20 +277,8 @@ static Attr *handleNoInlineAttr(Sema &S, Stmt *St, const ParsedAttr &A, return nullptr; } - CallExprFinder CEF(S, St); - if (!CEF.foundCallExpr()) { - S.Diag(St->getBeginLoc(), diag::warn_attribute_ignored_no_calls_in_stmt) - << A; + if (S.CheckNoInlineAttr(/*OrigSt=*/nullptr, St, A)) return nullptr; - } - - for (const auto *CallExpr : CEF.getCallExprs()) { - const Decl *Decl = CallExpr->getCalleeDecl(); - if (Decl && - (Decl->hasAttr() || Decl->hasAttr())) - S.Diag(St->getBeginLoc(), diag::warn_function_stmt_attribute_precedence) - << A << (Decl->hasAttr() ? 0 : 1); - } return ::new (S.Context) NoInlineAttr(S.Context, A); } @@ -251,19 +292,8 @@ static Attr *handleAlwaysInlineAttr(Sema &S, Stmt *St, const ParsedAttr &A, return nullptr; } - CallExprFinder CEF(S, St); - if (!CEF.foundCallExpr()) { - S.Diag(St->getBeginLoc(), diag::warn_attribute_ignored_no_calls_in_stmt) - << A; + if (S.CheckAlwaysInlineAttr(/*OrigSt=*/nullptr, St, A)) return nullptr; - } - - for (const auto *CallExpr : CEF.getCallExprs()) { - const Decl *Decl = CallExpr->getCalleeDecl(); - if (Decl && (Decl->hasAttr() || Decl->hasAttr())) - S.Diag(St->getBeginLoc(), diag::warn_function_stmt_attribute_precedence) - << A << (Decl->hasAttr() ? 
2 : 1); - } return ::new (S.Context) AlwaysInlineAttr(S.Context, A); } diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index b4649ce4c413c..162cf3cd88344 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1272,6 +1272,12 @@ namespace { bool AllowInjectedClassName = false); const LoopHintAttr *TransformLoopHintAttr(const LoopHintAttr *LH); + const NoInlineAttr *TransformStmtNoInlineAttr(const Stmt *OrigS, + const Stmt *InstS, + const NoInlineAttr *A); + const AlwaysInlineAttr * + TransformStmtAlwaysInlineAttr(const Stmt *OrigS, const Stmt *InstS, + const AlwaysInlineAttr *A); ExprResult TransformPredefinedExpr(PredefinedExpr *E); ExprResult TransformDeclRefExpr(DeclRefExpr *E); @@ -1767,6 +1773,20 @@ TemplateInstantiator::TransformLoopHintAttr(const LoopHintAttr *LH) { return LoopHintAttr::CreateImplicit(getSema().Context, LH->getOption(), LH->getState(), TransformedExpr, *LH); } +const NoInlineAttr *TemplateInstantiator::TransformStmtNoInlineAttr( + const Stmt *OrigS, const Stmt *InstS, const NoInlineAttr *A) { + if (!A || getSema().CheckNoInlineAttr(OrigS, InstS, *A)) + return nullptr; + + return A; +} +const AlwaysInlineAttr *TemplateInstantiator::TransformStmtAlwaysInlineAttr( + const Stmt *OrigS, const Stmt *InstS, const AlwaysInlineAttr *A) { + if (!A || getSema().CheckAlwaysInlineAttr(OrigS, InstS, *A)) + return nullptr; + + return A; +} ExprResult TemplateInstantiator::transformNonTypeTemplateParmRef( Decl *AssociatedDecl, const NonTypeTemplateParmDecl *parm, diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 21789f96f9154..6dacd74a99e3b 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -377,22 +377,43 @@ class TreeTransform { /// By default, this routine transforms a statement by delegating to the /// appropriate TransformXXXAttr function to transform a specific kind /// of attribute. Subclasses may override this function to transform - /// attributed statements using some other mechanism. + /// attributed statements/types using some other mechanism. /// /// \returns the transformed attribute const Attr *TransformAttr(const Attr *S); -/// Transform the specified attribute. -/// -/// Subclasses should override the transformation of attributes with a pragma -/// spelling to transform expressions stored within the attribute. -/// -/// \returns the transformed attribute. -#define ATTR(X) -#define PRAGMA_SPELLING_ATTR(X) \ + // Transform the given statement attribute. + // + // Delegates to the appropriate TransformXXXAttr function to transform a + // specific kind of statement attribute. Unlike the non-statement taking + // version of this, this implements all attributes, not just pragmas. + const Attr *TransformStmtAttr(const Stmt *OrigS, const Stmt *InstS, + const Attr *A); + + // Transform the specified attribute. + // + // Subclasses should override the transformation of attributes with a pragma + // spelling to transform expressions stored within the attribute. + // + // \returns the transformed attribute. +#define ATTR(X) \ const X##Attr *Transform##X##Attr(const X##Attr *R) { return R; } #include "clang/Basic/AttrList.inc" + // Transform the specified attribute. + // + // Subclasses should override the transformation of attributes to do + // transformation and checking of statement attributes. By default, this + // delegates to the non-statement taking version. 
+ // + // \returns the transformed attribute. +#define ATTR(X) \ + const X##Attr *TransformStmt##X##Attr(const Stmt *, const Stmt *, \ + const X##Attr *A) { \ + return getDerived().Transform##X##Attr(A); \ + } +#include "clang/Basic/AttrList.inc" + /// Transform the given expression. /// /// By default, this routine transforms an expression by delegating to the @@ -7551,9 +7572,8 @@ const Attr *TreeTransform::TransformAttr(const Attr *R) { return R; switch (R->getKind()) { -// Transform attributes with a pragma spelling by calling TransformXXXAttr. -#define ATTR(X) -#define PRAGMA_SPELLING_ATTR(X) \ +// Transform attributes by calling TransformXXXAttr. +#define ATTR(X) \ case attr::X: \ return getDerived().Transform##X##Attr(cast(R)); #include "clang/Basic/AttrList.inc" @@ -7562,25 +7582,45 @@ const Attr *TreeTransform::TransformAttr(const Attr *R) { } } +template +const Attr *TreeTransform::TransformStmtAttr(const Stmt *OrigS, + const Stmt *InstS, + const Attr *R) { + if (!R) + return R; + + switch (R->getKind()) { +// Transform attributes by calling TransformStmtXXXAttr. +#define ATTR(X) \ + case attr::X: \ + return getDerived().TransformStmt##X##Attr(OrigS, InstS, cast(R)); +#include "clang/Basic/AttrList.inc" + default: + return R; + } + return TransformAttr(R); +} + template StmtResult TreeTransform::TransformAttributedStmt(AttributedStmt *S, StmtDiscardKind SDK) { + StmtResult SubStmt = getDerived().TransformStmt(S->getSubStmt(), SDK); + if (SubStmt.isInvalid()) + return StmtError(); + bool AttrsChanged = false; SmallVector Attrs; // Visit attributes and keep track if any are transformed. for (const auto *I : S->getAttrs()) { - const Attr *R = getDerived().TransformAttr(I); + const Attr *R = + getDerived().TransformStmtAttr(S->getSubStmt(), SubStmt.get(), I); AttrsChanged |= (I != R); if (R) Attrs.push_back(R); } - StmtResult SubStmt = getDerived().TransformStmt(S->getSubStmt(), SDK); - if (SubStmt.isInvalid()) - return StmtError(); - if (SubStmt.get() == S->getSubStmt() && !AttrsChanged) return S; diff --git a/clang/test/Sema/attr-alwaysinline.cpp b/clang/test/Sema/attr-alwaysinline.cpp index be3f74c8bfa9d..f60cb5e3adfb1 100644 --- a/clang/test/Sema/attr-alwaysinline.cpp +++ b/clang/test/Sema/attr-alwaysinline.cpp @@ -3,8 +3,9 @@ int bar(); [[gnu::always_inline]] void always_inline_fn(void) {} +// expected-note@+1{{conflicting attribute is here}} [[gnu::flatten]] void flatten_fn(void) {} - +// expected-note@+1{{conflicting attribute is here}} [[gnu::noinline]] void noinline_fn(void) {} void foo() { @@ -32,9 +33,44 @@ int foo(int x) { [[clang::always_inline]] return foo(x + 1); } -// FIXME: This should warn that always_inline statement attribute has higher -// precedence than the noinline function attribute. 
+template +[[gnu::noinline]] +int dependent(int x){ return x + D;} // #DEP +[[gnu::noinline]] +int non_dependent(int x){return x;} // #NO_DEP + template [[gnu::noinline]] -int bar(int x) { - [[clang::always_inline]] return bar(x + 1); +int baz(int x) { // #BAZ + // expected-warning@+2{{statement attribute 'always_inline' has higher precedence than function attribute 'noinline'}} + // expected-note@#NO_DEP{{conflicting attribute is here}} + [[clang::always_inline]] non_dependent(x); + if constexpr (D>0) { + // expected-warning@+6{{statement attribute 'always_inline' has higher precedence than function attribute 'noinline'}} + // expected-note@#NO_DEP{{conflicting attribute is here}} + // expected-warning@+4 3{{statement attribute 'always_inline' has higher precedence than function attribute 'noinline'}} + // expected-note@#BAZ 3{{conflicting attribute is here}} + // expected-note@#BAZ_INST 3{{in instantiation}} + // expected-note@+1 3{{in instantiation}} + [[clang::always_inline]] return non_dependent(x), baz(x + 1); + } + return x; +} + +// We can't suppress if there is a variadic involved. +template +int variadic_baz(int x) { + // Diagnoses NO_DEP 2x, once during phase 1, the second during instantiation. + // Diagnoses DEP 3x, once per variadic expansion. + // expected-warning@+5 2{{statement attribute 'always_inline' has higher precedence than function attribute 'noinline'}} + // expected-note@#NO_DEP 2{{conflicting attribute is here}} + // expected-warning@+3 3{{statement attribute 'always_inline' has higher precedence than function attribute 'noinline'}} + // expected-note@#DEP 3{{conflicting attribute is here}} + // expected-note@#VARIADIC_INST{{in instantiation}} + [[clang::always_inline]] return non_dependent(x) + (dependent(x) + ...); +} + +void use() { + baz<3>(0); // #BAZ_INST + variadic_baz<0, 1, 2>(0); // #VARIADIC_INST + } diff --git a/clang/test/Sema/attr-noinline.cpp b/clang/test/Sema/attr-noinline.cpp index ae0f80ca296eb..b5c3fa1536681 100644 --- a/clang/test/Sema/attr-noinline.cpp +++ b/clang/test/Sema/attr-noinline.cpp @@ -2,9 +2,10 @@ int bar(); +// expected-note@+1{{conflicting attribute is here}} [[gnu::always_inline]] void always_inline_fn(void) { } +// expected-note@+1{{conflicting attribute is here}} [[gnu::flatten]] void flatten_fn(void) { } - [[gnu::noinline]] void noinline_fn(void) { } void foo() { @@ -32,9 +33,43 @@ int foo(int x) { [[clang::noinline]] return foo(x + 1); } -// FIXME: This should warn that noinline statement attribute has higher -// precedence than the always_inline function attribute.
+template +[[clang::always_inline]] +int dependent(int x){ return x + D;} // #DEP +[[clang::always_inline]] +int non_dependent(int x){return x;} // #NO_DEP + template [[clang::always_inline]] -int bar(int x) { - [[clang::noinline]] return bar(x + 1); +int baz(int x) { // #BAZ + // expected-warning@+2{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#NO_DEP{{conflicting attribute is here}} + [[clang::noinline]] non_dependent(x); + if constexpr (D>0) { + // expected-warning@+6{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#NO_DEP{{conflicting attribute is here}} + // expected-warning@+4 3{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#BAZ 3{{conflicting attribute is here}} + // expected-note@#BAZ_INST 3{{in instantiation}} + // expected-note@+1 3{{in instantiation}} + [[clang::noinline]] return non_dependent(x), baz(x + 1); + } + return x; +} + +// We can't suppress if there is a variadic involved. +template +int variadic_baz(int x) { + // Diagnoses NO_DEP 2x, once during phase 1, the second during instantiation. + // Diagnoses DEP 3x, once per variadic expansion. + // expected-warning@+5 2{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#NO_DEP 2{{conflicting attribute is here}} + // expected-warning@+3 3{{statement attribute 'noinline' has higher precedence than function attribute 'always_inline'}} + // expected-note@#DEP 3{{conflicting attribute is here}} + // expected-note@#VARIADIC_INST{{in instantiation}} + [[clang::noinline]] return non_dependent(x) + (dependent(x) + ...); +} + +void use() { + baz<3>(0); // #BAZ_INST + variadic_baz<0, 1, 2>(0); // #VARIADIC_INST } From b4307437e51d3a400de21de624a1610aee23346b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 21 Mar 2023 16:16:52 +0100 Subject: [PATCH 192/691] [ModuleUtils] Handle globals_ctors/dtors with non-literal type (PR56809) If the global already exists, use its existing type, so we don't try to mix literal and non-literal structs among the elements. Fixes https://github.com/llvm/llvm-project/issues/56809. --- llvm/lib/Transforms/Utils/ModuleUtils.cpp | 10 +++--- .../non-literal-type.ll | 36 +++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/LowerGlobalDestructors/non-literal-type.ll diff --git a/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/llvm/lib/Transforms/Utils/ModuleUtils.cpp index 6d17a466957e4..adc2fc0610f3b 100644 --- a/llvm/lib/Transforms/Utils/ModuleUtils.cpp +++ b/llvm/lib/Transforms/Utils/ModuleUtils.cpp @@ -31,11 +31,9 @@ static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F, // Get the current set of static global constructors and add the new ctor // to the list.
SmallVector CurrentCtors; - StructType *EltTy = StructType::get( - IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()), - IRB.getInt8PtrTy()); - + StructType *EltTy; if (GlobalVariable *GVCtor = M.getNamedGlobal(ArrayName)) { + EltTy = cast(GVCtor->getValueType()->getArrayElementType()); if (Constant *Init = GVCtor->getInitializer()) { unsigned n = Init->getNumOperands(); CurrentCtors.reserve(n + 1); @@ -43,6 +41,10 @@ static void appendToGlobalArray(StringRef ArrayName, Module &M, Function *F, CurrentCtors.push_back(cast(Init->getOperand(i))); } GVCtor->eraseFromParent(); + } else { + EltTy = StructType::get( + IRB.getInt32Ty(), PointerType::get(FnTy, F->getAddressSpace()), + IRB.getInt8PtrTy()); } // Build a 3 field global_ctor entry. We don't take a comdat key. diff --git a/llvm/test/Transforms/LowerGlobalDestructors/non-literal-type.ll b/llvm/test/Transforms/LowerGlobalDestructors/non-literal-type.ll new file mode 100644 index 0000000000000..38f72e7ac70e5 --- /dev/null +++ b/llvm/test/Transforms/LowerGlobalDestructors/non-literal-type.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --version 2 +; RUN: opt -passes=lower-global-dtors -S < %s | FileCheck %s + +%ty = type { i32, ptr, ptr } + +declare void @ctor() +declare void @dtor() + +@llvm.global_ctors = appending global [1 x %ty] [%ty {i32 65535, ptr @ctor, ptr zeroinitializer }], align 8 +@llvm.global_dtors = appending global [1 x %ty] [%ty {i32 65535, ptr @dtor, ptr zeroinitializer }], align 8 + +;. +; CHECK: @[[__DSO_HANDLE:[a-zA-Z0-9_$"\\.-]+]] = extern_weak hidden constant i8 +; CHECK: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [2 x %ty] [[[TY:%.*]] { i32 65535, ptr @ctor, ptr null }, [[TY]] { i32 65535, ptr @register_call_dtors, ptr null }] +;. +; CHECK-LABEL: define private void @call_dtors +; CHECK-SAME: (ptr [[TMP0:%.*]]) { +; CHECK-NEXT: body: +; CHECK-NEXT: call void @dtor() +; CHECK-NEXT: ret void +; +; +; CHECK-LABEL: define private void @register_call_dtors() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @__cxa_atexit(ptr @call_dtors, ptr null, ptr @__dso_handle) +; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i32 [[CALL]], 0 +; CHECK-NEXT: br i1 [[TMP0]], label [[FAIL:%.*]], label [[RETURN:%.*]] +; CHECK: fail: +; CHECK-NEXT: call void @llvm.trap() +; CHECK-NEXT: unreachable +; CHECK: return: +; CHECK-NEXT: ret void +; +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { cold noreturn nounwind } +;. From 53e9a5ddc0a2bc983fc7dcf1cd0a108b8f91cd2f Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 21 Mar 2023 08:22:18 -0700 Subject: [PATCH 193/691] [LSR] Fix "new use of poison" problem in lsr-term-fold This models the approach used in LFTR. The short summary is that we need to prove the IV is not dead first, and then we have to either prove the poison flag is valid after the new user or delete it. There are two key differences between this and LFTR. First, I allow a non-concrete start to the IV. The goal of LFTR is to canonicalize and IVs with constant starts are canonical, so the very restrictive definition there is mostly okay. Here on the other hand, we're explicitly moving *away* from the canonical form, and thus need to handle non-constant starts. Second, LFTR bails out instead of removing inbounds on a GEP. This is a pragmatic tradeoff since inbounds is hard to infer and assists aliasing. This pass runs very late, and I think the tradeoff runs the other way. 
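To make the two-step check concrete before discussing alternatives, here is a minimal sketch using the ValueTracking helper the diff below relies on. This is illustrative only: the multi-instruction recurrence bail-out and the pass wiring are omitted, and every name other than the LLVM APIs is invented for this sketch.

// Gate a new use of the post-increment value of PN at the loop latch.
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static bool canUsePostIncIV(PHINode &PN, BasicBlock *LoopLatch,
                            DominatorTree &DT, bool &MustDropPoison) {
  Instruction *LatchTerm = LoopLatch->getTerminator();
  // Step 1: prove the IV is not dead-and-poison from the first iteration.
  if (!mustExecuteUBIfPoisonOnPathTo(&PN, LatchTerm, &DT))
    return false;
  // Step 2: the post-inc value may be poison only on the last iteration;
  // if that poison is never observed today, record that the
  // poison-generating flags (e.g. inbounds) must be dropped before we
  // insert a branch on it.
  auto *PostIncV = cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
  if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LatchTerm, &DT))
    MustDropPoison = PostIncV->hasPoisonGeneratingFlags();
  return true;
}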
A different approach we could take for the post-inc check would be to perform a pre-inc check instead of a post-inc check. We would still have to check the pre-inc IV, but that would avoid the need to drop inbounds. Doing the pre-inc check would basically trade killing a whole IV for an extra register move in the loop. I'm open to suggestions on the right approach here. Note that this analysis is quite expensive compile-time-wise. I have made no effort to optimize (yet). Differential Revision: https://reviews.llvm.org/D146464 --- .../Transforms/Scalar/LoopStrengthReduce.cpp | 44 +++++++++++++++++-- .../LoopStrengthReduce/lsr-term-fold.ll | 12 ++--- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 7c6cea0dd1497..35a90bf40debc 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6681,7 +6681,7 @@ static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE, return nullptr; } -static std::optional> +static std::optional> canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, const LoopInfo &LI) { if (!L->isInnermost()) { @@ -6743,6 +6743,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, PHINode *ToHelpFold = nullptr; const SCEV *TermValueS = nullptr; + bool MustDropPoison = false; for (PHINode &PN : L->getHeader()->phis()) { if (ToFold == &PN) continue; @@ -6789,10 +6790,43 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, << "\n"); continue; } + + // The candidate IV may have been otherwise dead and poison from the + // very first iteration. If we can't disprove that, we can't use the IV. + if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) { + LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV " + << PN << "\n"); + continue; + } + + // The candidate IV may become poison on the last iteration. If this + // value is not branched on, this is a well-defined program. We're + // about to add a new use to this IV, and we have to ensure we don't + // insert UB which didn't previously exist. + bool MustDropPoisonLocal = false; + Instruction *PostIncV = + cast(PN.getIncomingValueForBlock(LoopLatch)); + if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(), + &DT)) { + LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use" + << PN << "\n"); + + // If this is a complex recurrence with multiple instructions computing + // the backedge value, we might need to strip poison flags from all of + // them. + if (PostIncV->getOperand(0) != &PN) + continue; + + // In order to perform the transform, we need to drop the poison generating + // flags on this instruction (if any). + MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags(); + } + + // We pick the last legal alternate IV. We could explore choosing an optimal // alternate IV if we had a decent heuristic to do so.
ToHelpFold = &PN; TermValueS = TermValueSLocal; + MustDropPoison = MustDropPoisonLocal; } LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs() @@ -6808,7 +6842,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, if (!ToFold || !ToHelpFold) return std::nullopt; - return std::make_tuple(ToFold, ToHelpFold, TermValueS); + return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison); } static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, @@ -6871,7 +6905,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, if (AllowTerminatingConditionFoldingAfterLSR) { if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI)) { - auto [ToFold, ToHelpFold, TermValueS] = *Opt; + auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt; Changed = true; NumTermFold++; @@ -6889,6 +6923,10 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, (void)StartValue; Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch); + // See comment in canFoldTermCondOfLoop on why this is sufficient. + if (MustDrop) + cast(LoopValue)->dropPoisonGeneratingFlags(); + // SCEVExpander for both use in preheader and latch const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); SCEVExpander Expander(SE, DL, "lsr_fold_term_cond"); diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll index b7f55b2172ea6..16e85a94517bc 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll @@ -124,7 +124,7 @@ define void @ptr_of_ptr_addrec(ptr %ptrptr, i32 %length) { ; CHECK-NEXT: [[IT_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[START_PTRPTR]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[IT_04]], align 8 ; CHECK-NEXT: tail call void @foo(ptr [[TMP4]]) -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[IT_04]], i64 1 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr ptr, ptr [[IT_04]], i64 1 ; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: @@ -170,7 +170,7 @@ define void @iv_start_non_preheader(ptr %mark, i32 signext %length) { ; CHECK-NEXT: [[DST_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[MARK]], [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DST_04]], align 8 ; CHECK-NEXT: [[TMP5:%.*]] = call ptr @foo(ptr [[TMP4]]) -; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds ptr, ptr [[DST_04]], i64 1 +; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr ptr, ptr [[DST_04]], i64 1 ; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] ; @@ -197,9 +197,9 @@ for.body: ; preds = %entry, %for.body ; advance the pointer IV by *4* each time, and thus on the iteration we write ; byte 16, %uglygep2 (the pointer increment) is past the end of the underlying ; storage and thus violates the inbounds requirements. As a result, %uglygep2 -; is poison on the final iteration. If we insert a branch on that value, we -; have inserted undefined behavior where it did not previously exist. -; FIXME: miscompile +; is poison on the final iteration. 
If we insert a branch on that value +; (without stripping the poison flag), we have inserted undefined behavior +; where it did not previously exist. define void @inbounds_poison_use(ptr %a) { ; CHECK-LABEL: @inbounds_poison_use( ; CHECK-NEXT: entry: @@ -208,7 +208,7 @@ define void @inbounds_poison_use(ptr %a) { ; CHECK: for.body: ; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[A]], [[ENTRY:%.*]] ] ; CHECK-NEXT: store i8 1, ptr [[LSR_IV1]], align 4 -; CHECK-NEXT: [[UGLYGEP2]] = getelementptr inbounds i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 ; CHECK-NEXT: [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND:%.*]] = icmp eq ptr [[UGLYGEP2]], [[SCEVGEP]] ; CHECK-NEXT: br i1 [[LSR_FOLD_TERM_COND_REPLACED_TERM_COND]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: From 68685a7f6ad19fcaed317d225ba798119f3ea157 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Tue, 21 Mar 2023 22:18:40 +0700 Subject: [PATCH 194/691] Revert "[GuardWidening] Improve analysis of potential widening into hotter block" This reverts commit 8d2885c2ef98b81927c1f816691ec4e77cfc7f3f. I accidentally introduced an infinite loop in this patch, will return when this is fixed. --- llvm/lib/Transforms/Scalar/GuardWidening.cpp | 52 +++++-------------- .../two_forms_behavior_consistency.ll | 40 ++++++++------ 2 files changed, 36 insertions(+), 56 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp index e354b70357cd9..064c7b1d0ad12 100644 --- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp +++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp @@ -460,55 +460,27 @@ GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr, if (HoistingOutOfLoop) return WS_Positive; - // For a given basic block \p BB, return its successor which is guaranteed or - // highly likely will be taken as its successor. - auto GetLikelySuccessor = [](const BasicBlock * BB)->const BasicBlock * { - if (auto *UniqueSucc = BB->getUniqueSuccessor()) - return UniqueSucc; - auto *Term = BB->getTerminator(); - Value *Cond = nullptr; - const BasicBlock *IfTrue = nullptr, *IfFalse = nullptr; - using namespace PatternMatch; - if (!match(Term, m_Br(m_Value(Cond), m_BasicBlock(IfTrue), - m_BasicBlock(IfFalse)))) - return nullptr; - // For constant conditions, only one dynamical successor is possible - if (auto *ConstCond = dyn_cast(Cond)) - return ConstCond->isAllOnesValue() ? IfTrue : IfFalse; - // If one of successors ends with deopt, another one is likely. - if (IfFalse->getPostdominatingDeoptimizeCall()) - return IfTrue; - if (IfTrue->getPostdominatingDeoptimizeCall()) - return IfFalse; - // TODO: Use branch frequency metatada to allow hoisting through non-deopt - // branches? - return nullptr; - }; - - // Returns true if we might be hoisting above explicit control flow into a - // considerably hotter block. Note that this completely ignores implicit - // control flow (guards, calls which throw, etc...). That choice appears - // arbitrary (we assume that implicit control flow exits are all rare). - auto MaybeHoistingToHotterBlock = [&]() { - const auto *DominatingBlock = DominatingGuard->getParent(); - const auto *DominatedBlock = DominatedInstr->getParent(); - - // Descent as low as we can, always taking the likely successor. 
- while (DominatedBlock != DominatingBlock) - if (auto *LikelySucc = GetLikelySuccessor(DominatingBlock)) - DominatingBlock = LikelySucc; - else - break; + // Returns true if we might be hoisting above explicit control flow. Note + // that this completely ignores implicit control flow (guards, calls which + // throw, etc...). That choice appears arbitrary. + auto MaybeHoistingOutOfIf = [&]() { + auto *DominatingBlock = DominatingGuard->getParent(); + auto *DominatedBlock = DominatedInstr->getParent(); + if (isGuardAsWidenableBranch(DominatingGuard)) + DominatingBlock = cast(DominatingGuard)->getSuccessor(0); // Same Block? if (DominatedBlock == DominatingBlock) return false; + // Obvious successor (common loop header/preheader case) + if (DominatedBlock == DominatingBlock->getUniqueSuccessor()) + return false; // TODO: diamond, triangle cases if (!PDT) return true; return !PDT->dominates(DominatedBlock, DominatingBlock); }; - return MaybeHoistingToHotterBlock() ? WS_IllegalOrNegative : WS_Neutral; + return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral; } bool GuardWideningImpl::canBeHoistedTo( diff --git a/llvm/test/Transforms/GuardWidening/two_forms_behavior_consistency.ll b/llvm/test/Transforms/GuardWidening/two_forms_behavior_consistency.ll index 9b60f4e3e62b7..69bddeea9fbb8 100644 --- a/llvm/test/Transforms/GuardWidening/two_forms_behavior_consistency.ll +++ b/llvm/test/Transforms/GuardWidening/two_forms_behavior_consistency.ll @@ -42,26 +42,30 @@ define void @test_01(i32 %a, i32 %b, i32 %c, i32 %d) { ; BRANCH_FORM-NEXT: entry: ; BRANCH_FORM-NEXT: br label [[LOOP:%.*]] ; BRANCH_FORM: loop: -; BRANCH_FORM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[GUARDED:%.*]] ] +; BRANCH_FORM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[GUARDED5:%.*]] ] ; BRANCH_FORM-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; BRANCH_FORM-NEXT: [[C1:%.*]] = icmp ult i32 [[IV]], [[A]] ; BRANCH_FORM-NEXT: [[C2:%.*]] = icmp ult i32 [[IV]], [[B]] ; BRANCH_FORM-NEXT: [[WIDE_CHK:%.*]] = and i1 [[C1]], [[C2]] -; BRANCH_FORM-NEXT: [[C3:%.*]] = icmp ult i32 [[IV]], [[C]] -; BRANCH_FORM-NEXT: [[WIDE_CHK13:%.*]] = and i1 [[WIDE_CHK]], [[C3]] -; BRANCH_FORM-NEXT: [[C4:%.*]] = icmp ult i32 [[IV]], [[D]] -; BRANCH_FORM-NEXT: [[WIDE_CHK14:%.*]] = and i1 [[WIDE_CHK13]], [[C4]] ; BRANCH_FORM-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK14]], [[WIDENABLE_COND]] -; BRANCH_FORM-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] +; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] +; BRANCH_FORM-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; BRANCH_FORM: deopt: ; BRANCH_FORM-NEXT: call void (...) 
@llvm.experimental.deoptimize.isVoid() [ "deopt"() ] ; BRANCH_FORM-NEXT: ret void ; BRANCH_FORM: guarded: ; BRANCH_FORM-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() ; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[C2]], [[WIDENABLE_COND3]] +; BRANCH_FORM-NEXT: [[C3:%.*]] = icmp ult i32 [[IV]], [[C]] +; BRANCH_FORM-NEXT: [[C4:%.*]] = icmp ult i32 [[IV]], [[D]] +; BRANCH_FORM-NEXT: [[WIDE_CHK13:%.*]] = and i1 [[C3]], [[C4]] ; BRANCH_FORM-NEXT: [[WIDENABLE_COND7:%.*]] = call i1 @llvm.experimental.widenable.condition() -; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND8:%.*]] = and i1 [[C3]], [[WIDENABLE_COND7]] +; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND8:%.*]] = and i1 [[WIDE_CHK13]], [[WIDENABLE_COND7]] +; BRANCH_FORM-NEXT: br i1 [[EXIPLICIT_GUARD_COND8]], label [[GUARDED5]], label [[DEOPT6:%.*]], !prof [[PROF0]] +; BRANCH_FORM: deopt6: +; BRANCH_FORM-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; BRANCH_FORM-NEXT: ret void +; BRANCH_FORM: guarded5: ; BRANCH_FORM-NEXT: [[WIDENABLE_COND11:%.*]] = call i1 @llvm.experimental.widenable.condition() ; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND12:%.*]] = and i1 [[C4]], [[WIDENABLE_COND11]] ; BRANCH_FORM-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() @@ -74,26 +78,30 @@ define void @test_01(i32 %a, i32 %b, i32 %c, i32 %d) { ; BRANCH_FORM_LICM-NEXT: entry: ; BRANCH_FORM_LICM-NEXT: br label [[LOOP:%.*]] ; BRANCH_FORM_LICM: loop: -; BRANCH_FORM_LICM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[GUARDED:%.*]] ] +; BRANCH_FORM_LICM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[GUARDED5:%.*]] ] ; BRANCH_FORM_LICM-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; BRANCH_FORM_LICM-NEXT: [[C1:%.*]] = icmp ult i32 [[IV]], [[A]] ; BRANCH_FORM_LICM-NEXT: [[C2:%.*]] = icmp ult i32 [[IV]], [[B]] ; BRANCH_FORM_LICM-NEXT: [[WIDE_CHK:%.*]] = and i1 [[C1]], [[C2]] -; BRANCH_FORM_LICM-NEXT: [[C3:%.*]] = icmp ult i32 [[IV]], [[C]] -; BRANCH_FORM_LICM-NEXT: [[WIDE_CHK13:%.*]] = and i1 [[WIDE_CHK]], [[C3]] -; BRANCH_FORM_LICM-NEXT: [[C4:%.*]] = icmp ult i32 [[IV]], [[D]] -; BRANCH_FORM_LICM-NEXT: [[WIDE_CHK14:%.*]] = and i1 [[WIDE_CHK13]], [[C4]] ; BRANCH_FORM_LICM-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK14]], [[WIDENABLE_COND]] -; BRANCH_FORM_LICM-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] +; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] +; BRANCH_FORM_LICM-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; BRANCH_FORM_LICM: deopt: ; BRANCH_FORM_LICM-NEXT: call void (...) 
@llvm.experimental.deoptimize.isVoid() [ "deopt"() ] ; BRANCH_FORM_LICM-NEXT: ret void ; BRANCH_FORM_LICM: guarded: ; BRANCH_FORM_LICM-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() ; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[C2]], [[WIDENABLE_COND3]] +; BRANCH_FORM_LICM-NEXT: [[C3:%.*]] = icmp ult i32 [[IV]], [[C]] +; BRANCH_FORM_LICM-NEXT: [[C4:%.*]] = icmp ult i32 [[IV]], [[D]] +; BRANCH_FORM_LICM-NEXT: [[WIDE_CHK13:%.*]] = and i1 [[C3]], [[C4]] ; BRANCH_FORM_LICM-NEXT: [[WIDENABLE_COND7:%.*]] = call i1 @llvm.experimental.widenable.condition() -; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND8:%.*]] = and i1 [[C3]], [[WIDENABLE_COND7]] +; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND8:%.*]] = and i1 [[WIDE_CHK13]], [[WIDENABLE_COND7]] +; BRANCH_FORM_LICM-NEXT: br i1 [[EXIPLICIT_GUARD_COND8]], label [[GUARDED5]], label [[DEOPT6:%.*]], !prof [[PROF0]] +; BRANCH_FORM_LICM: deopt6: +; BRANCH_FORM_LICM-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] +; BRANCH_FORM_LICM-NEXT: ret void +; BRANCH_FORM_LICM: guarded5: ; BRANCH_FORM_LICM-NEXT: [[WIDENABLE_COND11:%.*]] = call i1 @llvm.experimental.widenable.condition() ; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND12:%.*]] = and i1 [[C4]], [[WIDENABLE_COND11]] ; BRANCH_FORM_LICM-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() From e73dd6254e310a799b492127c0f237af0e2236e2 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 15 Mar 2023 10:26:38 +0000 Subject: [PATCH 195/691] [lldb] Fix a 32 bit warning in ScriptedProcessInterface ../llvm-project/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h:61:12: warning: implicit conversion from 'unsigned long long' to 'size_t' (aka 'unsigned int') changes value from 18446744073709551615 to 4294967295 [-Wconstant-conversion] ../llvm-project/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp:275:39: warning: result of comparison of constant 18446744073709551615 with expression of type 'size_t' (aka 'unsigned int') is always false [-Wtautological-constant-out-of-range-compare] This happens because size_t on 32 bit is 32 bit, but LLDB_INVALID_OFFSET is UINT64_MAX. Return lldb::offset_t instead, which is 64 bit everywhere. DoWriteMemory still returns size_t but this is because every other Process derived thing does that. As long as the failure check works I think it should be fine. 
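A self-contained illustration of the truncation follows; the names here are hypothetical stand-ins, not LLDB code.

#include <cstdint>
#include <cstdio>

// Stand-in for LLDB_INVALID_OFFSET, which is UINT64_MAX.
static const uint64_t kInvalidOffset = UINT64_MAX;

// Models a return type of 32-bit size_t: the sentinel narrows to 4294967295.
static uint32_t writeReturningNarrowSizeT() { return kInvalidOffset; }

int main() {
  uint32_t BytesWritten = writeReturningNarrowSizeT();
  // Always false on such a host: the truncated value promotes back to
  // 64 bits as 0xFFFFFFFF, which never equals UINT64_MAX.
  std::printf("%d\n", BytesWritten == kInvalidOffset);
}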
Reviewed By: mib Differential Revision: https://reviews.llvm.org/D146124 --- lldb/include/lldb/Interpreter/ScriptedProcessInterface.h | 6 +++--- lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp | 2 +- .../Python/ScriptedProcessPythonInterface.cpp | 2 +- .../Python/ScriptedProcessPythonInterface.h | 5 +++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h index ba4743077e029..895fc5e2fd3d3 100644 --- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h +++ b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h @@ -55,9 +55,9 @@ class ScriptedProcessInterface : virtual public ScriptedInterface { return {}; } - virtual size_t WriteMemoryAtAddress(lldb::addr_t addr, - lldb::DataExtractorSP data_sp, - Status &error) { + virtual lldb::offset_t WriteMemoryAtAddress(lldb::addr_t addr, + lldb::DataExtractorSP data_sp, + Status &error) { return LLDB_INVALID_OFFSET; }; diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp index 9a670276a8b82..5e7f88cc2d868 100644 --- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp +++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp @@ -269,7 +269,7 @@ size_t ScriptedProcess::DoWriteMemory(lldb::addr_t vm_addr, const void *buf, if (!data_extractor_sp || !data_extractor_sp->GetByteSize()) return 0; - size_t bytes_written = + lldb::offset_t bytes_written = GetInterface().WriteMemoryAtAddress(vm_addr, data_extractor_sp, error); if (!bytes_written || bytes_written == LLDB_INVALID_OFFSET) diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp index c985065835727..17fc2e5e1852a 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp @@ -136,7 +136,7 @@ lldb::DataExtractorSP ScriptedProcessPythonInterface::ReadMemoryAtAddress( return data_sp; } -size_t ScriptedProcessPythonInterface::WriteMemoryAtAddress( +lldb::offset_t ScriptedProcessPythonInterface::WriteMemoryAtAddress( lldb::addr_t addr, lldb::DataExtractorSP data_sp, Status &error) { Status py_error; StructuredData::ObjectSP obj = diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h index b7b12b93a002b..a96a55df8f4bc 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h +++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h @@ -50,8 +50,9 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface, lldb::DataExtractorSP ReadMemoryAtAddress(lldb::addr_t address, size_t size, Status &error) override; - size_t WriteMemoryAtAddress(lldb::addr_t addr, lldb::DataExtractorSP data_sp, - Status &error) override; + lldb::offset_t WriteMemoryAtAddress(lldb::addr_t addr, + lldb::DataExtractorSP data_sp, + Status &error) override; StructuredData::ArraySP GetLoadedImages() override; From 01d05bd407bb49801393098f76d1af1c769c217c Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Tue, 21 Mar 2023 08:34:20 -0700 Subject: [PATCH 196/691] Add warning test to make buildbots happy after 514e4359 --- clang/test/Sema/attr-alwaysinline.cpp | 2 +- 
clang/test/Sema/attr-noinline.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/Sema/attr-alwaysinline.cpp b/clang/test/Sema/attr-alwaysinline.cpp index f60cb5e3adfb1..6b71a0e86bf12 100644 --- a/clang/test/Sema/attr-alwaysinline.cpp +++ b/clang/test/Sema/attr-alwaysinline.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -verify -fsyntax-only %s +// RUN: %clang_cc1 -verify -fsyntax-only %s -Wno-c++17-extensions int bar(); diff --git a/clang/test/Sema/attr-noinline.cpp b/clang/test/Sema/attr-noinline.cpp index b5c3fa1536681..bd6505b9fe98e 100644 --- a/clang/test/Sema/attr-noinline.cpp +++ b/clang/test/Sema/attr-noinline.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -verify -fsyntax-only %s +// RUN: %clang_cc1 -verify -fsyntax-only %s -Wno-c++17-extensions int bar(); From e7c1b4b64c314c9adb39857d92a660557217cada Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Tue, 21 Mar 2023 16:13:51 +0100 Subject: [PATCH 197/691] [SystemZ] Fix modelling of composed subreg indices. A rare case where coalescing resulted in a hh32 (high32 of high64 of vector register) subreg usage caused getSubReg() to fail as the vector reg does not have that subreg in its subregs list, but rather h32 which was expected to also act as hh32. See link below for the discussion when solving this. Patch By: critson Reviewed By: uweigand Fixes: https://github.com/llvm/llvm-project/issues/61390 --- llvm/lib/Target/SystemZ/SystemZInstrFP.td | 2 +- llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp | 8 ++++---- llvm/lib/Target/SystemZ/SystemZRegisterInfo.h | 4 ++-- llvm/lib/Target/SystemZ/SystemZRegisterInfo.td | 16 ++++++++-------- .../SystemZ/RAbasic-invalid-LR-update.mir | 6 +++--- llvm/test/CodeGen/SystemZ/cond-move-05.mir | 4 ++-- .../SystemZ/regalloc-fast-invalid-kill-flag.mir | 2 +- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/llvm/lib/Target/SystemZ/SystemZInstrFP.td index 7cbe125533d30..ea62e99a58399 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrFP.td +++ b/llvm/lib/Target/SystemZ/SystemZInstrFP.td @@ -191,7 +191,7 @@ let Uses = [FPC], mayRaiseFPException = 1 in { let Predicates = [FeatureNoVectorEnhancements1] in { def : Pat<(f32 (any_fpround FP128:$src)), - (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>; + (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_h32)>; def : Pat<(f64 (any_fpround FP128:$src)), (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; } diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 7f3d8e8d311ee..e43787a5ce710 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -30,12 +30,12 @@ static const TargetRegisterClass *getRC32(MachineOperand &MO, const TargetRegisterClass *RC = MRI->getRegClass(MO.getReg()); if (SystemZ::GR32BitRegClass.hasSubClassEq(RC) || - MO.getSubReg() == SystemZ::subreg_l32 || - MO.getSubReg() == SystemZ::subreg_hl32) + MO.getSubReg() == SystemZ::subreg_ll32 || + MO.getSubReg() == SystemZ::subreg_l32) return &SystemZ::GR32BitRegClass; if (SystemZ::GRH32BitRegClass.hasSubClassEq(RC) || - MO.getSubReg() == SystemZ::subreg_h32 || - MO.getSubReg() == SystemZ::subreg_hh32) + MO.getSubReg() == SystemZ::subreg_lh32 || + MO.getSubReg() == SystemZ::subreg_h32) return &SystemZ::GRH32BitRegClass; if (VRM && VRM->hasPhys(MO.getReg())) { diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h index 19305d4e89573..78abeb720b43b 100644 --- 
a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -24,10 +24,10 @@ namespace SystemZ { // Return the subreg to use for referring to the even and odd registers // in a GR128 pair. Is32Bit says whether we want a GR32 or GR64. inline unsigned even128(bool Is32bit) { - return Is32bit ? subreg_hl32 : subreg_h64; + return Is32bit ? subreg_l32 : subreg_h64; } inline unsigned odd128(bool Is32bit) { - return Is32bit ? subreg_l32 : subreg_l64; + return Is32bit ? subreg_ll32 : subreg_l64; } // Reg should be a 32-bit GPR. Return true if it is a high register rather diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td index 00005a6d61794..5d66501172b28 100644 --- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -20,12 +20,12 @@ class SystemZRegWithSubregs subregs> } let Namespace = "SystemZ" in { -def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_ll32. -def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32. +def subreg_l32 : SubRegIndex<32, 0>; // Also acts as subreg_hl32. +def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_hh32. def subreg_l64 : SubRegIndex<64, 0>; def subreg_h64 : SubRegIndex<64, 64>; -def subreg_hh32 : ComposedSubRegIndex; -def subreg_hl32 : ComposedSubRegIndex; +def subreg_lh32 : ComposedSubRegIndex; +def subreg_ll32 : ComposedSubRegIndex; } // Define a register class that contains values of types TYPES and an @@ -73,9 +73,9 @@ class GPR64 num, string n, GPR32 low, GPR32 high> // 8 even-odd pairs of GPR64s. class GPR128 num, string n, GPR64 low, GPR64 high> - : SystemZRegWithSubregs { + : SystemZRegWithSubregs { let HWEncoding = num; - let SubRegIndices = [subreg_l64, subreg_h64]; + let SubRegIndices = [subreg_h64, subreg_l64]; let CoveredBySubRegs = 1; } @@ -215,9 +215,9 @@ class FPR64 num, string n, FPR32 high> // 8 pairs of FPR64s, with a one-register gap inbetween. 
class FPR128 num, string n, FPR64 low, FPR64 high> - : SystemZRegWithSubregs { + : SystemZRegWithSubregs { let HWEncoding = num; - let SubRegIndices = [subreg_l64, subreg_h64]; + let SubRegIndices = [subreg_h64, subreg_l64]; let CoveredBySubRegs = 1; } diff --git a/llvm/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir b/llvm/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir index 0c02c26fe5b12..79fa84f8a2a49 100644 --- a/llvm/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir +++ b/llvm/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir @@ -248,9 +248,9 @@ body: | bb.9: %82 = VLVGP %67.subreg_h64, %67.subreg_h64 - %82 = VLVGH %82, %58.subreg_hl32, $noreg, 0 - %82 = VLVGH %82, %80.subreg_hl32, $noreg, 1 - dead %82 = VLVGH %82, %90.subreg_hl32, $noreg, 2 + %82 = VLVGH %82, %58.subreg_l32, $noreg, 0 + %82 = VLVGH %82, %80.subreg_l32, $noreg, 1 + dead %82 = VLVGH %82, %90.subreg_l32, $noreg, 2 %96 = AFIMux %96, 1879048192, implicit-def dead $cc %96 = SRL %96, $noreg, 31 dead %11 = VLVGF %11, %96, $noreg, 1 diff --git a/llvm/test/CodeGen/SystemZ/cond-move-05.mir b/llvm/test/CodeGen/SystemZ/cond-move-05.mir index 3ce98de94fe4a..7cc69bc8f59da 100644 --- a/llvm/test/CodeGen/SystemZ/cond-move-05.mir +++ b/llvm/test/CodeGen/SystemZ/cond-move-05.mir @@ -67,8 +67,8 @@ body: | undef %3.subreg_l64:gr128bit = LGHI 1 %3.subreg_h64:gr128bit = LLILL 0 %3:gr128bit = DLGR %3, %0 - CLFIMux %3.subreg_hl32, 3631842929, implicit-def $cc - %6:grx32bit = LOCRMux undef %6, %3.subreg_hl32, 14, 4, implicit killed $cc + CLFIMux %3.subreg_l32, 3631842929, implicit-def $cc + %6:grx32bit = LOCRMux undef %6, %3.subreg_l32, 14, 4, implicit killed $cc CHIMux %6, 0, implicit-def $cc BRC 14, 8, %bb.2.for.inc591.1.i.i, implicit killed $cc J %bb.1.cleanup584.i.i diff --git a/llvm/test/CodeGen/SystemZ/regalloc-fast-invalid-kill-flag.mir b/llvm/test/CodeGen/SystemZ/regalloc-fast-invalid-kill-flag.mir index cf2c274b1c341..bcd7f51cdf780 100644 --- a/llvm/test/CodeGen/SystemZ/regalloc-fast-invalid-kill-flag.mir +++ b/llvm/test/CodeGen/SystemZ/regalloc-fast-invalid-kill-flag.mir @@ -22,7 +22,7 @@ tracksRegLiveness: true body: | bb.0: %0 : gr128bit = IMPLICIT_DEF - %0.subreg_hl32 = COPY %0.subreg_l32 + %0.subreg_l32 = COPY %0.subreg_ll32 %1 : gr64bit = COPY %0.subreg_l64 %2 : addr64bit = LARL @g_167 STC %1.subreg_l32, %2, 8, $noreg From 912599638027e5cbed7b11318273b8703837c6ae Mon Sep 17 00:00:00 2001 From: Adam Paszke Date: Tue, 21 Mar 2023 08:26:06 -0700 Subject: [PATCH 198/691] Support retrieving the splat value from DenseElementsAttrs in Python This is especially convenient when trying to resize the splat. 
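For reference, the same check-then-fetch pattern through the MLIR C API that the binding below wraps. This is a hedged sketch: the helper name is invented, and a real caller would choose its own error handling rather than the null attribute used here.

#include "mlir-c/BuiltinAttributes.h"
#include "mlir-c/IR.h"

// Return the splat element of a DenseElementsAttr, or the null attribute
// when the input is not a splat (the Python binding raises ValueError
// in that case instead).
static MlirAttribute getSplatValueOrNull(MlirAttribute denseAttr) {
  if (!mlirDenseElementsAttrIsSplat(denseAttr))
    return mlirAttributeGetNull();
  return mlirDenseElementsAttrGetSplatValue(denseAttr);
}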
Reviewed By: jpienaar Differential Revision: https://reviews.llvm.org/D146510 --- mlir/lib/Bindings/Python/IRAttributes.cpp | 10 ++++++++++ mlir/test/python/ir/array_attributes.py | 2 ++ 2 files changed, 12 insertions(+) diff --git a/mlir/lib/Bindings/Python/IRAttributes.cpp b/mlir/lib/Bindings/Python/IRAttributes.cpp index c59a54b6699a7..40598ecfd21a7 100644 --- a/mlir/lib/Bindings/Python/IRAttributes.cpp +++ b/mlir/lib/Bindings/Python/IRAttributes.cpp @@ -777,6 +777,16 @@ class PyDenseElementsAttribute [](PyDenseElementsAttribute &self) -> bool { return mlirDenseElementsAttrIsSplat(self); }) + .def("get_splat_value", + [](PyDenseElementsAttribute &self) -> PyAttribute { + if (!mlirDenseElementsAttrIsSplat(self)) { + throw SetPyError( + PyExc_ValueError, + "get_splat_value called on a non-splat attribute"); + } + return PyAttribute(self.getContext(), + mlirDenseElementsAttrGetSplatValue(self)); + }) .def_buffer(&PyDenseElementsAttribute::accessBuffer); } diff --git a/mlir/test/python/ir/array_attributes.py b/mlir/test/python/ir/array_attributes.py index b618802e52436..c1f1633eecaaf 100644 --- a/mlir/test/python/ir/array_attributes.py +++ b/mlir/test/python/ir/array_attributes.py @@ -43,6 +43,7 @@ def testGetDenseElementsSplatInt(): print(attr) # CHECK: is_splat: True print("is_splat:", attr.is_splat) + assert attr.get_splat_value() == element # CHECK-LABEL: TEST: testGetDenseElementsSplatFloat @@ -55,6 +56,7 @@ def testGetDenseElementsSplatFloat(): attr = DenseElementsAttr.get_splat(shaped_type, element) # CHECK: dense<1.200000e+00> : tensor<2x3x4xf32> print(attr) + assert attr.get_splat_value() == element # CHECK-LABEL: TEST: testGetDenseElementsSplatErrors From e69f8bac42e5c3e636a1b06c08fc2739ac1d9b36 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 21 Mar 2023 14:46:15 +0000 Subject: [PATCH 200/691] [RISCV][NFC] Add test case for SLP reduction vectorization failure Horizontal reductions still occur on RISC-V, despite the maximum SLP VF reported back by TTI being set to 1 to disable SLP. This can cause the cost model to think it can vectorize a gather into smaller, widened loads, when it will actually fail to do so. This should ultimately be fixed whenever SLP is re-enabled for RISC-V at some point.
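For context, the clamp being bypassed is roughly the following shape. This is a sketch of how SLP consults TTI; the real option plumbing lives in RISCVTargetTransformInfo and SLPVectorizer, and this helper is invented for illustration.

#include "llvm/Analysis/TargetTransformInfo.h"
#include <algorithm>
using namespace llvm;

// Cap a desired SLP vectorization factor with the target's getMaximumVF.
// A result of 0 conventionally means "no target-imposed limit"; RISC-V
// reports 1 (via -riscv-v-slp-max-vf) to disable SLP trees, but horizontal
// reduction matching takes a different path and still fires.
static unsigned clampSLPVF(const TargetTransformInfo &TTI, unsigned ElemWidth,
                           unsigned Opcode, unsigned DesiredVF) {
  if (unsigned MaxVF = TTI.getMaximumVF(ElemWidth, Opcode))
    return std::min(DesiredVF, MaxVF);
  return DesiredVF;
}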
Reviewed By: reames Differential Revision: https://reviews.llvm.org/D146529 --- .../SLPVectorizer/RISCV/reductions.ll | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll index 268e4f3189d5e..10f9c04892972 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll @@ -821,4 +821,96 @@ entry: ret i64 %add.15 } +declare i32 @llvm.abs.i32(i32, i1) +; FIXME: This horizontal reduction occurs because the cost model thinks it can +; vectorize the loads here. However, because -riscv-v-slp-max-vf is set to 1 by +; default, tryToVectorizeList fails and we end up with this very expensive +; scalarized load. +; +; This is the code the cost model thinks it's going to generate, which you can +; get by passing -riscv-v-slp-max-vf=0 +; +; define i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) #0 { +; %p.2 = getelementptr inbounds i32, ptr %p, i64 %stride +; %q.2 = getelementptr inbounds i32, ptr %q, i64 %stride +; %p.3 = getelementptr inbounds i32, ptr %p.2, i64 1 +; %q.3 = getelementptr inbounds i32, ptr %q.2, i64 1 +; %1 = load <2 x i32>, ptr %p, align 4 +; %2 = load <2 x i32>, ptr %q, align 4 +; %x.2 = load i32, ptr %p.2, align 4 +; %y.2 = load i32, ptr %q.2, align 4 +; %x.3 = load i32, ptr %p.3, align 4 +; %y.3 = load i32, ptr %q.3, align 4 +; %3 = shufflevector <2 x i32> %1, <2 x i32> poison, <4 x i32> +; %4 = insertelement <4 x i32> %3, i32 %x.2, i32 2 +; %5 = insertelement <4 x i32> %4, i32 %x.3, i32 3 +; %6 = shufflevector <2 x i32> %2, <2 x i32> poison, <4 x i32> +; %7 = insertelement <4 x i32> %6, i32 %y.2, i32 2 +; %8 = insertelement <4 x i32> %7, i32 %y.3, i32 3 +; %9 = sub <4 x i32> %5, %8 +; %10 = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %9, i1 true) +; %11 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %10) +; ret i32 %11 +; } +define i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) { +; CHECK-LABEL: @stride_sum_abs_diff( +; CHECK-NEXT: [[P_1:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 1 +; CHECK-NEXT: [[Q_1:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i64 1 +; CHECK-NEXT: [[P_2:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[STRIDE:%.*]] +; CHECK-NEXT: [[Q_2:%.*]] = getelementptr inbounds i32, ptr [[Q]], i64 [[STRIDE]] +; CHECK-NEXT: [[P_3:%.*]] = getelementptr inbounds i32, ptr [[P_2]], i64 1 +; CHECK-NEXT: [[Q_3:%.*]] = getelementptr inbounds i32, ptr [[Q_2]], i64 1 +; CHECK-NEXT: [[X_0:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[Y_0:%.*]] = load i32, ptr [[Q]], align 4 +; CHECK-NEXT: [[X_1:%.*]] = load i32, ptr [[P_1]], align 4 +; CHECK-NEXT: [[Y_1:%.*]] = load i32, ptr [[Q_1]], align 4 +; CHECK-NEXT: [[X_2:%.*]] = load i32, ptr [[P_2]], align 4 +; CHECK-NEXT: [[Y_2:%.*]] = load i32, ptr [[Q_2]], align 4 +; CHECK-NEXT: [[X_3:%.*]] = load i32, ptr [[P_3]], align 4 +; CHECK-NEXT: [[Y_3:%.*]] = load i32, ptr [[Q_3]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X_0]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[X_1]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[X_2]], i32 2 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[X_3]], i32 3 +; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[Y_0]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[Y_1]], i32 1 +; CHECK-NEXT: [[TMP7:%.*]] = 
insertelement <4 x i32> [[TMP6]], i32 [[Y_2]], i32 2 +; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[Y_3]], i32 3 +; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP4]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP9]], i1 true) +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) +; CHECK-NEXT: ret i32 [[TMP11]] +; + %x.0 = load i32, ptr %p + %y.0 = load i32, ptr %q + %sub.0 = sub i32 %x.0, %y.0 + %abs.0 = tail call i32 @llvm.abs.i32(i32 %sub.0, i1 true) + + %p.1 = getelementptr inbounds i32, ptr %p, i64 1 + %x.1 = load i32, ptr %p.1 + %q.1 = getelementptr inbounds i32, ptr %q, i64 1 + %y.1 = load i32, ptr %q.1 + %sub.1 = sub i32 %x.1, %y.1 + %abs.1 = tail call i32 @llvm.abs.i32(i32 %sub.1, i1 true) + %sum.0 = add i32 %abs.0, %abs.1 + + %p.2 = getelementptr inbounds i32, ptr %p, i64 %stride + %q.2 = getelementptr inbounds i32, ptr %q, i64 %stride + + %x.2 = load i32, ptr %p.2 + %y.2 = load i32, ptr %q.2 + %sub.2 = sub i32 %x.2, %y.2 + %abs.2 = tail call i32 @llvm.abs.i32(i32 %sub.2, i1 true) + %sum.1 = add i32 %sum.0, %abs.2 + + %p.3 = getelementptr inbounds i32, ptr %p.2, i64 1 + %x.3 = load i32, ptr %p.3 + %q.3 = getelementptr inbounds i32, ptr %q.2, i64 1 + %y.3 = load i32, ptr %q.3 + %sub.3 = sub i32 %x.3, %y.3 + %abs.3 = tail call i32 @llvm.abs.i32(i32 %sub.3, i1 true) + %sum.2 = add i32 %sum.1, %abs.3 + + ret i32 %sum.2 +} From 082ec267583100455fee356bb0d4ebd55aba2d46 Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Mon, 20 Mar 2023 20:54:44 -0500 Subject: [PATCH 201/691] [IRSim] Check largest sections first when analyzing similarity When we check for similarity, right now there is no order to how it is checked, except via the suffix tree ordering. We can reduce how much structural analysis we perform by checking the regions in decreasing size. In doing so, we know that if two large sections match, each of their contained regions also matches. This allows us to skip the structural checking for each smaller section. It does require that we use the large regions as a "bridge" to create the canonical mapping between the two regions. This reduces compile time significantly for some benchmarks. It will not perform as well for programs with many small items. Recommit fixes the IRSimilarity tests. Recommit of: 805ec19d7d9915989be8a8a626176b5e29e19eee Reviewer: paquette Differential Revision: https://reviews.llvm.org/D139338 --- .../llvm/Analysis/IRSimilarityIdentifier.h | 43 ++++ llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 214 +++++++++++++++++- .../Analysis/IRSimilarityIdentifier/basic.ll | 44 ++-- .../debug-inst-starts-block.ll | 21 +- .../IRSimilarityIdentifier/different.ll | 18 +- .../IRSimilarityIdentifier/nothing.ll | 2 + .../Transforms/IROutliner/illegal-assumes.ll | 40 ++-- 7 files changed, 320 insertions(+), 62 deletions(-) diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index 9f9e7c59b42ba..d40d51e2d3376 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -850,6 +850,49 @@ class IRSimilarityCandidate { IRSimilarityCandidate &SourceCand, DenseMap> &ToSourceMapping, DenseMap> &FromSourceMapping); + + /// Create a mapping for the value numbering of the calling + /// IRSimilarityCandidate, to a different separate set of numbers, based on + /// the canonical ordering in \p SourceCand.
These are defined based on the + /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of + /// these relationships should have the same information, just in opposite + /// directions. Uses the \p OneToOne mapping from target candidate to \p + /// SourceCand GVNs to determine the mapping first for values with multiple + /// mappings. This mapping is created by the ordering of operands in the + /// instruction they are first seen in the candidates. + /// + /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a + /// canonical numbering from. + /// \param [in,out] OneToOne - A mapping of value numbers from candidate + /// \p A to candidate \p B using the structure of the original instructions. + /// \param ToSourceMapping - The mapping of value numbers from this candidate + /// to \p SourceCand. + /// \param FromSourceMapping - The mapping of value numbers from \p SourceCand + /// to this candidate. + void createCanonicalRelationFrom( + IRSimilarityCandidate &SourceCand, + DenseMap &OneToOne, + DenseMap> &ToSourceMapping, + DenseMap> &FromSourceMapping); + + /// Create a mapping for the value numbering of the calling + /// IRSimilarityCandidate, to a different separate set of numbers, based on + /// the canonical ordering in \p SourceCand. These are defined based on the + /// canonical mapping defined between \p SourceCandLarge and + /// \p TargetCandLarge. These IRSimilarityCandidates are already structurally + /// similar, and fully encapsulate the IRSimilarityCandidates in question. + /// These are used as a "bridge" from the \p SourceCand to the target. + /// + /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a + /// canonical numbering from. + /// \param SourceCandLarge - The IRSimilarityCandidate fully containing + /// \p SourceCand. + /// \param TargetCandLarge - The IRSimilarityCandidate fully containing + /// this Candidate. + void createCanonicalRelationFrom( + IRSimilarityCandidate &SourceCand, + IRSimilarityCandidate &SourceCandLarge, + IRSimilarityCandidate &TargetCandLarge); /// \param [in,out] BBSet - The set to track the basic blocks.
void getBasicBlocks(DenseSet &BBSet) const { diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp index c8007be4142cf..454ce5a6925bf 100644 --- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp +++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp @@ -1101,6 +1101,76 @@ void IRSimilarityCandidate::createCanonicalRelationFrom( } } +void IRSimilarityCandidate::createCanonicalRelationFrom( + IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge, + IRSimilarityCandidate &TargetCandLarge) { + assert(!SourceCand.CanonNumToNumber.empty() && + "Canonical Relationship is non-empty"); + assert(!SourceCand.NumberToCanonNum.empty() && + "Canonical Relationship is non-empty"); + + assert(!SourceCandLarge.CanonNumToNumber.empty() && + "Canonical Relationship is non-empty"); + assert(!SourceCandLarge.NumberToCanonNum.empty() && + "Canonical Relationship is non-empty"); + + assert(!TargetCandLarge.CanonNumToNumber.empty() && + "Canonical Relationship is non-empty"); + assert(!TargetCandLarge.NumberToCanonNum.empty() && + "Canonical Relationship is non-empty"); + + assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty"); + assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty"); + + // We're going to use the larger candidates as a "bridge" to create the + // canonical number for the target candidate since we have identified two + // candidates as subsequences of larger sequences, which therefore must be + // structurally similar. + for (std::pair &ValueNumPair : ValueToNumber) { + Value *CurrVal = ValueNumPair.first; + unsigned TargetCandGVN = ValueNumPair.second; + + // Find the numbering in the large candidate that surrounds the + // current candidate. + std::optional OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal); + assert(OLargeTargetGVN.has_value() && "GVN not found for Value"); + + // Get the canonical numbering in the large target candidate. + std::optional OTargetCandCanon = + TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value()); + assert(OTargetCandCanon.has_value() && + "Canonical Number not found for GVN"); + + // Get the GVN in the large source candidate from the canonical numbering. + std::optional OLargeSourceGVN = + SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value()); + assert(OLargeSourceGVN.has_value() && + "GVN Number not found for Canonical Number"); + + // Get the Value from the GVN in the large source candidate. + std::optional OLargeSourceV = + SourceCandLarge.fromGVN(OLargeSourceGVN.value()); + assert(OLargeSourceV.has_value() && "Value not found for GVN"); + + // Get the GVN number for the Value in the source candidate. + std::optional OSourceGVN = + SourceCand.getGVN(OLargeSourceV.value()); + assert(OSourceGVN.has_value() && "GVN Number not found for Value"); + + // Get the canonical numbering from the GVN. + std::optional OSourceCanon = + SourceCand.getCanonicalNum(OSourceGVN.value()); + assert(OSourceCanon.has_value() && "Canon Number not found for GVN"); + + // Insert the canonical numbering and GVN pair into their respective + // mappings.
+ CanonNumToNumber.insert( + std::make_pair(OSourceCanon.value(), TargetCandGVN)); + NumberToCanonNum.insert( + std::make_pair(TargetCandGVN, OSourceCanon.value())); + } +} + void IRSimilarityCandidate::createCanonicalMappingFor( IRSimilarityCandidate &CurrCand) { assert(CurrCand.CanonNumToNumber.size() == 0 && @@ -1118,6 +1188,81 @@ void IRSimilarityCandidate::createCanonicalMappingFor( } } +/// Look for larger IRSimilarityCandidates from the previously matched +/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is +/// an overlap, return a pair of structurally similar, larger +/// IRSimilarityCandidates. +/// +/// \param [in] CandA - The first candidate we are trying to determine the +/// structure of. +/// \param [in] CandB - The second candidate we are trying to determine the +/// structure of. +/// \param [in] IndexToIncludedCand - Mapping from the index of an instruction +/// to the IRSimilarityCandidates that include that instruction. +/// \param [in] CandToGroup - Mapping of IRSimilarityCandidate to a +/// number representing the structural group assigned to it. +static std::optional< + std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> +CheckLargerCands( + IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, + DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, + DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) { + DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA; + DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB; + DenseSet<unsigned> IncludedGroupsA; + DenseSet<unsigned> IncludedGroupsB; + + // Find the overall similarity group numbers that fully contain the first + // candidate, and record the larger candidate for each group. + auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx()); + std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> + Result; + + unsigned CandAStart = CandA.getStartIdx(); + unsigned CandAEnd = CandA.getEndIdx(); + unsigned CandBStart = CandB.getStartIdx(); + unsigned CandBEnd = CandB.getEndIdx(); + if (IdxToCandidateIt == IndexToIncludedCand.end()) + return Result; + for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { + if (MatchedCand->getStartIdx() > CandAStart || + MatchedCand->getEndIdx() < CandAEnd) + continue; + unsigned GroupNum = CandToGroup.find(MatchedCand)->second; + IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand)); + IncludedGroupsA.insert(GroupNum); + } + + // Find the overall similarity group numbers that fully contain the second + // candidate, and record the larger candidate for each group. + IdxToCandidateIt = IndexToIncludedCand.find(CandBStart); + if (IdxToCandidateIt == IndexToIncludedCand.end()) + return Result; + for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) { + if (MatchedCand->getStartIdx() > CandBStart || + MatchedCand->getEndIdx() < CandBEnd) + continue; + unsigned GroupNum = CandToGroup.find(MatchedCand)->second; + IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand)); + IncludedGroupsB.insert(GroupNum); + } + + // Find the intersection between the two sets of groups; these are the + // groups where the larger candidates exist. + set_intersect(IncludedGroupsA, IncludedGroupsB); + + // If there is no intersection between the sets, then we cannot determine + // whether or not there is a match. + if (IncludedGroupsA.empty()) + return Result; + + // Create a pair that contains the larger candidates.
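+ // Any group in the intersection can act as the bridge: both lookups below + // use the first group number in the intersected set, so the two larger + // candidates are taken from the same structural group.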
+ auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin()); + auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin()); + Result = std::make_pair(ItA->second, ItB->second); + return Result; +} + /// From the list of IRSimilarityCandidates, perform a comparison between each /// IRSimilarityCandidate to determine if there are overlapping /// IRInstructionData, or if they do not have the same structure. @@ -1127,9 +1272,16 @@ void IRSimilarityCandidate::createCanonicalMappingFor( /// \param [out] StructuralGroups - the mapping of unsigned integers to vector /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the /// vector are structurally similar to one another. +/// \param [in] IndexToIncludedCand - Mapping from the index of an instruction +/// to the IRSimilarityCandidates that include that instruction. +/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a +/// number representing the structural group assigned to it. static void findCandidateStructures( std::vector<IRSimilarityCandidate> &CandsForRepSubstring, - DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { + DenseMap<unsigned, SimilarityGroup> &StructuralGroups, + DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand, + DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup + ) { std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, InnerCandEndIt; @@ -1192,6 +1344,24 @@ static void findCandidateStructures( if (CandToGroupItInner != CandToGroup.end()) continue; + // Check if we have already found structural similarity between two larger + // candidates that fully contain the first and second candidates. + std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>> + LargerPair = CheckLargerCands( + *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup); + + // If a pair was found, it means that we can assume that these smaller + // substrings are also structurally similar. Use the larger candidates to + // determine the canonical mapping between the two sections. + if (LargerPair.has_value()) { + SameStructure = true; + InnerCandIt->createCanonicalRelationFrom( + *CandIt, *LargerPair.value().first, *LargerPair.value().second); + CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); + CurrentGroupPair->second.push_back(*InnerCandIt); + continue; + } + // Otherwise we determine if they have the same structure and add it to // vector if they match. ValueNumberMappingA.clear(); @@ -1218,24 +1388,58 @@ void IRSimilarityIdentifier::findCandidates( std::vector<SimilarityGroup> NewCandidateGroups; DenseMap<unsigned, SimilarityGroup> StructuralGroups; + DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand; + DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; // Iterate over the subsequences found by the Suffix Tree to create // IRSimilarityCandidates for each repeated subsequence and determine which // instances are structurally similar to one another. - for (SuffixTree::RepeatedSubstring &RS : ST) { + + // Sort the suffix tree's repeated substrings from longest to shortest.
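+ // Processing the longest substrings first means that any candidate fully + // containing a later, shorter candidate has already been grouped, so it + // can be reused as a structural "bridge" for the shorter one.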
+ std::vector<SuffixTree::RepeatedSubstring> RSes; + for (SuffixTree::RepeatedSubstring &RS : ST) + RSes.push_back(RS); + + llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS, + const SuffixTree::RepeatedSubstring &RHS) { + return LHS.Length > RHS.Length; + }); + for (SuffixTree::RepeatedSubstring &RS : RSes) { createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, CandsForRepSubstring); if (CandsForRepSubstring.size() < 2) continue; - findCandidateStructures(CandsForRepSubstring, StructuralGroups); - for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) + findCandidateStructures(CandsForRepSubstring, StructuralGroups, + IndexToIncludedCand, CandToGroup); + for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) { // We only add the group if it contains more than one // IRSimilarityCandidate. If there is only one, that means there is no // other repeated subsequence with the same structure. - if (Group.second.size() > 1) + if (Group.second.size() > 1) { SimilarityCandidates->push_back(Group.second); + // Iterate over each candidate in the group, and add an entry mapping + // the index of each included instruction to the set of + // IRSimilarityCandidates that include that instruction. + for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) { + for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx(); + Idx <= Edx; ++Idx) { + DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator + IdIt; + IdIt = IndexToIncludedCand.find(Idx); + bool Inserted = false; + if (IdIt == IndexToIncludedCand.end()) + std::tie(IdIt, Inserted) = IndexToIncludedCand.insert( + std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>())); + IdIt->second.insert(&IRCand); + } + // Add mapping of candidate to the overall similarity group number. + CandToGroup.insert( + std::make_pair(&IRCand, SimilarityCandidates->size() - 1)); + } + } + } CandsForRepSubstring.clear(); StructuralGroups.clear(); diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll b/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll index 79ae402e9f9f5..1c08cb407c2e3 100644 --- a/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll +++ b/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll @@ -4,33 +4,33 @@ ; This is a simple test to make sure the IRSimilarityIdentifier and ; IRSimilarityPrinterPass is working. -; CHECK: 4 candidates of length 2. Found in: +; CHECK: 4 candidates of length 6. Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 5, ptr %5, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 3. Found in: +; CHECK-NEXT:4 candidates of length 5.
Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 2, ptr %2, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 4. Found in: +; CHECK-NEXT:4 candidates of length 4. Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) ; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 @@ -43,31 +43,31 @@ ; CHECK-NEXT: Function: dog, Basic Block: entry ; CHECK-NEXT: Start Instruction: store i32 2, ptr %2, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 5. Found in: +; CHECK-NEXT:4 candidates of length 3. Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 2, ptr %2, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 6. Found in: +; CHECK-NEXT:4 candidates of length 2. 
Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 define linkonce_odr void @fish() { @@ -136,3 +136,5 @@ entry: store i32 5, ptr %5, align 4 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll b/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll index 72fa361077587..78ecec92cc31a 100644 --- a/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll +++ b/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -disable-output -S -passes=print-ir-similarity < %s 2>&1 | FileCheck %s ; When a debug instruction is the first instruction in a block, when that block @@ -5,27 +6,27 @@ ; counted in similarity matching they must be ignored when creating canonical ; relations from one region to another. This checks that this is enforced. -; CHECK: 2 candidates of length 3. Found in: +; CHECK: 2 candidates of length 4. Found in: +; CHECK-NEXT: Function: main, Basic Block: entry +; CHECK-NEXT: Start Instruction: %0 = add i32 1, 4 +; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 +; CHECK-NEXT: Function: main, Basic Block: for.body169 +; CHECK-NEXT: Start Instruction: %2 = add i32 1, 4 +; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 +; CHECK-NEXT: 2 candidates of length 3. Found in: ; CHECK-NEXT: Function: main, Basic Block: entry ; CHECK-NEXT: Start Instruction: br label %for.body169 ; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 ; CHECK-NEXT: Function: main, Basic Block: for.body169 ; CHECK-NEXT: Start Instruction: br label %for.end122 ; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 -; CHECK-NEXT: 2 candidates of length 2. Found in: +; CHECK-NEXT: 2 candidates of length 2. Found in: ; CHECK-NEXT: Function: main, Basic Block: for.end122 ; CHECK-NEXT: Start Instruction: store i32 30, ptr undef, align 1 ; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 ; CHECK-NEXT: Function: main, Basic Block: for.end246 ; CHECK-NEXT: Start Instruction: store i32 0, ptr undef, align 1 ; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 -; CHECK-NEXT: 2 candidates of length 4. 
Found in: -; CHECK-NEXT: Function: main, Basic Block: entry -; CHECK-NEXT: Start Instruction: %0 = add i32 1, 4 -; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 -; CHECK-NEXT: Function: main, Basic Block: for.body169 -; CHECK-NEXT: Start Instruction: %2 = add i32 1, 4 -; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 source_filename = "irsimilarity_crash.ll" @@ -72,3 +73,5 @@ attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willre !9 = !{} !10 = !DIBasicType(name: "long", size: 32, encoding: DW_ATE_signed) !11 = !DILocation(line: 522, column: 23, scope: !2) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/different.ll b/llvm/test/Analysis/IRSimilarityIdentifier/different.ll index 701af7e21b32e..e5c9970b159b9 100644 --- a/llvm/test/Analysis/IRSimilarityIdentifier/different.ll +++ b/llvm/test/Analysis/IRSimilarityIdentifier/different.ll @@ -5,20 +5,20 @@ ; return items only within the same function when there are different sets of ; instructions in functions. -; CHECK: 2 candidates of length 3. Found in: -; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: %b = load i32, ptr %1, align 4 -; CHECK-NEXT: End Instruction: %d = load i32, ptr %3, align 4 -; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: %a = load i32, ptr %0, align 4 -; CHECK-NEXT: End Instruction: %c = load i32, ptr %2, align 4 -; CHECK-NEXT: 2 candidates of length 5. Found in: +; CHECK: 2 candidates of length 5. Found in: ; CHECK-NEXT: Function: fish, Basic Block: entry ; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 ; CHECK-NEXT: End Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry ; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 +; CHECK-NEXT: 2 candidates of length 3. Found in: +; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) +; CHECK-NEXT: Start Instruction: %b = load i32, ptr %1, align 4 +; CHECK-NEXT: End Instruction: %d = load i32, ptr %3, align 4 +; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) +; CHECK-NEXT: Start Instruction: %a = load i32, ptr %0, align 4 +; CHECK-NEXT: End Instruction: %c = load i32, ptr %2, align 4 define linkonce_odr void @fish() { entry: @@ -44,3 +44,5 @@ define void @turtle(ptr %0, ptr %1, ptr %2, ptr %3) { %d = load i32, ptr %3 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll b/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll index 5c7210790f472..4891a587d776f 100644 --- a/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll +++ b/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll @@ -10,3 +10,5 @@ define linkonce_odr void @fish() { entry: ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Transforms/IROutliner/illegal-assumes.ll b/llvm/test/Transforms/IROutliner/illegal-assumes.ll index b91f8ec92200e..d863fe7a29903 100644 --- a/llvm/test/Transforms/IROutliner/illegal-assumes.ll +++ b/llvm/test/Transforms/IROutliner/illegal-assumes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=verify,iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -p iroutliner,verify -ir-outlining-no-cost < %s | FileCheck %s ; This test ensures that we do not include llvm.assumes. There are exceptions ; in the CodeExtractor's algorithm for llvm.assumes, so we ignore it for now. @@ -13,13 +13,13 @@ define void @outline_assumes() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_3(i1 true, ptr [[D]], ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_4(i1 true, ptr [[D]], ptr [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) ; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]] -; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) ; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -49,12 +49,12 @@ define void @outline_assumes2() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_3(i1 false, ptr [[D]], ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_4(i1 false, ptr [[D]], ptr [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) ; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -77,16 +77,17 @@ entry: define void @outline_assumes3() { ; CHECK-LABEL: @outline_assumes3( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: store i1 true, ptr [[D]], align 4 -; CHECK-NEXT: [[DL:%.*]] = load i1, ptr [[D]], align 1 -; CHECK-NEXT: [[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] -; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) -; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i1 true, ptr [[D]], ptr [[A]], ptr [[B]], ptr [[C]], ptr [[DL_LOC]]) +; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 +; CHECK-NEXT: 
call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_3(ptr [[A]]) ; CHECK-NEXT: ret void ; entry: @@ -109,16 +110,17 @@ entry: define void @outline_assumes4() { ; CHECK-LABEL: @outline_assumes4( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: store i1 false, ptr [[D]], align 4 -; CHECK-NEXT: [[DL:%.*]] = load i1, ptr [[D]], align 1 -; CHECK-NEXT: [[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] -; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) -; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i1 false, ptr [[D]], ptr [[A]], ptr [[B]], ptr [[C]], ptr [[DL_LOC]]) +; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_3(ptr [[A]]) ; CHECK-NEXT: ret void ; entry: From 586ad89638dd1282a41780a3c369e1bd881a7f42 Mon Sep 17 00:00:00 2001 From: Mirko Brkusanin Date: Tue, 21 Mar 2023 17:00:57 +0100 Subject: [PATCH 202/691] [AMDGPU][NFC] Update GFX11 test checks --- .../llvm.amdgcn.image.atomic.dim.mir | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir index 9a139381dff54..7e5b3fe3be7f3 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.atomic.dim.mir @@ -2,6 +2,7 @@ # RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -run-pass=instruction-select -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s # RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -run-pass=instruction-select -mcpu=fiji -o - %s | FileCheck -check-prefix=GFX8 %s # RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -run-pass=instruction-select -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s +# RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -run-pass=instruction-select -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX11 %s --- name: atomic_cmpswap_i32_1d @@ -43,6 +44,16 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_]].sub0 ; GFX10-NEXT: $vgpr0 = COPY [[COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 + ; GFX11-LABEL: name: atomic_cmpswap_i32_1d + ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11_]].sub0 + ; GFX11-NEXT: $vgpr0 = 
COPY [[COPY3]] + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -85,6 +96,14 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 + ; GFX11-LABEL: name: atomic_cmpswap_i32_1d_no_return + ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1, $vgpr2 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11_:%[0-9]+]]:vreg_64 = IMAGE_ATOMIC_CMPSWAP_V1_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 3, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), addrspace 7) + ; GFX11-NEXT: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s32>) = COPY $vgpr0_vgpr1 %2:vgpr(s32) = COPY $vgpr2 @@ -132,6 +151,16 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_]].sub0_sub1 ; GFX10-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 + ; GFX11-LABEL: name: atomic_cmpswap_i64_1d + ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) + ; GFX11-NEXT: [[COPY3:%[0-9]+]]:vreg_64 = COPY killed [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_]].sub0_sub1 + ; GFX11-NEXT: $vgpr0_vgpr1 = COPY [[COPY3]] + ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0_vgpr1 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %2:vgpr(s32) = COPY $vgpr4 @@ -174,6 +203,14 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GFX10-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx10 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), addrspace 7) ; GFX10-NEXT: S_ENDPGM 0 + ; GFX11-LABEL: name: atomic_cmpswap_i64_1d_no_return + ; GFX11: liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 + ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sgpr_256 = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 + ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX11-NEXT: [[IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11_:%[0-9]+]]:vreg_128 = IMAGE_ATOMIC_CMPSWAP_V2_V1_gfx11 [[COPY1]], [[COPY2]], [[COPY]], 15, 0, 1, 1, 0, 0, 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), 
addrspace 7) + ; GFX11-NEXT: S_ENDPGM 0 %0:sgpr(<8 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 %1:vgpr(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %2:vgpr(s32) = COPY $vgpr4 From 4277d932ef180090f314f7eb7f47d63e76881d30 Mon Sep 17 00:00:00 2001 From: Anna Thomas Date: Wed, 8 Mar 2023 17:32:50 -0500 Subject: [PATCH 203/691] [LV] Use speculatability within entire loop to avoid strided load predication Use existing functionality for identifying total access size by strided loads. If we can speculate the load across all vector iterations, we can avoid predication for these strided loads (or masked gathers in architectures which support it). Differential Revision: https://reviews.llvm.org/D145616 --- llvm/lib/Analysis/Loads.cpp | 15 +- .../LoopVectorize/X86/load-deref-pred.ll | 448 ++++++------------ 2 files changed, 150 insertions(+), 313 deletions(-) diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 48e435a06ba15..90be40da8c6fd 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -286,15 +286,22 @@ bool llvm::isDereferenceableAndAlignedInLoop(LoadInst *LI, Loop *L, auto* Step = dyn_cast(AddRec->getStepRecurrence(SE)); if (!Step) return false; - // TODO: generalize to access patterns which have gaps - if (Step->getAPInt() != EltSize) - return false; auto TC = SE.getSmallConstantMaxTripCount(L); if (!TC) return false; - const APInt AccessSize = TC * EltSize; + // TODO: Handle overlapping accesses. + // We should be computing AccessSize as (TC - 1) * Step + EltSize. + if (EltSize.sgt(Step->getAPInt())) + return false; + + // Compute the total access size for access patterns with unit stride and + // patterns with gaps. For patterns with unit stride, Step and EltSize are the + // same. + // For patterns with gaps (i.e. non unit stride), we are + // accessing EltSize bytes at every Step. 
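+ // Illustrative example (hypothetical numbers, not from this patch): for + // i32 loads (EltSize = 4 bytes) strided by two elements (Step = 8 bytes) + // with a maximum trip count of 100, AccessSize = TC * Step = 800 bytes, a + // conservative over-estimate of the exact (TC - 1) * Step + EltSize = 796 + // bytes noted in the TODO above.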
+ const APInt AccessSize = TC * Step->getAPInt(); auto *StartS = dyn_cast(AddRec->getStart()); if (!StartS) diff --git a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll index dfb210772f44a..e53f5433f06f1 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/load-deref-pred.ll @@ -1154,8 +1154,6 @@ loop_exit: ret i32 %accum.next } - -;; TODO: handle non-unit strides define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-LABEL: @test_non_unit_stride( ; CHECK-NEXT: entry: @@ -1165,11 +1163,11 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE33:%.*]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP148:%.*]], [[PRED_LOAD_CONTINUE33]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP149:%.*]], [[PRED_LOAD_CONTINUE33]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP150:%.*]], [[PRED_LOAD_CONTINUE33]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP151:%.*]], [[PRED_LOAD_CONTINUE33]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP116:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP117:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP118:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP119:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 @@ -1235,175 +1233,79 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP61:%.*]] = insertelement <4 x i1> [[TMP60]], i1 [[TMP57]], i32 1 ; CHECK-NEXT: [[TMP62:%.*]] = insertelement <4 x i1> [[TMP61]], i1 [[TMP58]], i32 2 ; CHECK-NEXT: [[TMP63:%.*]] = insertelement <4 x i1> [[TMP62]], i1 [[TMP59]], i32 3 -; CHECK-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP39]], i32 0 -; CHECK-NEXT: br i1 [[TMP64]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] -; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP66:%.*]] = load i32, ptr [[TMP65]], align 4 -; CHECK-NEXT: [[TMP67:%.*]] = insertelement <4 x i32> poison, i32 [[TMP66]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] -; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP68:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP67]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP69:%.*]] = extractelement <4 x i1> [[TMP39]], i32 1 -; CHECK-NEXT: br i1 [[TMP69]], label [[PRED_LOAD_IF4:%.*]], label [[PRED_LOAD_CONTINUE5:%.*]] -; CHECK: pred.load.if4: -; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP71:%.*]] = load i32, ptr [[TMP70]], align 4 -; CHECK-NEXT: [[TMP72:%.*]] = insertelement <4 x i32> [[TMP68]], i32 [[TMP71]], i32 1 -; 
CHECK-NEXT: br label [[PRED_LOAD_CONTINUE5]] -; CHECK: pred.load.continue5: -; CHECK-NEXT: [[TMP73:%.*]] = phi <4 x i32> [ [[TMP68]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP72]], [[PRED_LOAD_IF4]] ] -; CHECK-NEXT: [[TMP74:%.*]] = extractelement <4 x i1> [[TMP39]], i32 2 -; CHECK-NEXT: br i1 [[TMP74]], label [[PRED_LOAD_IF6:%.*]], label [[PRED_LOAD_CONTINUE7:%.*]] -; CHECK: pred.load.if6: -; CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP76:%.*]] = load i32, ptr [[TMP75]], align 4 -; CHECK-NEXT: [[TMP77:%.*]] = insertelement <4 x i32> [[TMP73]], i32 [[TMP76]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE7]] -; CHECK: pred.load.continue7: -; CHECK-NEXT: [[TMP78:%.*]] = phi <4 x i32> [ [[TMP73]], [[PRED_LOAD_CONTINUE5]] ], [ [[TMP77]], [[PRED_LOAD_IF6]] ] -; CHECK-NEXT: [[TMP79:%.*]] = extractelement <4 x i1> [[TMP39]], i32 3 -; CHECK-NEXT: br i1 [[TMP79]], label [[PRED_LOAD_IF8:%.*]], label [[PRED_LOAD_CONTINUE9:%.*]] -; CHECK: pred.load.if8: -; CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP80]], align 4 -; CHECK-NEXT: [[TMP82:%.*]] = insertelement <4 x i32> [[TMP78]], i32 [[TMP81]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE9]] -; CHECK: pred.load.continue9: -; CHECK-NEXT: [[TMP83:%.*]] = phi <4 x i32> [ [[TMP78]], [[PRED_LOAD_CONTINUE7]] ], [ [[TMP82]], [[PRED_LOAD_IF8]] ] -; CHECK-NEXT: [[TMP84:%.*]] = extractelement <4 x i1> [[TMP47]], i32 0 -; CHECK-NEXT: br i1 [[TMP84]], label [[PRED_LOAD_IF10:%.*]], label [[PRED_LOAD_CONTINUE11:%.*]] -; CHECK: pred.load.if10: -; CHECK-NEXT: [[TMP85:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP86:%.*]] = load i32, ptr [[TMP85]], align 4 -; CHECK-NEXT: [[TMP87:%.*]] = insertelement <4 x i32> poison, i32 [[TMP86]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE11]] -; CHECK: pred.load.continue11: -; CHECK-NEXT: [[TMP88:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE9]] ], [ [[TMP87]], [[PRED_LOAD_IF10]] ] -; CHECK-NEXT: [[TMP89:%.*]] = extractelement <4 x i1> [[TMP47]], i32 1 -; CHECK-NEXT: br i1 [[TMP89]], label [[PRED_LOAD_IF12:%.*]], label [[PRED_LOAD_CONTINUE13:%.*]] -; CHECK: pred.load.if12: -; CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP90]], align 4 -; CHECK-NEXT: [[TMP92:%.*]] = insertelement <4 x i32> [[TMP88]], i32 [[TMP91]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE13]] -; CHECK: pred.load.continue13: -; CHECK-NEXT: [[TMP93:%.*]] = phi <4 x i32> [ [[TMP88]], [[PRED_LOAD_CONTINUE11]] ], [ [[TMP92]], [[PRED_LOAD_IF12]] ] -; CHECK-NEXT: [[TMP94:%.*]] = extractelement <4 x i1> [[TMP47]], i32 2 -; CHECK-NEXT: br i1 [[TMP94]], label [[PRED_LOAD_IF14:%.*]], label [[PRED_LOAD_CONTINUE15:%.*]] -; CHECK: pred.load.if14: -; CHECK-NEXT: [[TMP95:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP95]], align 4 -; CHECK-NEXT: [[TMP97:%.*]] = insertelement <4 x i32> [[TMP93]], i32 [[TMP96]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE15]] -; CHECK: pred.load.continue15: -; CHECK-NEXT: [[TMP98:%.*]] = phi <4 x i32> [ [[TMP93]], [[PRED_LOAD_CONTINUE13]] ], [ [[TMP97]], [[PRED_LOAD_IF14]] ] -; CHECK-NEXT: [[TMP99:%.*]] = extractelement <4 x i1> [[TMP47]], i32 3 -; CHECK-NEXT: br i1 [[TMP99]], label [[PRED_LOAD_IF16:%.*]], label [[PRED_LOAD_CONTINUE17:%.*]] -; CHECK: pred.load.if16: -; 
CHECK-NEXT: [[TMP100:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP101:%.*]] = load i32, ptr [[TMP100]], align 4 -; CHECK-NEXT: [[TMP102:%.*]] = insertelement <4 x i32> [[TMP98]], i32 [[TMP101]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE17]] -; CHECK: pred.load.continue17: -; CHECK-NEXT: [[TMP103:%.*]] = phi <4 x i32> [ [[TMP98]], [[PRED_LOAD_CONTINUE15]] ], [ [[TMP102]], [[PRED_LOAD_IF16]] ] -; CHECK-NEXT: [[TMP104:%.*]] = extractelement <4 x i1> [[TMP55]], i32 0 -; CHECK-NEXT: br i1 [[TMP104]], label [[PRED_LOAD_IF18:%.*]], label [[PRED_LOAD_CONTINUE19:%.*]] -; CHECK: pred.load.if18: -; CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP8]] -; CHECK-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP105]], align 4 -; CHECK-NEXT: [[TMP107:%.*]] = insertelement <4 x i32> poison, i32 [[TMP106]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE19]] -; CHECK: pred.load.continue19: -; CHECK-NEXT: [[TMP108:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE17]] ], [ [[TMP107]], [[PRED_LOAD_IF18]] ] -; CHECK-NEXT: [[TMP109:%.*]] = extractelement <4 x i1> [[TMP55]], i32 1 -; CHECK-NEXT: br i1 [[TMP109]], label [[PRED_LOAD_IF20:%.*]], label [[PRED_LOAD_CONTINUE21:%.*]] -; CHECK: pred.load.if20: -; CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP9]] -; CHECK-NEXT: [[TMP111:%.*]] = load i32, ptr [[TMP110]], align 4 -; CHECK-NEXT: [[TMP112:%.*]] = insertelement <4 x i32> [[TMP108]], i32 [[TMP111]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE21]] -; CHECK: pred.load.continue21: -; CHECK-NEXT: [[TMP113:%.*]] = phi <4 x i32> [ [[TMP108]], [[PRED_LOAD_CONTINUE19]] ], [ [[TMP112]], [[PRED_LOAD_IF20]] ] -; CHECK-NEXT: [[TMP114:%.*]] = extractelement <4 x i1> [[TMP55]], i32 2 -; CHECK-NEXT: br i1 [[TMP114]], label [[PRED_LOAD_IF22:%.*]], label [[PRED_LOAD_CONTINUE23:%.*]] -; CHECK: pred.load.if22: -; CHECK-NEXT: [[TMP115:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP10]] -; CHECK-NEXT: [[TMP116:%.*]] = load i32, ptr [[TMP115]], align 4 -; CHECK-NEXT: [[TMP117:%.*]] = insertelement <4 x i32> [[TMP113]], i32 [[TMP116]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE23]] -; CHECK: pred.load.continue23: -; CHECK-NEXT: [[TMP118:%.*]] = phi <4 x i32> [ [[TMP113]], [[PRED_LOAD_CONTINUE21]] ], [ [[TMP117]], [[PRED_LOAD_IF22]] ] -; CHECK-NEXT: [[TMP119:%.*]] = extractelement <4 x i1> [[TMP55]], i32 3 -; CHECK-NEXT: br i1 [[TMP119]], label [[PRED_LOAD_IF24:%.*]], label [[PRED_LOAD_CONTINUE25:%.*]] -; CHECK: pred.load.if24: -; CHECK-NEXT: [[TMP120:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP11]] -; CHECK-NEXT: [[TMP121:%.*]] = load i32, ptr [[TMP120]], align 4 -; CHECK-NEXT: [[TMP122:%.*]] = insertelement <4 x i32> [[TMP118]], i32 [[TMP121]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE25]] -; CHECK: pred.load.continue25: -; CHECK-NEXT: [[TMP123:%.*]] = phi <4 x i32> [ [[TMP118]], [[PRED_LOAD_CONTINUE23]] ], [ [[TMP122]], [[PRED_LOAD_IF24]] ] -; CHECK-NEXT: [[TMP124:%.*]] = extractelement <4 x i1> [[TMP63]], i32 0 -; CHECK-NEXT: br i1 [[TMP124]], label [[PRED_LOAD_IF26:%.*]], label [[PRED_LOAD_CONTINUE27:%.*]] -; CHECK: pred.load.if26: -; CHECK-NEXT: [[TMP125:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP126:%.*]] = load i32, ptr [[TMP125]], align 4 -; CHECK-NEXT: [[TMP127:%.*]] = insertelement <4 x i32> poison, i32 [[TMP126]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE27]] -; CHECK: pred.load.continue27: -; 
CHECK-NEXT: [[TMP128:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE25]] ], [ [[TMP127]], [[PRED_LOAD_IF26]] ] -; CHECK-NEXT: [[TMP129:%.*]] = extractelement <4 x i1> [[TMP63]], i32 1 -; CHECK-NEXT: br i1 [[TMP129]], label [[PRED_LOAD_IF28:%.*]], label [[PRED_LOAD_CONTINUE29:%.*]] -; CHECK: pred.load.if28: -; CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP13]] -; CHECK-NEXT: [[TMP131:%.*]] = load i32, ptr [[TMP130]], align 4 -; CHECK-NEXT: [[TMP132:%.*]] = insertelement <4 x i32> [[TMP128]], i32 [[TMP131]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE29]] -; CHECK: pred.load.continue29: -; CHECK-NEXT: [[TMP133:%.*]] = phi <4 x i32> [ [[TMP128]], [[PRED_LOAD_CONTINUE27]] ], [ [[TMP132]], [[PRED_LOAD_IF28]] ] -; CHECK-NEXT: [[TMP134:%.*]] = extractelement <4 x i1> [[TMP63]], i32 2 -; CHECK-NEXT: br i1 [[TMP134]], label [[PRED_LOAD_IF30:%.*]], label [[PRED_LOAD_CONTINUE31:%.*]] -; CHECK: pred.load.if30: -; CHECK-NEXT: [[TMP135:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP14]] -; CHECK-NEXT: [[TMP136:%.*]] = load i32, ptr [[TMP135]], align 4 -; CHECK-NEXT: [[TMP137:%.*]] = insertelement <4 x i32> [[TMP133]], i32 [[TMP136]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE31]] -; CHECK: pred.load.continue31: -; CHECK-NEXT: [[TMP138:%.*]] = phi <4 x i32> [ [[TMP133]], [[PRED_LOAD_CONTINUE29]] ], [ [[TMP137]], [[PRED_LOAD_IF30]] ] -; CHECK-NEXT: [[TMP139:%.*]] = extractelement <4 x i1> [[TMP63]], i32 3 -; CHECK-NEXT: br i1 [[TMP139]], label [[PRED_LOAD_IF32:%.*]], label [[PRED_LOAD_CONTINUE33]] -; CHECK: pred.load.if32: -; CHECK-NEXT: [[TMP140:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP15]] -; CHECK-NEXT: [[TMP141:%.*]] = load i32, ptr [[TMP140]], align 4 -; CHECK-NEXT: [[TMP142:%.*]] = insertelement <4 x i32> [[TMP138]], i32 [[TMP141]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE33]] -; CHECK: pred.load.continue33: -; CHECK-NEXT: [[TMP143:%.*]] = phi <4 x i32> [ [[TMP138]], [[PRED_LOAD_CONTINUE31]] ], [ [[TMP142]], [[PRED_LOAD_IF32]] ] -; CHECK-NEXT: [[TMP144:%.*]] = xor <4 x i1> [[TMP39]], -; CHECK-NEXT: [[TMP145:%.*]] = xor <4 x i1> [[TMP47]], -; CHECK-NEXT: [[TMP146:%.*]] = xor <4 x i1> [[TMP55]], -; CHECK-NEXT: [[TMP147:%.*]] = xor <4 x i1> [[TMP63]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP39]], <4 x i32> [[TMP83]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI34:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI35:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP123]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[PREDPHI36:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP143]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP148]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] -; CHECK-NEXT: [[TMP149]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI34]] -; CHECK-NEXT: [[TMP150]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI35]] -; CHECK-NEXT: [[TMP151]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI36]] +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP65:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP70:%.*]] = 
getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP75:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP77:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP80:%.*]] = load i32, ptr [[TMP64]], align 4 +; CHECK-NEXT: [[TMP81:%.*]] = load i32, ptr [[TMP65]], align 4 +; CHECK-NEXT: [[TMP82:%.*]] = load i32, ptr [[TMP66]], align 4 +; CHECK-NEXT: [[TMP83:%.*]] = load i32, ptr [[TMP67]], align 4 +; CHECK-NEXT: [[TMP84:%.*]] = insertelement <4 x i32> poison, i32 [[TMP80]], i32 0 +; CHECK-NEXT: [[TMP85:%.*]] = insertelement <4 x i32> [[TMP84]], i32 [[TMP81]], i32 1 +; CHECK-NEXT: [[TMP86:%.*]] = insertelement <4 x i32> [[TMP85]], i32 [[TMP82]], i32 2 +; CHECK-NEXT: [[TMP87:%.*]] = insertelement <4 x i32> [[TMP86]], i32 [[TMP83]], i32 3 +; CHECK-NEXT: [[TMP88:%.*]] = load i32, ptr [[TMP68]], align 4 +; CHECK-NEXT: [[TMP89:%.*]] = load i32, ptr [[TMP69]], align 4 +; CHECK-NEXT: [[TMP90:%.*]] = load i32, ptr [[TMP70]], align 4 +; CHECK-NEXT: [[TMP91:%.*]] = load i32, ptr [[TMP71]], align 4 +; CHECK-NEXT: [[TMP92:%.*]] = insertelement <4 x i32> poison, i32 [[TMP88]], i32 0 +; CHECK-NEXT: [[TMP93:%.*]] = insertelement <4 x i32> [[TMP92]], i32 [[TMP89]], i32 1 +; CHECK-NEXT: [[TMP94:%.*]] = insertelement <4 x i32> [[TMP93]], i32 [[TMP90]], i32 2 +; CHECK-NEXT: [[TMP95:%.*]] = insertelement <4 x i32> [[TMP94]], i32 [[TMP91]], i32 3 +; CHECK-NEXT: [[TMP96:%.*]] = load i32, ptr [[TMP72]], align 4 +; CHECK-NEXT: [[TMP97:%.*]] = load i32, ptr [[TMP73]], align 4 +; CHECK-NEXT: [[TMP98:%.*]] = load i32, ptr [[TMP74]], align 4 +; CHECK-NEXT: [[TMP99:%.*]] = load i32, ptr [[TMP75]], align 4 +; CHECK-NEXT: [[TMP100:%.*]] = insertelement <4 x i32> poison, i32 [[TMP96]], i32 0 +; CHECK-NEXT: [[TMP101:%.*]] = insertelement <4 x i32> [[TMP100]], i32 [[TMP97]], i32 1 +; CHECK-NEXT: [[TMP102:%.*]] = insertelement <4 x i32> [[TMP101]], i32 [[TMP98]], i32 2 +; CHECK-NEXT: [[TMP103:%.*]] = insertelement <4 x i32> [[TMP102]], i32 [[TMP99]], i32 3 +; CHECK-NEXT: [[TMP104:%.*]] = load i32, ptr [[TMP76]], align 4 +; CHECK-NEXT: [[TMP105:%.*]] = load i32, ptr [[TMP77]], align 4 +; CHECK-NEXT: [[TMP106:%.*]] = load i32, ptr [[TMP78]], align 4 +; CHECK-NEXT: [[TMP107:%.*]] = load i32, ptr [[TMP79]], align 4 +; CHECK-NEXT: [[TMP108:%.*]] = insertelement <4 x i32> poison, i32 [[TMP104]], i32 0 +; CHECK-NEXT: [[TMP109:%.*]] = insertelement <4 x i32> [[TMP108]], i32 [[TMP105]], i32 1 +; CHECK-NEXT: [[TMP110:%.*]] = insertelement <4 x i32> [[TMP109]], i32 [[TMP106]], i32 2 +; CHECK-NEXT: [[TMP111:%.*]] = insertelement <4 x i32> [[TMP110]], i32 [[TMP107]], i32 3 +; CHECK-NEXT: [[TMP112:%.*]] = xor <4 x i1> [[TMP39]], +; CHECK-NEXT: [[TMP113:%.*]] = xor <4 x i1> [[TMP47]], +; CHECK-NEXT: [[TMP114:%.*]] = xor <4 x i1> [[TMP55]], +; CHECK-NEXT: [[TMP115:%.*]] = xor <4 x i1> [[TMP63]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> 
[[TMP39]], <4 x i32> [[TMP87]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI4:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[TMP95]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP55]], <4 x i32> [[TMP103]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI6:%.*]] = select <4 x i1> [[TMP63]], <4 x i32> [[TMP111]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP116]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] +; CHECK-NEXT: [[TMP117]] = add <4 x i32> [[VEC_PHI1]], [[PREDPHI4]] +; CHECK-NEXT: [[TMP118]] = add <4 x i32> [[VEC_PHI2]], [[PREDPHI5]] +; CHECK-NEXT: [[TMP119]] = add <4 x i32> [[VEC_PHI3]], [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP152:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2048 -; CHECK-NEXT: br i1 [[TMP152]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: [[TMP120:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2048 +; CHECK-NEXT: br i1 [[TMP120]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP149]], [[TMP148]] -; CHECK-NEXT: [[BIN_RDX37:%.*]] = add <4 x i32> [[TMP150]], [[BIN_RDX]] -; CHECK-NEXT: [[BIN_RDX38:%.*]] = add <4 x i32> [[TMP151]], [[BIN_RDX37]] -; CHECK-NEXT: [[TMP153:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX38]]) +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP117]], [[TMP116]] +; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP118]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP119]], [[BIN_RDX7]] +; CHECK-NEXT: [[TMP121:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 2048, 2048 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4096, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP153]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP121]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] @@ -1422,7 +1324,7 @@ define i32 @test_non_unit_stride(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 4093 ; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP153]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP121]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] ; entry: @@ -2622,8 +2524,8 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 3 ; CHECK-NEXT: 
[[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 3 @@ -2641,55 +2543,31 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> [[TMP12]], i1 [[TMP9]], i32 1 ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 2 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i1> [[TMP14]], i1 [[TMP11]], i32 3 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] -; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] -; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP19]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] -; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP23]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] -; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP20]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP24]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP15]], i32 2 -; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP28]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP30:%.*]] = phi <4 x i32> [ [[TMP25]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP29]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP15]], i32 3 -; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP33]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP35:%.*]] = phi <4 x i32> [ [[TMP30]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP36:%.*]] = xor <4 x i1> [[TMP15]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP35]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP37]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], 
align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP18]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP19]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP20]], i32 0 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP24]], i32 [[TMP21]], i32 1 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP22]], i32 2 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP23]], i32 3 +; CHECK-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP15]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP27]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP29]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 -; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 32 +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]]) +; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP29]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 35, 32 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] @@ -2708,7 +2586,7 @@ define i32 @test_stride_three(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 100 ; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] ; entry: @@ -2745,8 +2623,8 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 4 @@ -2764,55 +2642,31 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> [[TMP12]], i1 [[TMP9]], i32 1 ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 
2 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i1> [[TMP14]], i1 [[TMP11]], i32 3 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] -; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] -; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP19]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] -; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP23]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] -; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP20]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP24]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP15]], i32 2 -; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP28]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP30:%.*]] = phi <4 x i32> [ [[TMP25]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP29]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP15]], i32 3 -; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP33]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP35:%.*]] = phi <4 x i32> [ [[TMP30]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP36:%.*]] = xor <4 x i1> [[TMP15]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP35]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP37]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP18]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP19]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP20]], i32 0 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> 
[[TMP24]], i32 [[TMP21]], i32 1 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP22]], i32 2 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP23]], i32 3 +; CHECK-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP15]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP27]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP29]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 -; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24 +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]]) +; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP29]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 27, 24 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] @@ -2831,7 +2685,7 @@ define i32 @test_non_unit_stride_four(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 100 ; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP33:![0-9]+]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] ; entry: @@ -2868,8 +2722,8 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 5 ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 5 @@ -2887,55 +2741,31 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> [[TMP12]], i1 [[TMP9]], i32 1 ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 2 ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <4 x i1> [[TMP14]], i1 [[TMP11]], i32 3 -; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP15]], i32 0 -; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] -; CHECK: pred.load.if: -; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds 
i32, ptr [[ALLOCA]], i64 [[TMP0]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] -; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP19]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP15]], i32 1 -; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] -; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 -; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP23]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] -; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP20]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP24]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i1> [[TMP15]], i32 2 -; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 -; CHECK-NEXT: [[TMP29:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP28]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP30:%.*]] = phi <4 x i32> [ [[TMP25]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP29]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP15]], i32 3 -; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr [[TMP32]], align 4 -; CHECK-NEXT: [[TMP34:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP33]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP35:%.*]] = phi <4 x i32> [ [[TMP30]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP34]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP36:%.*]] = xor <4 x i1> [[TMP15]], -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP35]], <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP37]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[ALLOCA]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[TMP17]], align 4 +; CHECK-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP18]], align 4 +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP19]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> poison, i32 [[TMP20]], i32 0 +; CHECK-NEXT: [[TMP25:%.*]] = insertelement <4 x i32> [[TMP24]], i32 [[TMP21]], i32 1 +; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP25]], i32 [[TMP22]], i32 2 +; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP23]], i32 3 +; CHECK-NEXT: [[TMP28:%.*]] = xor <4 x i1> [[TMP15]], +; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> 
[[TMP27]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP29]] = add <4 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP38:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20 -; CHECK-NEXT: br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20 +; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]]) +; CHECK-NEXT: [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP29]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 22, 20 ; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] @@ -2954,7 +2784,7 @@ define i32 @test_non_unit_stride_five(i64 %len, ptr %test_base) { ; CHECK-NEXT: [[EXIT:%.*]] = icmp ugt i64 [[IV]], 100 ; CHECK-NEXT: br i1 [[EXIT]], label [[LOOP_EXIT]], label [[LOOP]], !llvm.loop [[LOOP35:![0-9]+]] ; CHECK: loop_exit: -; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[ACCUM_NEXT_LCSSA:%.*]] = phi i32 [ [[ACCUM_NEXT]], [[LATCH]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[ACCUM_NEXT_LCSSA]] ; entry: From 9baaf4fb26f4f50c6514f12ab09c32a070ff308a Mon Sep 17 00:00:00 2001 From: Andrew Litteken Date: Tue, 21 Mar 2023 11:11:27 -0500 Subject: [PATCH 204/691] Revert "[IRSim] Check largest sections first when analyzing similarity" llvm-sim test still misbehaving on other platforms. This reverts commit 082ec267583100455fee356bb0d4ebd55aba2d46. --- .../llvm/Analysis/IRSimilarityIdentifier.h | 43 ---- llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 214 +----------------- .../Analysis/IRSimilarityIdentifier/basic.ll | 44 ++-- .../debug-inst-starts-block.ll | 21 +- .../IRSimilarityIdentifier/different.ll | 18 +- .../IRSimilarityIdentifier/nothing.ll | 2 - .../Transforms/IROutliner/illegal-assumes.ll | 40 ++-- 7 files changed, 62 insertions(+), 320 deletions(-) diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h index d40d51e2d3376..9f9e7c59b42ba 100644 --- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h +++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h @@ -850,49 +850,6 @@ class IRSimilarityCandidate { IRSimilarityCandidate &SourceCand, DenseMap> &ToSourceMapping, DenseMap> &FromSourceMapping); - - /// Create a mapping for the value numbering of the calling - /// IRSimilarityCandidate, to a different separate set of numbers, based on - /// the canonical ordering in \p SourceCand. These are defined based on the - /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of - /// these relationships should have the same information, just in opposite - /// directions. 
Uses the \p OneToOne mapping from target candidate to \p
-  /// SourceCand GVNs to determine the mapping first for values with multiple
-  /// mappings. This mapping is created by the ordering of operands in the
-  /// instruction they are first seen in the candidates.
-  ///
-  /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
-  /// canonical numbering from.
-  /// \param [in,out] OneToOne - A mapping of value numbers from candidate
-  /// \p A to candidate \p B using the structure of the original instructions.
-  /// \param ToSourceMapping - The mapping of value numbers from this candidate
-  /// to \p SourceCand.
-  /// \param FromSourceMapping - The mapping of value numbers from \p SourceCand
-  /// to this candidate.
-  void createCanonicalRelationFrom(
-      IRSimilarityCandidate &SourceCand,
-      DenseMap<unsigned, unsigned> &OneToOne,
-      DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
-      DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping);
-
-  /// Create a mapping for the value numbering of the calling
-  /// IRSimilarityCandidate, to a different separate set of numbers, based on
-  /// the canonical ordering in \p SourceCand. These are defined based on the
-  /// canonical mapping defined between \p SourceCandLarge and
-  /// \p TargetCandLarge. These IRSimilarityCandidates are already structurally
-  /// similar, and fully encapsulate the IRSimilarityCandidates in question.
-  /// These are used as a "bridge" from the \p SourceCand to the target.
-  ///
-  /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
-  /// canonical numbering from.
-  /// \param SourceCandLarge - The IRSimilarityCandidate fully containing
-  /// \p SourceCand.
-  /// \param TargetCandLarge - The IRSimilarityCandidate fully containing
-  /// this Candidate.
-  void createCanonicalRelationFrom(
-      IRSimilarityCandidate &SourceCand,
-      IRSimilarityCandidate &SourceCandLarge,
-      IRSimilarityCandidate &TargetCandLarge);
 
   /// \param [in,out] BBSet - The set to track the basic blocks.
   void getBasicBlocks(DenseSet<BasicBlock *> &BBSet) const {
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 454ce5a6925bf..c8007be4142cf 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -1101,76 +1101,6 @@ void IRSimilarityCandidate::createCanonicalRelationFrom(
   }
 }
 
-void IRSimilarityCandidate::createCanonicalRelationFrom(
-    IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
-    IRSimilarityCandidate &TargetCandLarge) {
-  assert(!SourceCand.CanonNumToNumber.empty() &&
-         "Canonical Relationship is non-empty");
-  assert(!SourceCand.NumberToCanonNum.empty() &&
-         "Canonical Relationship is non-empty");
-
-  assert(!SourceCandLarge.CanonNumToNumber.empty() &&
-         "Canonical Relationship is non-empty");
-  assert(!SourceCandLarge.NumberToCanonNum.empty() &&
-         "Canonical Relationship is non-empty");
-
-  assert(!TargetCandLarge.CanonNumToNumber.empty() &&
-         "Canonical Relationship is non-empty");
-  assert(!TargetCandLarge.NumberToCanonNum.empty() &&
-         "Canonical Relationship is non-empty");
-
-  assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
-  assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
-
-  // We're going to use the larger candidates as a "bridge" to create the
-  // canonical number for the target candidate since we have identified two
-  // candidates as subsequences of larger sequences, and therefore must be
-  // structurally similar.
-  for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
-    Value *CurrVal = ValueNumPair.first;
-    unsigned TargetCandGVN = ValueNumPair.second;
-
-    // Find the numbering in the large candidate that surrounds the
-    // current candidate.
-    std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
-    assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
-
-    // Get the canonical numbering in the large target candidate.
-    std::optional<unsigned> OTargetCandCanon =
-        TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
-    assert(OTargetCandCanon.has_value() &&
-           "Canonical Number not found for GVN");
-
-    // Get the GVN in the large source candidate from the canonical numbering.
-    std::optional<unsigned> OLargeSourceGVN =
-        SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
-    assert(OLargeSourceGVN.has_value() &&
-           "GVN Number not found for Canonical Number");
-
-    // Get the Value from the GVN in the large source candidate.
-    std::optional<Value *> OLargeSourceV =
-        SourceCandLarge.fromGVN(OLargeSourceGVN.value());
-    assert(OLargeSourceV.has_value() && "Value not found for GVN");
-
-    // Get the GVN number for the Value in the source candidate.
-    std::optional<unsigned> OSourceGVN =
-        SourceCand.getGVN(OLargeSourceV.value());
-    assert(OSourceGVN.has_value() && "GVN Number not found for Value");
-
-    // Get the canonical numbering from the GVN.
-    std::optional<unsigned> OSourceCanon =
-        SourceCand.getCanonicalNum(OSourceGVN.value());
-    assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
-
-    // Insert the canonical numbering and GVN pair into their respective
-    // mappings.
-    CanonNumToNumber.insert(
-        std::make_pair(OSourceCanon.value(), TargetCandGVN));
-    NumberToCanonNum.insert(
-        std::make_pair(TargetCandGVN, OSourceCanon.value()));
-  }
-}
-
 void IRSimilarityCandidate::createCanonicalMappingFor(
     IRSimilarityCandidate &CurrCand) {
   assert(CurrCand.CanonNumToNumber.size() == 0 &&
@@ -1188,81 +1118,6 @@ void IRSimilarityCandidate::createCanonicalMappingFor(
   }
 }
 
-/// Look for larger IRSimilarityCandidates from the previously matched
-/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
-/// an overlap, return a pair of structurally similar, larger
-/// IRSimilarityCandidates.
-///
-/// \param [in] CandA - The first candidate we are trying to determine the
-/// structure of.
-/// \param [in] CandB - The second candidate we are trying to determine the
-/// structure of.
-/// \param [in] IndexToIncludedCand - Mapping of the index of an instruction in
-/// a circuit to the IRSimilarityCandidates that include this instruction.
-/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
-/// number representing the structural group assigned to it.
-static std::optional<
-    std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
-CheckLargerCands(
-    IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,
-    DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
-    DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) {
-  DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
-  DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
-  DenseSet<unsigned> IncludedGroupsA;
-  DenseSet<unsigned> IncludedGroupsB;
-
-  // Find the overall similarity group numbers that fully contain the
-  // candidate, and record the larger candidate for each group.
-  auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
-  std::optional<
-      std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
-      Result;
-
-  unsigned CandAStart = CandA.getStartIdx();
-  unsigned CandAEnd = CandA.getEndIdx();
-  unsigned CandBStart = CandB.getStartIdx();
-  unsigned CandBEnd = CandB.getEndIdx();
-  if (IdxToCandidateIt == IndexToIncludedCand.end())
-    return Result;
-  for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
-    if (MatchedCand->getStartIdx() > CandAStart ||
-        (MatchedCand->getEndIdx() < CandAEnd))
-      continue;
-    unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
-    IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
-    IncludedGroupsA.insert(GroupNum);
-  }
-
-  // Find the overall similarity group numbers that fully contain the next
-  // candidate, and record the larger candidate for each group.
-  IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
-  if (IdxToCandidateIt == IndexToIncludedCand.end())
-    return Result;
-  for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
-    if (MatchedCand->getStartIdx() > CandBStart ||
-        MatchedCand->getEndIdx() < CandBEnd)
-      continue;
-    unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
-    IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
-    IncludedGroupsB.insert(GroupNum);
-  }
-
-  // Find the intersection between the two groups; these are the groups where
-  // the larger candidates exist.
-  set_intersect(IncludedGroupsA, IncludedGroupsB);
-
-  // If there is no intersection between the sets, then we cannot determine
-  // whether or not there is a match.
-  if (IncludedGroupsA.empty())
-    return Result;
-
-  // Create a pair that contains the larger candidates.
-  auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
-  auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
-  Result = std::make_pair(ItA->second, ItB->second);
-  return Result;
-}
-
 /// From the list of IRSimilarityCandidates, perform a comparison between each
 /// IRSimilarityCandidate to determine if there are overlapping
 /// IRInstructionData, or if they do not have the same structure.
@@ -1272,16 +1127,9 @@ CheckLargerCands(
 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
 /// vector are structurally similar to one another.
-/// \param [in] IndexToIncludedCand - Mapping of the index of an instruction in
-/// a circuit to the IRSimilarityCandidates that include this instruction.
-/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
-/// number representing the structural group assigned to it.
 static void findCandidateStructures(
     std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
-    DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
-    DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
-    DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup
-    ) {
+    DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
   std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
       InnerCandEndIt;
@@ -1344,24 +1192,6 @@ static void findCandidateStructures(
       if (CandToGroupItInner != CandToGroup.end())
         continue;
 
-      // Check if we have found structural similarity between two candidates
-      // that fully contain the first and second candidates.
-      std::optional<
-          std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
-          LargerPair = CheckLargerCands(
-              *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
-
-      // If a pair was found, it means that we can assume that these smaller
-      // substrings are also structurally similar. Use the larger candidates to
-      // determine the canonical mapping between the two sections.
-      if (LargerPair.has_value()) {
-        SameStructure = true;
-        InnerCandIt->createCanonicalRelationFrom(
-            *CandIt, *LargerPair.value().first, *LargerPair.value().second);
-        CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
-        CurrentGroupPair->second.push_back(*InnerCandIt);
-        continue;
-      }
-
       // Otherwise we determine if they have the same structure and add it to
       // the vector if they match.
       ValueNumberMappingA.clear();
@@ -1388,58 +1218,24 @@ void IRSimilarityIdentifier::findCandidates(
   std::vector<SimilarityGroup> NewCandidateGroups;
 
   DenseMap<unsigned, SimilarityGroup> StructuralGroups;
-  DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
-  DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
 
   // Iterate over the subsequences found by the Suffix Tree to create
   // IRSimilarityCandidates for each repeated subsequence and determine which
   // instances are structurally similar to one another.
-
-  // Sort the suffix tree from longest substring to shortest.
-  std::vector<SuffixTree::RepeatedSubstring> RSes;
-  for (SuffixTree::RepeatedSubstring &RS : ST)
-    RSes.push_back(RS);
-
-  llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
-                             const SuffixTree::RepeatedSubstring &RHS) {
-    return LHS.Length > RHS.Length;
-  });
-  for (SuffixTree::RepeatedSubstring &RS : RSes) {
+  for (SuffixTree::RepeatedSubstring &RS : ST) {
     createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
                                    CandsForRepSubstring);
 
     if (CandsForRepSubstring.size() < 2)
       continue;
 
-    findCandidateStructures(CandsForRepSubstring, StructuralGroups,
-                            IndexToIncludedCand, CandToGroup);
-    for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
+    findCandidateStructures(CandsForRepSubstring, StructuralGroups);
+    for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
       // We only add the group if it contains more than one
       // IRSimilarityCandidate. If there is only one, that means there is no
       // other repeated subsequence with the same structure.
-      if (Group.second.size() > 1) {
+      if (Group.second.size() > 1)
         SimilarityCandidates->push_back(Group.second);
-        // Iterate over each candidate in the group, and add an entry for each
-        // instruction included with a mapping to a set of
-        // IRSimilarityCandidates that include that instruction.
-        for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
-          for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
-               Idx <= Edx; ++Idx) {
-            DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>>::iterator
-                IdIt;
-            IdIt = IndexToIncludedCand.find(Idx);
-            bool Inserted = false;
-            if (IdIt == IndexToIncludedCand.end())
-              std::tie(IdIt, Inserted) = IndexToIncludedCand.insert(
-                  std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>()));
-            IdIt->second.insert(&IRCand);
-          }
-          // Add mapping of candidate to the overall similarity group number.
-          CandToGroup.insert(
-              std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
-        }
-      }
-    }
 
     CandsForRepSubstring.clear();
     StructuralGroups.clear();
diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll b/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll
index 1c08cb407c2e3..79ae402e9f9f5 100644
--- a/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll
+++ b/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll
@@ -4,33 +4,33 @@
 ; This is a simple test to make sure the IRSimilarityIdentifier and
 ; IRSimilarityPrinterPass is working.
 
-; CHECK: 4 candidates of length 6. Found in:
+; CHECK: 4 candidates of length 2. 
Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 5. Found in: +; CHECK-NEXT:4 candidates of length 3. Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 2, ptr %2, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 4. Found in: +; CHECK-NEXT:4 candidates of length 4. Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) ; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 @@ -43,31 +43,31 @@ ; CHECK-NEXT: Function: dog, Basic Block: entry ; CHECK-NEXT: Start Instruction: store i32 2, ptr %2, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 3. Found in: +; CHECK-NEXT:4 candidates of length 5. Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 2, ptr %2, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 2. 
Found in: +; CHECK-NEXT:4 candidates of length 6. Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 5, ptr %5, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 define linkonce_odr void @fish() { @@ -136,5 +136,3 @@ entry: store i32 5, ptr %5, align 4 ret void } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll b/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll index 78ecec92cc31a..72fa361077587 100644 --- a/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll +++ b/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -disable-output -S -passes=print-ir-similarity < %s 2>&1 | FileCheck %s ; When a debug instruction is the first instruction in a block, when that block @@ -6,27 +5,27 @@ ; counted in similarity matching they must be ignored when creating canonical ; relations from one region to another. This checks that this is enforced. -; CHECK: 2 candidates of length 4. Found in: -; CHECK-NEXT: Function: main, Basic Block: entry -; CHECK-NEXT: Start Instruction: %0 = add i32 1, 4 -; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 -; CHECK-NEXT: Function: main, Basic Block: for.body169 -; CHECK-NEXT: Start Instruction: %2 = add i32 1, 4 -; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 -; CHECK-NEXT: 2 candidates of length 3. Found in: +; CHECK: 2 candidates of length 3. Found in: ; CHECK-NEXT: Function: main, Basic Block: entry ; CHECK-NEXT: Start Instruction: br label %for.body169 ; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 ; CHECK-NEXT: Function: main, Basic Block: for.body169 ; CHECK-NEXT: Start Instruction: br label %for.end122 ; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 -; CHECK-NEXT: 2 candidates of length 2. Found in: +; CHECK-NEXT: 2 candidates of length 2. Found in: ; CHECK-NEXT: Function: main, Basic Block: for.end122 ; CHECK-NEXT: Start Instruction: store i32 30, ptr undef, align 1 ; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 ; CHECK-NEXT: Function: main, Basic Block: for.end246 ; CHECK-NEXT: Start Instruction: store i32 0, ptr undef, align 1 ; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 +; CHECK-NEXT: 2 candidates of length 4. 
Found in: +; CHECK-NEXT: Function: main, Basic Block: entry +; CHECK-NEXT: Start Instruction: %0 = add i32 1, 4 +; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 +; CHECK-NEXT: Function: main, Basic Block: for.body169 +; CHECK-NEXT: Start Instruction: %2 = add i32 1, 4 +; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 source_filename = "irsimilarity_crash.ll" @@ -73,5 +72,3 @@ attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willre !9 = !{} !10 = !DIBasicType(name: "long", size: 32, encoding: DW_ATE_signed) !11 = !DILocation(line: 522, column: 23, scope: !2) -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/different.ll b/llvm/test/Analysis/IRSimilarityIdentifier/different.ll index e5c9970b159b9..701af7e21b32e 100644 --- a/llvm/test/Analysis/IRSimilarityIdentifier/different.ll +++ b/llvm/test/Analysis/IRSimilarityIdentifier/different.ll @@ -5,20 +5,20 @@ ; return items only within the same function when there are different sets of ; instructions in functions. -; CHECK: 2 candidates of length 5. Found in: -; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 -; CHECK-NEXT: End Instruction: store i32 4, ptr %4, align 4 -; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 -; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT: 2 candidates of length 3. Found in: +; CHECK: 2 candidates of length 3. Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) ; CHECK-NEXT: Start Instruction: %b = load i32, ptr %1, align 4 ; CHECK-NEXT: End Instruction: %d = load i32, ptr %3, align 4 ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) ; CHECK-NEXT: Start Instruction: %a = load i32, ptr %0, align 4 ; CHECK-NEXT: End Instruction: %c = load i32, ptr %2, align 4 +; CHECK-NEXT: 2 candidates of length 5. Found in: +; CHECK-NEXT: Function: fish, Basic Block: entry +; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 +; CHECK-NEXT: End Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Function: fish, Basic Block: entry +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 define linkonce_odr void @fish() { entry: @@ -44,5 +44,3 @@ define void @turtle(ptr %0, ptr %1, ptr %2, ptr %3) { %d = load i32, ptr %3 ret void } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll b/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll index 4891a587d776f..5c7210790f472 100644 --- a/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll +++ b/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll @@ -10,5 +10,3 @@ define linkonce_odr void @fish() { entry: ret void } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/Transforms/IROutliner/illegal-assumes.ll b/llvm/test/Transforms/IROutliner/illegal-assumes.ll index d863fe7a29903..b91f8ec92200e 100644 --- a/llvm/test/Transforms/IROutliner/illegal-assumes.ll +++ b/llvm/test/Transforms/IROutliner/illegal-assumes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -p iroutliner,verify -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -passes=verify,iroutliner -ir-outlining-no-cost < %s | FileCheck %s ; This test ensures that we do not include llvm.assumes. There are exceptions ; in the CodeExtractor's algorithm for llvm.assumes, so we ignore it for now. @@ -13,13 +13,13 @@ define void @outline_assumes() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_4(i1 true, ptr [[D]], ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_3(i1 true, ptr [[D]], ptr [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) ; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]] -; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) -; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -49,12 +49,12 @@ define void @outline_assumes2() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_4(i1 false, ptr [[D]], ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_3(i1 false, ptr [[D]], ptr [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) -; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -77,17 +77,16 @@ entry: define void @outline_assumes3() { ; CHECK-LABEL: @outline_assumes3( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i1 true, ptr [[D]], ptr [[A]], ptr [[B]], ptr [[C]], ptr [[DL_LOC]]) -; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) -; CHECK-NEXT: call void @outlined_ir_func_3(ptr [[A]]) +; CHECK-NEXT: store i1 true, ptr [[D]], align 4 +; CHECK-NEXT: [[DL:%.*]] = load i1, ptr [[D]], align 1 +; CHECK-NEXT: 
[[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] +; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) +; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]]) ; CHECK-NEXT: ret void ; entry: @@ -110,17 +109,16 @@ entry: define void @outline_assumes4() { ; CHECK-LABEL: @outline_assumes4( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_0(i1 false, ptr [[D]], ptr [[A]], ptr [[B]], ptr [[C]], ptr [[DL_LOC]]) -; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) -; CHECK-NEXT: call void @outlined_ir_func_3(ptr [[A]]) +; CHECK-NEXT: store i1 false, ptr [[D]], align 4 +; CHECK-NEXT: [[DL:%.*]] = load i1, ptr [[D]], align 1 +; CHECK-NEXT: [[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] +; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) +; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]]) ; CHECK-NEXT: ret void ; entry: From ec2a726a63e414f456c9d3a5d6ca22453fedafd5 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Wed, 22 Mar 2023 00:15:25 +0800 Subject: [PATCH 205/691] [Webassembly][multivalue] update libcall signature for f128 when multivalue feature enabled further update for [D146271](https://reviews.llvm.org/D146271) Reviewed By: tlively Differential Revision: https://reviews.llvm.org/D146499 --- .../WebAssemblyRuntimeLibcallSignatures.cpp | 154 ++++++++---------- .../CodeGen/WebAssembly/multivalue_libcall.ll | 81 ++++++--- 2 files changed, 127 insertions(+), 108 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index 9bf2596bf3ff8..20891839686d7 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -62,6 +62,8 @@ enum RuntimeLibcallSignature { i32_func_i32_i32_iPTR, i64_func_i64_i64, i64_func_i64_i64_iPTR, + i64_i64_func_i32, + i64_i64_func_i64, i64_i64_func_f32, i64_i64_func_f64, i16_i16_func_i16_i16, @@ -71,20 +73,13 @@ enum RuntimeLibcallSignature { i64_i64_func_i64_i64_i64_i64_iPTR, i64_i64_i64_i64_func_i64_i64_i64_i64, i64_i64_func_i64_i64_i32, + i64_i64_func_i64_i64_i64_i64_i64_i64, iPTR_func_i32, iPTR_func_iPTR_i32_iPTR, iPTR_func_iPTR_iPTR_iPTR, f32_func_f32_f32_f32, f64_func_f64_f64_f64, func_i64_i64_iPTR_iPTR, - func_iPTR_f32, - func_iPTR_f64, - func_iPTR_i32, - func_iPTR_i64, - func_iPTR_i64_i64, - func_iPTR_i64_i64_i32, - func_iPTR_i64_i64_i64_i64, - func_iPTR_i64_i64_i64_i64_i64_i64, i32_func_i64_i64, i32_func_i64_i64_i64_i64, iPTR_func_f32, @@ -156,73 +151,73 @@ struct RuntimeLibcallSignatureTable { // All F80 and PPCF128 routines are unsupported. 
Table[RTLIB::ADD_F32] = f32_func_f32_f32; Table[RTLIB::ADD_F64] = f64_func_f64_f64; - Table[RTLIB::ADD_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::ADD_F128] = i64_i64_func_i64_i64_i64_i64; Table[RTLIB::SUB_F32] = f32_func_f32_f32; Table[RTLIB::SUB_F64] = f64_func_f64_f64; - Table[RTLIB::SUB_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::SUB_F128] = i64_i64_func_i64_i64_i64_i64; Table[RTLIB::MUL_F32] = f32_func_f32_f32; Table[RTLIB::MUL_F64] = f64_func_f64_f64; - Table[RTLIB::MUL_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::MUL_F128] = i64_i64_func_i64_i64_i64_i64; Table[RTLIB::DIV_F32] = f32_func_f32_f32; Table[RTLIB::DIV_F64] = f64_func_f64_f64; - Table[RTLIB::DIV_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::DIV_F128] = i64_i64_func_i64_i64_i64_i64; Table[RTLIB::REM_F32] = f32_func_f32_f32; Table[RTLIB::REM_F64] = f64_func_f64_f64; - Table[RTLIB::REM_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::REM_F128] = i64_i64_func_i64_i64_i64_i64; Table[RTLIB::FMA_F32] = f32_func_f32_f32_f32; Table[RTLIB::FMA_F64] = f64_func_f64_f64_f64; - Table[RTLIB::FMA_F128] = func_iPTR_i64_i64_i64_i64_i64_i64; + Table[RTLIB::FMA_F128] = i64_i64_func_i64_i64_i64_i64_i64_i64; Table[RTLIB::POWI_F32] = f32_func_f32_i32; Table[RTLIB::POWI_F64] = f64_func_f64_i32; - Table[RTLIB::POWI_F128] = func_iPTR_i64_i64_i32; + Table[RTLIB::POWI_F128] = i64_i64_func_i64_i64_i32; Table[RTLIB::SQRT_F32] = f32_func_f32; Table[RTLIB::SQRT_F64] = f64_func_f64; - Table[RTLIB::SQRT_F128] = func_iPTR_i64_i64; + Table[RTLIB::SQRT_F128] = i64_i64_func_i64_i64; Table[RTLIB::CBRT_F32] = f32_func_f32; Table[RTLIB::CBRT_F64] = f64_func_f64; - Table[RTLIB::CBRT_F128] = func_iPTR_i64_i64; + Table[RTLIB::CBRT_F128] = i64_i64_func_i64_i64; Table[RTLIB::LOG_F32] = f32_func_f32; Table[RTLIB::LOG_F64] = f64_func_f64; - Table[RTLIB::LOG_F128] = func_iPTR_i64_i64; + Table[RTLIB::LOG_F128] = i64_i64_func_i64_i64; Table[RTLIB::LOG2_F32] = f32_func_f32; Table[RTLIB::LOG2_F64] = f64_func_f64; - Table[RTLIB::LOG2_F128] = func_iPTR_i64_i64; + Table[RTLIB::LOG2_F128] = i64_i64_func_i64_i64; Table[RTLIB::LOG10_F32] = f32_func_f32; Table[RTLIB::LOG10_F64] = f64_func_f64; - Table[RTLIB::LOG10_F128] = func_iPTR_i64_i64; + Table[RTLIB::LOG10_F128] = i64_i64_func_i64_i64; Table[RTLIB::EXP_F32] = f32_func_f32; Table[RTLIB::EXP_F64] = f64_func_f64; - Table[RTLIB::EXP_F128] = func_iPTR_i64_i64; + Table[RTLIB::EXP_F128] = i64_i64_func_i64_i64; Table[RTLIB::EXP2_F32] = f32_func_f32; Table[RTLIB::EXP2_F64] = f64_func_f64; - Table[RTLIB::EXP2_F128] = func_iPTR_i64_i64; + Table[RTLIB::EXP2_F128] = i64_i64_func_i64_i64; Table[RTLIB::SIN_F32] = f32_func_f32; Table[RTLIB::SIN_F64] = f64_func_f64; - Table[RTLIB::SIN_F128] = func_iPTR_i64_i64; + Table[RTLIB::SIN_F128] = i64_i64_func_i64_i64; Table[RTLIB::COS_F32] = f32_func_f32; Table[RTLIB::COS_F64] = f64_func_f64; - Table[RTLIB::COS_F128] = func_iPTR_i64_i64; + Table[RTLIB::COS_F128] = i64_i64_func_i64_i64; Table[RTLIB::SINCOS_F32] = func_f32_iPTR_iPTR; Table[RTLIB::SINCOS_F64] = func_f64_iPTR_iPTR; Table[RTLIB::SINCOS_F128] = func_i64_i64_iPTR_iPTR; Table[RTLIB::POW_F32] = f32_func_f32_f32; Table[RTLIB::POW_F64] = f64_func_f64_f64; - Table[RTLIB::POW_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::POW_F128] = i64_i64_func_i64_i64_i64_i64; Table[RTLIB::CEIL_F32] = f32_func_f32; Table[RTLIB::CEIL_F64] = f64_func_f64; - Table[RTLIB::CEIL_F128] = func_iPTR_i64_i64; + Table[RTLIB::CEIL_F128] = i64_i64_func_i64_i64; Table[RTLIB::TRUNC_F32] = f32_func_f32; Table[RTLIB::TRUNC_F64] = f64_func_f64; - 
Table[RTLIB::TRUNC_F128] = func_iPTR_i64_i64; + Table[RTLIB::TRUNC_F128] = i64_i64_func_i64_i64; Table[RTLIB::RINT_F32] = f32_func_f32; Table[RTLIB::RINT_F64] = f64_func_f64; - Table[RTLIB::RINT_F128] = func_iPTR_i64_i64; + Table[RTLIB::RINT_F128] = i64_i64_func_i64_i64; Table[RTLIB::NEARBYINT_F32] = f32_func_f32; Table[RTLIB::NEARBYINT_F64] = f64_func_f64; - Table[RTLIB::NEARBYINT_F128] = func_iPTR_i64_i64; + Table[RTLIB::NEARBYINT_F128] = i64_i64_func_i64_i64; Table[RTLIB::ROUND_F32] = f32_func_f32; Table[RTLIB::ROUND_F64] = f64_func_f64; - Table[RTLIB::ROUND_F128] = func_iPTR_i64_i64; + Table[RTLIB::ROUND_F128] = i64_i64_func_i64_i64; Table[RTLIB::LROUND_F32] = iPTR_func_f32; Table[RTLIB::LROUND_F64] = iPTR_func_f64; Table[RTLIB::LROUND_F128] = iPTR_func_i64_i64; @@ -237,21 +232,21 @@ struct RuntimeLibcallSignatureTable { Table[RTLIB::LLRINT_F128] = i64_func_i64_i64; Table[RTLIB::FLOOR_F32] = f32_func_f32; Table[RTLIB::FLOOR_F64] = f64_func_f64; - Table[RTLIB::FLOOR_F128] = func_iPTR_i64_i64; + Table[RTLIB::FLOOR_F128] = i64_i64_func_i64_i64; Table[RTLIB::COPYSIGN_F32] = f32_func_f32_f32; Table[RTLIB::COPYSIGN_F64] = f64_func_f64_f64; - Table[RTLIB::COPYSIGN_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::COPYSIGN_F128] = i64_i64_func_i64_i64_i64_i64; Table[RTLIB::FMIN_F32] = f32_func_f32_f32; Table[RTLIB::FMIN_F64] = f64_func_f64_f64; - Table[RTLIB::FMIN_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::FMIN_F128] = i64_i64_func_i64_i64_i64_i64; Table[RTLIB::FMAX_F32] = f32_func_f32_f32; Table[RTLIB::FMAX_F64] = f64_func_f64_f64; - Table[RTLIB::FMAX_F128] = func_iPTR_i64_i64_i64_i64; + Table[RTLIB::FMAX_F128] = i64_i64_func_i64_i64_i64_i64; // Conversion // All F80 and PPCF128 routines are unsupported. - Table[RTLIB::FPEXT_F64_F128] = func_iPTR_f64; - Table[RTLIB::FPEXT_F32_F128] = func_iPTR_f32; + Table[RTLIB::FPEXT_F64_F128] = i64_i64_func_f64; + Table[RTLIB::FPEXT_F32_F128] = i64_i64_func_f32; Table[RTLIB::FPEXT_F32_F64] = f64_func_f32; Table[RTLIB::FPEXT_F16_F32] = f32_func_i16; Table[RTLIB::FPROUND_F32_F16] = i16_func_f32; @@ -280,22 +275,22 @@ struct RuntimeLibcallSignatureTable { Table[RTLIB::FPTOUINT_F128_I128] = i64_i64_func_i64_i64; Table[RTLIB::SINTTOFP_I32_F32] = f32_func_i32; Table[RTLIB::SINTTOFP_I32_F64] = f64_func_i32; - Table[RTLIB::SINTTOFP_I32_F128] = func_iPTR_i32; + Table[RTLIB::SINTTOFP_I32_F128] = i64_i64_func_i32; Table[RTLIB::SINTTOFP_I64_F32] = f32_func_i64; Table[RTLIB::SINTTOFP_I64_F64] = f64_func_i64; - Table[RTLIB::SINTTOFP_I64_F128] = func_iPTR_i64; + Table[RTLIB::SINTTOFP_I64_F128] = i64_i64_func_i64; Table[RTLIB::SINTTOFP_I128_F32] = f32_func_i64_i64; Table[RTLIB::SINTTOFP_I128_F64] = f64_func_i64_i64; - Table[RTLIB::SINTTOFP_I128_F128] = func_iPTR_i64_i64; + Table[RTLIB::SINTTOFP_I128_F128] = i64_i64_func_i64_i64; Table[RTLIB::UINTTOFP_I32_F32] = f32_func_i32; Table[RTLIB::UINTTOFP_I32_F64] = f64_func_i64; - Table[RTLIB::UINTTOFP_I32_F128] = func_iPTR_i32; + Table[RTLIB::UINTTOFP_I32_F128] = i64_i64_func_i32; Table[RTLIB::UINTTOFP_I64_F32] = f32_func_i64; Table[RTLIB::UINTTOFP_I64_F64] = f64_func_i64; - Table[RTLIB::UINTTOFP_I64_F128] = func_iPTR_i64; + Table[RTLIB::UINTTOFP_I64_F128] = i64_i64_func_i64; Table[RTLIB::UINTTOFP_I128_F32] = f32_func_i64_i64; Table[RTLIB::UINTTOFP_I128_F64] = f64_func_i64_i64; - Table[RTLIB::UINTTOFP_I128_F128] = func_iPTR_i64_i64; + Table[RTLIB::UINTTOFP_I128_F128] = i64_i64_func_i64_i64; // Comparison // ALl F80 and PPCF128 routines are unsupported. 
@@ -820,49 +815,6 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, Params.push_back(PtrTy); Params.push_back(PtrTy); break; - case func_iPTR_f32: - Params.push_back(PtrTy); - Params.push_back(wasm::ValType::F32); - break; - case func_iPTR_f64: - Params.push_back(PtrTy); - Params.push_back(wasm::ValType::F64); - break; - case func_iPTR_i32: - Params.push_back(PtrTy); - Params.push_back(wasm::ValType::I32); - break; - case func_iPTR_i64: - Params.push_back(PtrTy); - Params.push_back(wasm::ValType::I64); - break; - case func_iPTR_i64_i64: - Params.push_back(PtrTy); - Params.push_back(wasm::ValType::I64); - Params.push_back(wasm::ValType::I64); - break; - case func_iPTR_i64_i64_i32: - Params.push_back(PtrTy); - Params.push_back(wasm::ValType::I64); - Params.push_back(wasm::ValType::I64); - Params.push_back(wasm::ValType::I32); - break; - case func_iPTR_i64_i64_i64_i64: - Params.push_back(PtrTy); - Params.push_back(wasm::ValType::I64); - Params.push_back(wasm::ValType::I64); - Params.push_back(wasm::ValType::I64); - Params.push_back(wasm::ValType::I64); - break; - case func_iPTR_i64_i64_i64_i64_i64_i64: - Params.push_back(PtrTy); - Params.push_back(wasm::ValType::I64); - Params.push_back(wasm::ValType::I64); - Params.push_back(wasm::ValType::I64); - Params.push_back(wasm::ValType::I64); - Params.push_back(wasm::ValType::I64); - Params.push_back(wasm::ValType::I64); - break; case i32_func_i64_i64: Rets.push_back(wasm::ValType::I32); Params.push_back(wasm::ValType::I64); @@ -888,6 +840,38 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, Params.push_back(wasm::ValType::I64); Params.push_back(wasm::ValType::I64); break; + case i64_i64_func_i64_i64_i64_i64_i64_i64: + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + } else { + Params.push_back(PtrTy); + } + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + Params.push_back(wasm::ValType::I64); + break; + case i64_i64_func_i32: + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + } else { + Params.push_back(PtrTy); + } + Params.push_back(wasm::ValType::I32); + break; + case i64_i64_func_i64: + if (Subtarget.hasMultivalue()) { + Rets.push_back(wasm::ValType::I64); + Rets.push_back(wasm::ValType::I64); + } else { + Params.push_back(PtrTy); + } + Params.push_back(wasm::ValType::I64); + break; case unsupported: llvm_unreachable("unsupported runtime library signature"); } diff --git a/llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll b/llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll index 270fdae336678..682eb912b87b9 100644 --- a/llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll +++ b/llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll @@ -6,58 +6,93 @@ target triple = "wasm32-unknown-unknown" -@c = global i128 0, align 16 - -define void @multivalue_sdiv(i128 noundef %a, i128 noundef %b) #0 { +define i128 @multivalue_sdiv(i128 %a, i128 %b) { ; MULTIVALUE-LABEL: multivalue_sdiv: -; MULTIVALUE: .functype multivalue_sdiv (i64, i64, i64, i64) -> () +; MULTIVALUE: .functype multivalue_sdiv (i64, i64, i64, i64) -> (i64, i64) ; MULTIVALUE-NEXT: # %bb.0: ; MULTIVALUE-NEXT: local.get 0 ; MULTIVALUE-NEXT: local.get 1 ; MULTIVALUE-NEXT: local.get 2 ; MULTIVALUE-NEXT: local.get 3 ; MULTIVALUE-NEXT: call __divti3 -; MULTIVALUE-NEXT: local.set 2 -; 
MULTIVALUE-NEXT: local.set 3 -; MULTIVALUE-NEXT: i32.const c -; MULTIVALUE-NEXT: local.get 2 -; MULTIVALUE-NEXT: i64.store 8 -; MULTIVALUE-NEXT: i32.const 0 -; MULTIVALUE-NEXT: local.get 3 -; MULTIVALUE-NEXT: i64.store c ; MULTIVALUE-NEXT: # fallthrough-return ; ; NO_MULTIVALUE-LABEL: multivalue_sdiv: -; NO_MULTIVALUE: .functype multivalue_sdiv (i64, i64, i64, i64) -> () +; NO_MULTIVALUE: .functype multivalue_sdiv (i32, i64, i64, i64, i64) -> () ; NO_MULTIVALUE-NEXT: .local i32 ; NO_MULTIVALUE-NEXT: # %bb.0: ; NO_MULTIVALUE-NEXT: global.get __stack_pointer ; NO_MULTIVALUE-NEXT: i32.const 16 ; NO_MULTIVALUE-NEXT: i32.sub -; NO_MULTIVALUE-NEXT: local.tee 4 +; NO_MULTIVALUE-NEXT: local.tee 5 ; NO_MULTIVALUE-NEXT: global.set __stack_pointer +; NO_MULTIVALUE-NEXT: local.get 5 +; NO_MULTIVALUE-NEXT: local.get 1 +; NO_MULTIVALUE-NEXT: local.get 2 +; NO_MULTIVALUE-NEXT: local.get 3 ; NO_MULTIVALUE-NEXT: local.get 4 +; NO_MULTIVALUE-NEXT: call __divti3 +; NO_MULTIVALUE-NEXT: local.get 0 +; NO_MULTIVALUE-NEXT: local.get 5 +; NO_MULTIVALUE-NEXT: i32.const 8 +; NO_MULTIVALUE-NEXT: i32.add +; NO_MULTIVALUE-NEXT: i64.load 0 +; NO_MULTIVALUE-NEXT: i64.store 8 ; NO_MULTIVALUE-NEXT: local.get 0 +; NO_MULTIVALUE-NEXT: local.get 5 +; NO_MULTIVALUE-NEXT: i64.load 0 +; NO_MULTIVALUE-NEXT: i64.store 0 +; NO_MULTIVALUE-NEXT: local.get 5 +; NO_MULTIVALUE-NEXT: i32.const 16 +; NO_MULTIVALUE-NEXT: i32.add +; NO_MULTIVALUE-NEXT: global.set __stack_pointer +; NO_MULTIVALUE-NEXT: # fallthrough-return + %div = sdiv i128 %a, %b + ret i128 %div +} + + +define fp128 @multivalue_fsub(fp128 %a, fp128 %b) { +; MULTIVALUE-LABEL: multivalue_fsub: +; MULTIVALUE: .functype multivalue_fsub (i64, i64, i64, i64) -> (i64, i64) +; MULTIVALUE-NEXT: # %bb.0: +; MULTIVALUE-NEXT: local.get 0 +; MULTIVALUE-NEXT: local.get 1 +; MULTIVALUE-NEXT: local.get 2 +; MULTIVALUE-NEXT: local.get 3 +; MULTIVALUE-NEXT: call __subtf3 +; MULTIVALUE-NEXT: # fallthrough-return +; +; NO_MULTIVALUE-LABEL: multivalue_fsub: +; NO_MULTIVALUE: .functype multivalue_fsub (i32, i64, i64, i64, i64) -> () +; NO_MULTIVALUE-NEXT: .local i32 +; NO_MULTIVALUE-NEXT: # %bb.0: +; NO_MULTIVALUE-NEXT: global.get __stack_pointer +; NO_MULTIVALUE-NEXT: i32.const 16 +; NO_MULTIVALUE-NEXT: i32.sub +; NO_MULTIVALUE-NEXT: local.tee 5 +; NO_MULTIVALUE-NEXT: global.set __stack_pointer +; NO_MULTIVALUE-NEXT: local.get 5 ; NO_MULTIVALUE-NEXT: local.get 1 ; NO_MULTIVALUE-NEXT: local.get 2 ; NO_MULTIVALUE-NEXT: local.get 3 -; NO_MULTIVALUE-NEXT: call __divti3 -; NO_MULTIVALUE-NEXT: i32.const c ; NO_MULTIVALUE-NEXT: local.get 4 +; NO_MULTIVALUE-NEXT: call __subtf3 +; NO_MULTIVALUE-NEXT: local.get 0 +; NO_MULTIVALUE-NEXT: local.get 5 ; NO_MULTIVALUE-NEXT: i32.const 8 ; NO_MULTIVALUE-NEXT: i32.add ; NO_MULTIVALUE-NEXT: i64.load 0 ; NO_MULTIVALUE-NEXT: i64.store 8 -; NO_MULTIVALUE-NEXT: i32.const 0 -; NO_MULTIVALUE-NEXT: local.get 4 +; NO_MULTIVALUE-NEXT: local.get 0 +; NO_MULTIVALUE-NEXT: local.get 5 ; NO_MULTIVALUE-NEXT: i64.load 0 -; NO_MULTIVALUE-NEXT: i64.store c -; NO_MULTIVALUE-NEXT: local.get 4 +; NO_MULTIVALUE-NEXT: i64.store 0 +; NO_MULTIVALUE-NEXT: local.get 5 ; NO_MULTIVALUE-NEXT: i32.const 16 ; NO_MULTIVALUE-NEXT: i32.add ; NO_MULTIVALUE-NEXT: global.set __stack_pointer ; NO_MULTIVALUE-NEXT: # fallthrough-return - %div = sdiv i128 %a, %b - store i128 %div, ptr @c, align 16 - ret void + %sub = fsub fp128 %a, %b + ret fp128 %sub } From 511ddfe2650ac4429f36c927865cf45122501585 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 20 Mar 2023 18:01:50 -0700 Subject: [PATCH 
206/691] [test] Split up new-pm-thinlto-defaults.ll into prelink and
 postlink pipelines

They're becoming different enough that it's getting annoying to figure out
how to allocate check prefixes.

Reviewed By: tejohnson

Differential Revision: https://reviews.llvm.org/D146486
---
 ...ll => new-pm-thinlto-postlink-defaults.ll} |  32 ---
 .../Other/new-pm-thinlto-prelink-defaults.ll  | 202 ++++++++++++++++++
 2 files changed, 202 insertions(+), 32 deletions(-)
 rename llvm/test/Other/{new-pm-thinlto-defaults.ll => new-pm-thinlto-postlink-defaults.ll} (82%)
 create mode 100644 llvm/test/Other/new-pm-thinlto-prelink-defaults.ll

diff --git a/llvm/test/Other/new-pm-thinlto-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
similarity index 82%
rename from llvm/test/Other/new-pm-thinlto-defaults.ll
rename to llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
index e89a62112db77..5f46a2b715929 100644
--- a/llvm/test/Other/new-pm-thinlto-defaults.ll
+++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll
@@ -7,32 +7,6 @@
 ; Any invalidation that shows up here is a bug, unless we started modifying
 ; the IR, in which case we need to make it immutable harder.
 ;
-; Prelink pipelines:
-; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
-; RUN:     -passes='thinlto-pre-link<O1>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O1,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS
-; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
-; RUN:     -passes='thinlto-pre-link<O2>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS
-; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
-; RUN:     -passes='thinlto-pre-link<O3>' -S -passes-ep-pipeline-start='no-op-module' %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-EP-PIPELINE-START
-; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
-; RUN:     -passes='thinlto-pre-link<O3>' -S -passes-ep-optimizer-early='no-op-module' %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-EP-OPT-EARLY
-; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
-; RUN:     -passes='thinlto-pre-link<O3>' -S -passes-ep-optimizer-last='no-op-module' %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O3,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS,CHECK-EP-OPT-LAST
-; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
-; RUN:     -passes='thinlto-pre-link<Os>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS
-; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
-; RUN:     -passes='thinlto-pre-link<Oz>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O23SZ,CHECK-PRELINK-O,CHECK-PRELINK-O-NODIS
-; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \
-; RUN:     -passes='thinlto-pre-link<O2>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-NO-FUNC-SPEC,CHECK-O2,CHECK-O23SZ,CHECK-PRELINK-O
-;
 ; Postlink pipelines:
 ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
 ; RUN:     -passes='thinlto<O1>' -S %s 2>&1 \
@@ -72,7 +46,6 @@
 ; CHECK-POSTLINK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-POSTLINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
 ; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass
-; CHECK-PRELINK-O-NODIS-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis
 ; CHECK-O-NEXT: Running pass: CoroEarlyPass
 ; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass
@@ -92,7 +65,6 @@
 ; CHECK-O-NEXT: Running pass: GlobalOptPass
 ; CHECK-O-NEXT: Running pass: PromotePass
 ; CHECK-O-NEXT: Running pass: InstCombinePass
-; CHECK-PRELINK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
 ; CHECK-O-NEXT: Running analysis: AAManager
 ; CHECK-O-NEXT: Running analysis: BasicAA
 ; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA
@@ -107,7 +79,6 @@
 ; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager
 ; CHECK-O-NEXT: Invalidating analysis: AAManager
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
-; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
 ; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy
@@ -193,7 +164,6 @@
 ; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
 ; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass
 ; CHECK-O-NEXT: Running pass: CoroCleanupPass
-; CHECK-PRELINK-O-NEXT: Running pass: GlobalOptPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalOptPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: GlobalDCEPass
 ; CHECK-POSTLINK-O-NEXT: Running pass: EliminateAvailableExternallyPass
@@ -242,8 +212,6 @@
 ; CHECK-EP-OPT-EARLY-NEXT: Running pass: NoOpModulePass
 ; CHECK-EP-OPT-LAST-NEXT: Running pass: NoOpModulePass
 ; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo
-; CHECK-PRELINK-O-NEXT: Running pass: CanonicalizeAliasesPass
-; CHECK-PRELINK-O-NEXT: Running pass: NameAnonGlobalPass
 ; CHECK-O-NEXT: Running pass: PrintModulePass
 
 ; Make sure we get the IR back out without changes when we print the module.
diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
new file mode 100644
index 0000000000000..0dca9532bd23a
--- /dev/null
+++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll
@@ -0,0 +1,202 @@
+; The IR below was crafted so as:
+; 1) To have a loop, so we create a loop pass manager
+; 2) To be "immutable" in the sense that no pass in the standard
+;    pipeline will modify it.
+; Since no transformations take place, we don't expect any analyses
+; to be invalidated.
+; Any invalidation that shows up here is a bug, unless we started modifying
+; the IR, in which case we need to make it immutable harder.
+;
+; Prelink pipelines:
+; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
+; RUN:     -passes='thinlto-pre-link<O1>' -S %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-O-NODIS
+; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
+; RUN:     -passes='thinlto-pre-link<O2>' -S %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-O-NODIS
+; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
+; RUN:     -passes='thinlto-pre-link<O3>' -S -passes-ep-pipeline-start='no-op-module' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-O-NODIS,CHECK-EP-PIPELINE-START
+; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
+; RUN:     -passes='thinlto-pre-link<O3>' -S -passes-ep-optimizer-early='no-op-module' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-O-NODIS,CHECK-EP-OPT-EARLY
+; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
+; RUN:     -passes='thinlto-pre-link<O3>' -S -passes-ep-optimizer-last='no-op-module' %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-O-NODIS,CHECK-EP-OPT-LAST
+; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
+; RUN:     -passes='thinlto-pre-link<Os>' -S %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-O-NODIS
+; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \
+; RUN:     -passes='thinlto-pre-link<Oz>' -S %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ,CHECK-O-NODIS
+; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \
+; RUN:     -passes='thinlto-pre-link<O2>' -S %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefixes=CHECK-DIS,CHECK-O,CHECK-O2,CHECK-O23SZ
+;
+
+; Suppress FileCheck --allow-unused-prefixes=false diagnostics.
+; CHECK-NOEXT: {{^}} + +; CHECK-O: Running pass: Annotation2Metadata +; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass +; CHECK-EP-PIPELINE-START-NEXT: Running pass: NoOpModulePass +; CHECK-DIS-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-DIS-NEXT: Running pass: AddDiscriminatorsPass +; CHECK-O-NEXT: Running pass: InferFunctionAttrsPass +; CHECK-O-NODIS-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O-NEXT: Running pass: CoroEarlyPass +; CHECK-O-NEXT: Running pass: LowerExpectIntrinsicPass +; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running analysis: TargetIRAnalysis +; CHECK-O-NEXT: Running analysis: AssumptionAnalysis +; CHECK-O-NEXT: Running pass: SROAPass +; CHECK-O-NEXT: Running analysis: DominatorTreeAnalysis +; CHECK-O-NEXT: Running pass: EarlyCSEPass +; CHECK-O-NEXT: Running analysis: TargetLibraryAnalysis +; CHECK-O3-NEXT: Running pass: CallSiteSplittingPass +; CHECK-O-NEXT: Running pass: OpenMPOptPass +; CHECK-O-NEXT: Running pass: IPSCCPPass +; CHECK-O-NEXT: Running pass: CalledValuePropagationPass +; CHECK-O-NEXT: Running pass: GlobalOptPass +; CHECK-O-NEXT: Running pass: PromotePass +; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis +; CHECK-O-NEXT: Running analysis: AAManager +; CHECK-O-NEXT: Running analysis: BasicAA +; CHECK-O-NEXT: Running analysis: ScopedNoAliasAA +; CHECK-O-NEXT: Running analysis: TypeBasedAA +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass +; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis +; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA +; CHECK-O-NEXT: Running analysis: GlobalsAA +; CHECK-O-NEXT: Running analysis: CallGraphAnalysis +; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}AAManager +; CHECK-O-NEXT: Invalidating analysis: AAManager +; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis +; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis +; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: DevirtSCCRepeatedPass +; CHECK-O-NEXT: Running pass: InlinerPass +; CHECK-O-NEXT: Running pass: InlinerPass +; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass +; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass +; CHECK-O2-NEXT: Running pass: OpenMPOptCGSCCPass on (foo) +; CHECK-O3-NEXT: Running pass: OpenMPOptCGSCCPass on (foo) +; CHECK-O-NEXT: Running pass: SROAPass +; CHECK-O-NEXT: Running pass: EarlyCSEPass +; CHECK-O-NEXT: Running analysis: MemorySSAAnalysis +; CHECK-O-NEXT: Running analysis: AAManager +; CHECK-O23SZ-NEXT: Running pass: SpeculativeExecutionPass +; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis +; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis +; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass +; CHECK-O23SZ-NEXT: Running pass: ConstraintEliminationPass +; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass +; CHECK-O2-NEXT: 
Running pass: LibCallsShrinkWrapPass +; CHECK-O3-NEXT: Running pass: LibCallsShrinkWrapPass +; CHECK-O23SZ-NEXT: Running pass: TailCallElimPass +; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: ReassociatePass +; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}OptimizationRemarkEmitterAnalysis +; CHECK-O-NEXT: Running pass: LoopSimplifyPass +; CHECK-O-NEXT: Running analysis: LoopAnalysis +; CHECK-O-NEXT: Running pass: LCSSAPass +; CHECK-O-NEXT: Running analysis: ScalarEvolutionAnalysis +; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: LoopInstSimplifyPass +; CHECK-O-NEXT: Running pass: LoopSimplifyCFGPass +; CHECK-O-NEXT: Running pass: LICM +; CHECK-O-NEXT: Running pass: LoopRotatePass +; CHECK-O-NEXT: Running pass: LICM +; CHECK-O-NEXT: Running pass: SimpleLoopUnswitchPass +; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy +; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O-NEXT: Running pass: LoopSimplifyPass +; CHECK-O-NEXT: Running pass: LCSSAPass +; CHECK-O-NEXT: Running pass: LoopIdiomRecognizePass +; CHECK-O-NEXT: Running pass: IndVarSimplifyPass +; CHECK-O-NEXT: Running pass: LoopDeletionPass +; CHECK-O-NEXT: Running pass: LoopFullUnrollPass +; CHECK-O-NEXT: Running pass: SROAPass on foo +; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass +; CHECK-O23SZ-NEXT: Running pass: MergedLoadStoreMotionPass +; CHECK-O23SZ-NEXT: Running pass: GVNPass +; CHECK-O23SZ-NEXT: Running analysis: MemoryDependenceAnalysis +; CHECK-O1-NEXT: Running pass: MemCpyOptPass +; CHECK-O-NEXT: Running pass: SCCPPass +; CHECK-O-NEXT: Running pass: BDCEPass +; CHECK-O-NEXT: Running analysis: DemandedBitsAnalysis +; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O23SZ-NEXT: Running pass: JumpThreadingPass +; CHECK-O23SZ-NEXT: Running analysis: LazyValueAnalysis +; CHECK-O23SZ-NEXT: Running pass: CorrelatedValuePropagationPass +; CHECK-O23SZ-NEXT: Invalidating analysis: LazyValueAnalysis +; CHECK-O1-NEXT: Running pass: CoroElidePass +; CHECK-O-NEXT: Running pass: ADCEPass +; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis +; CHECK-O23SZ-NEXT: Running pass: MemCpyOptPass +; CHECK-O23SZ-NEXT: Running pass: DSEPass +; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass +; CHECK-O23SZ-NEXT: Running pass: LCSSAPass +; CHECK-O23SZ-NEXT: Running pass: LICMPass on loop +; CHECK-O23SZ-NEXT: Running pass: CoroElidePass +; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: InstCombinePass +; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass +; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis +; CHECK-O-NEXT: Running analysis: ShouldNotRunFunctionPassesAnalysis +; CHECK-O-NEXT: Running pass: CoroSplitPass +; CHECK-O-NEXT: Running pass: InvalidateAnalysisPass<{{.*}}ShouldNotRunFunctionPassesAnalysis +; CHECK-O-NEXT: Invalidating analysis: ShouldNotRunFunctionPassesAnalysis +; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis +; CHECK-O-NEXT: Running pass: DeadArgumentEliminationPass +; CHECK-O-NEXT: Running pass: CoroCleanupPass +; CHECK-O-NEXT: Running pass: GlobalOptPass +; CHECK-EXT: Running pass: {{.*}}::Bye +; CHECK-EP-OPT-EARLY-NEXT: Running pass: NoOpModulePass +; CHECK-EP-OPT-LAST-NEXT: Running pass: NoOpModulePass +; CHECK-O-NEXT: Running pass: AnnotationRemarksPass on foo +; CHECK-O-NEXT: Running pass: CanonicalizeAliasesPass +; CHECK-O-NEXT: Running pass: NameAnonGlobalPass +; 
CHECK-O-NEXT: Running pass: PrintModulePass
+
+; Make sure we get the IR back out without changes when we print the module.
+; CHECK-O-LABEL: define void @foo(i32 %n) local_unnamed_addr {
+; CHECK-O-NEXT: entry:
+; CHECK-O-NEXT:   br label %loop
+; CHECK-O:      loop:
+; CHECK-O-NEXT:   %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+; CHECK-O-NEXT:   %iv.next = add i32 %iv, 1
+; CHECK-O-NEXT:   tail call void @bar()
+; CHECK-O-NEXT:   %cmp = icmp eq i32 %iv, %n
+; CHECK-O-NEXT:   br i1 %cmp, label %exit, label %loop
+; CHECK-O:      exit:
+; CHECK-O-NEXT:   ret void
+; CHECK-O-NEXT: }
+;
+
+declare void @bar() local_unnamed_addr
+
+define void @foo(i32 %n) local_unnamed_addr {
+entry:
+  br label %loop
+loop:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
+  %iv.next = add i32 %iv, 1
+  tail call void @bar()
+  %cmp = icmp eq i32 %iv, %n
+  br i1 %cmp, label %exit, label %loop
+exit:
+  ret void
+}

From e9df5d62c8353a81815658bdbf6f9fc0e54a9c37 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Tue, 21 Mar 2023 08:30:38 -0700
Subject: [PATCH 207/691] [LSR] Remove a couple stale comments in lsr-term-fold

---
 llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 35a90bf40debc..3e5e0987e10c8 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6700,9 +6700,6 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
   }
 
   BasicBlock *LoopLatch = L->getLoopLatch();
-
-  // TODO: Can we do something for greater than and less than?
-  // Terminating condition is foldable when it is an eq/ne icmp
   BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
   if (BI->isUnconditional())
     return std::nullopt;
@@ -6948,8 +6945,6 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
   BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
   ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
   IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
-  // FIXME: We are adding a use of an IV here without account for poison safety.
-  // This is incorrect.
   Value *NewTermCond = LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
                                                "lsr_fold_term_cond.replaced_term_cond");

From 00fdd2cb6c55744ae21f18c377854e9359273454 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Tue, 21 Mar 2023 09:28:27 -0700
Subject: [PATCH 208/691] [LSR] Don't crash on non-branch terminator in
 -lsr-term-fold

Reported in https://reviews.llvm.org/D146415. I rewrote the patch and
added the test case. Per that report, spec2006.483.xalancbmk crashes
without this fix.
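Context for the one-line fix below: canFoldTermCondOfLoop took the latch
terminator through cast<BranchInst>, and LLVM's cast<> asserts on a type
mismatch, so a latch ending in a switch (as in the xalancbmk reproducer)
crashed opt. A minimal sketch of the guard the patch installs; the helper
name and reduced signature here are illustrative, not the actual function:

    #include "llvm/IR/BasicBlock.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Support/Casting.h"

    using namespace llvm;

    // Returns the integer compare controlling the latch branch, or nullptr
    // if the latch is not terminated by a conditional branch on an icmp.
    static ICmpInst *getFoldableTermCond(BasicBlock *LoopLatch) {
      // dyn_cast returns null on a switch/invoke/etc., where cast<> would
      // assert; that null check is the whole fix.
      auto *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
      if (!BI || BI->isUnconditional())
        return nullptr;
      return dyn_cast<ICmpInst>(BI->getCondition());
    }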
---
 .../Transforms/Scalar/LoopStrengthReduce.cpp  |  4 +--
 .../LoopStrengthReduce/lsr-term-fold.ll       | 34 +++++++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 3e5e0987e10c8..82312de71c72c 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6700,8 +6700,8 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
   }
 
   BasicBlock *LoopLatch = L->getLoopLatch();
-  BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
-  if (BI->isUnconditional())
+  BranchInst *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
+  if (!BI || BI->isUnconditional())
     return std::nullopt;
   auto *TermCond = dyn_cast<ICmpInst>(BI->getCondition());
   if (!TermCond) {
diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
index 16e85a94517bc..c9b66896eac78 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll
@@ -439,3 +439,37 @@ another.branch:
 for.end:                                          ; preds = %for.body
   ret void
 }
+
+
+define void @non_branch_terminator(ptr %a) {
+; CHECK-LABEL: @non_branch_terminator(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 84
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[LSR_IV2:%.*]] = phi i64 [ [[LSR_IV_NEXT3:%.*]], [[FOR_BODY]] ], [ 378, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi ptr [ [[UGLYGEP2:%.*]], [[FOR_BODY]] ], [ [[UGLYGEP]], [[ENTRY]] ]
+; CHECK-NEXT:    store i32 1, ptr [[LSR_IV1]], align 4
+; CHECK-NEXT:    [[UGLYGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4
+; CHECK-NEXT:    [[LSR_IV_NEXT3]] = add nsw i64 [[LSR_IV2]], -1
+; CHECK-NEXT:    switch i64 [[LSR_IV2]], label [[FOR_BODY]] [
+; CHECK-NEXT:    i64 0, label [[FOR_END:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %uglygep = getelementptr i8, ptr %a, i64 84
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %uglygep, %entry ]
+  %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ]
+  store i32 1, ptr %lsr.iv1, align 4
+  %lsr.iv.next = add nsw i64 %lsr.iv, -1
+  %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4
+  switch i64 %lsr.iv.next, label %for.body [i64 0, label %for.end]
+
+for.end:                                          ; preds = %for.body
+  ret void
+}

From fb855eb941b6d740cc6560297d0b4d3201dcaf9f Mon Sep 17 00:00:00 2001
From: Mark de Wever
Date: Tue, 14 Mar 2023 21:27:03 +0100
Subject: [PATCH 209/691] [libc++] Qualifies size_t.

This has been done using the following command
  find libcxx/test -type f -exec perl -pi -e 's|^([^/]+?)((?<!std::)(?<!\w)size_t)|\1std::\2|' \{\} \;

 template <class Dst, class Src>
-constexpr void* __builtin_memmove(Dst*, Src*, size_t) {
+constexpr void* __builtin_memmove(Dst*, Src*, std::size_t) {
   assert(false);
   return nullptr;
 }
@@ -172,7 +172,7 @@ constexpr void test_one(Func func) {
   using To = typename std::iterator_traits<OutIter>::value_type;
 
   {
-    const size_t N = 5;
+    const std::size_t N = 5;
     From input[N] = {make(0), make(1), make(2), make(3), make(4)};
     To output[N];
@@ -192,7 +192,7 @@ constexpr void test_one(Func func) {
   }
 
   {
-    const size_t N = 0;
+    const std::size_t N = 0;
     From input[1] = {make(1)};
     To output[1] = {make(2)};
@@ -211,25 +211,25 @@ template <class From, class To, template <class> class SentWrapper, class OutIter>
 constexpr void test_copy() {
   // Classic.
if constexpr (std::same_as>) { - test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::copy(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::copy_backward(first, last, out + n); }); - test_one([](auto first, auto, auto out, size_t n) { + test_one([](auto first, auto, auto out, std::size_t n) { std::copy_n(first, n, out); }); } // Ranges. - test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::ranges::copy(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::ranges::copy_backward(first, last, out + n); }); - test_one([](auto first, auto, auto out, size_t n) { + test_one([](auto first, auto, auto out, std::size_t n) { std::ranges::copy_n(first, n, out); }); } @@ -237,19 +237,19 @@ constexpr void test_copy() { template class SentWrapper, class OutIter> constexpr void test_move() { if constexpr (std::same_as>) { - test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::move(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::move_backward(first, last, out + n); }); } // Ranges. - test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::ranges::move(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::ranges::move_backward(first, last, out + n); }); } diff --git a/libcxx/test/libcxx/algorithms/alg.modifying.operations/copy_move_trivial.pass.cpp b/libcxx/test/libcxx/algorithms/alg.modifying.operations/copy_move_trivial.pass.cpp index 7505e5727cd76..7c2d388a69947 100644 --- a/libcxx/test/libcxx/algorithms/alg.modifying.operations/copy_move_trivial.pass.cpp +++ b/libcxx/test/libcxx/algorithms/alg.modifying.operations/copy_move_trivial.pass.cpp @@ -36,7 +36,7 @@ static bool memmove_called = false; // This template is a better match than the actual `builtin_memmove` (it can match the pointer type exactly, without an // implicit conversion to `void*`), so it should hijack the call inside `std::copy` and similar algorithms if it's made. template -constexpr void* __builtin_memmove(Dst* dst, Src* src, size_t count) { +constexpr void* __builtin_memmove(Dst* dst, Src* src, std::size_t count) { memmove_called = true; return __builtin_memmove(static_cast(dst), static_cast(src), count); } @@ -135,7 +135,7 @@ void test_one(Func func) { // Normal case. { - const size_t N = 4; + const std::size_t N = 4; From input[N] = {make(1), make(2), make(3), make(4)}; To output[N]; @@ -162,7 +162,7 @@ void test_one(Func func) { // Empty input sequence. { - const size_t N = 0; + const std::size_t N = 0; From input[1] = {make(1)}; To output[1] = {make(2)}; @@ -185,37 +185,37 @@ template class SentWrapper, class OutIter> void test_copy_and_move() { // Classic. 
if constexpr (std::same_as>) { - test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::copy(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::copy_backward(first, last, out + n); }); - test_one([](auto first, auto, auto out, size_t n) { + test_one([](auto first, auto, auto out, std::size_t n) { std::copy_n(first, n, out); }); - test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::move(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::move_backward(first, last, out + n); }); } // Ranges. - test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::ranges::copy(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::ranges::copy_backward(first, last, out + n); }); - test_one([](auto first, auto, auto out, size_t n) { + test_one([](auto first, auto, auto out, std::size_t n) { std::ranges::copy_n(first, n, out); }); - test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::ranges::move(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::ranges::move_backward(first, last, out + n); }); } @@ -281,36 +281,36 @@ void test_different_signedness() { } }; - check([](auto first, auto last, auto out, size_t) { + check([](auto first, auto last, auto out, std::size_t) { std::copy(first, last, out); }); - check([](auto first, auto last, auto out, size_t n) { + check([](auto first, auto last, auto out, std::size_t n) { std::copy_backward(first, last, out + n); }); - check([](auto first, auto, auto out, size_t n) { + check([](auto first, auto, auto out, std::size_t n) { std::copy_n(first, n, out); }); - check([](auto first, auto last, auto out, size_t) { + check([](auto first, auto last, auto out, std::size_t) { std::move(first, last, out); }); - check([](auto first, auto last, auto out, size_t n) { + check([](auto first, auto last, auto out, std::size_t n) { std::move_backward(first, last, out + n); }); // Ranges. 
- check([](auto first, auto last, auto out, size_t) { + check([](auto first, auto last, auto out, std::size_t) { std::ranges::copy(first, last, out); }); - check([](auto first, auto last, auto out, size_t n) { + check([](auto first, auto last, auto out, std::size_t n) { std::ranges::copy_backward(first, last, out + n); }); - check([](auto first, auto, auto out, size_t n) { + check([](auto first, auto, auto out, std::size_t n) { std::ranges::copy_n(first, n, out); }); - check([](auto first, auto last, auto out, size_t) { + check([](auto first, auto last, auto out, std::size_t) { std::ranges::move(first, last, out); }); - check([](auto first, auto last, auto out, size_t n) { + check([](auto first, auto last, auto out, std::size_t n) { std::ranges::move_backward(first, last, out + n); }); } diff --git a/libcxx/test/libcxx/algorithms/alg.modifying.operations/copy_move_unwrap_reverse.pass.cpp b/libcxx/test/libcxx/algorithms/alg.modifying.operations/copy_move_unwrap_reverse.pass.cpp index bb01cc4e7322e..f295b807864ea 100644 --- a/libcxx/test/libcxx/algorithms/alg.modifying.operations/copy_move_unwrap_reverse.pass.cpp +++ b/libcxx/test/libcxx/algorithms/alg.modifying.operations/copy_move_unwrap_reverse.pass.cpp @@ -22,13 +22,13 @@ #include "test_iterators.h" -template +template requires (N == 0) constexpr auto wrap_n_times(Iter i) { return i; } -template +template requires (N != 0) constexpr auto wrap_n_times(Iter i) { return std::make_reverse_iterator(wrap_n_times(i)); @@ -37,12 +37,12 @@ constexpr auto wrap_n_times(Iter i) { static_assert(std::is_same_v(std::declval())), std::reverse_iterator>>); -template class SentWrapper, class OutIter, size_t W1, size_t W2, class Func> +template class SentWrapper, class OutIter, std::size_t W1, size_t W2, class Func> constexpr void test_one(Func func) { using From = std::iter_value_t; using To = std::iter_value_t; - const size_t N = 4; + const std::size_t N = 4; From input[N] = {{1}, {2}, {3}, {4}}; To output[N]; @@ -60,46 +60,46 @@ constexpr void test_one(Func func) { })); } -template class SentWrapper, class OutIter, size_t W1, size_t W2> +template class SentWrapper, class OutIter, std::size_t W1, size_t W2> constexpr void test_copy_and_move() { // Classic. if constexpr (std::same_as>) { - test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::copy(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::copy_backward(first, last, out + n); }); - test_one([](auto first, auto, auto out, size_t n) { + test_one([](auto first, auto, auto out, std::size_t n) { std::copy_n(first, n, out); }); - test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::move(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::move_backward(first, last, out + n); }); } // Ranges. 
- test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::ranges::copy(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::ranges::copy_backward(first, last, out + n); }); - test_one([](auto first, auto, auto out, size_t n) { + test_one([](auto first, auto, auto out, std::size_t n) { std::ranges::copy_n(first, n, out); }); - test_one([](auto first, auto last, auto out, size_t) { + test_one([](auto first, auto last, auto out, std::size_t) { std::ranges::move(first, last, out); }); - test_one([](auto first, auto last, auto out, size_t n) { + test_one([](auto first, auto last, auto out, std::size_t n) { std::ranges::move_backward(first, last, out + n); }); } -template class SentWrapper> +template class SentWrapper> constexpr void test_all_permutations_with_counts_from_to_sent() { test_copy_and_move(); test_copy_and_move, SentWrapper, To*, W1, W2>(); @@ -114,7 +114,7 @@ constexpr void test_all_permutations_with_counts_from_to_sent() { } } -template +template constexpr void test_all_permutations_with_counts() { test_all_permutations_with_counts_from_to_sent(); test_all_permutations_with_counts_from_to_sent(); diff --git a/libcxx/test/libcxx/algorithms/debug_less.pass.cpp b/libcxx/test/libcxx/algorithms/debug_less.pass.cpp index 89f12e5aa0cd3..4d6422a07a020 100644 --- a/libcxx/test/libcxx/algorithms/debug_less.pass.cpp +++ b/libcxx/test/libcxx/algorithms/debug_less.pass.cpp @@ -206,10 +206,10 @@ void test_non_const_arg_cmp() { struct ValueIterator { typedef std::input_iterator_tag iterator_category; - typedef size_t value_type; + typedef std::size_t value_type; typedef ptrdiff_t difference_type; - typedef size_t reference; - typedef size_t* pointer; + typedef std::size_t reference; + typedef std::size_t* pointer; ValueIterator() { } diff --git a/libcxx/test/libcxx/algorithms/half_positive.pass.cpp b/libcxx/test/libcxx/algorithms/half_positive.pass.cpp index 997679629dac6..82d18cba37f34 100644 --- a/libcxx/test/libcxx/algorithms/half_positive.pass.cpp +++ b/libcxx/test/libcxx/algorithms/half_positive.pass.cpp @@ -35,7 +35,7 @@ int main(int, char**) assert(test()); assert(test()); assert((test, int>())); - assert(test()); + assert(test()); #if !defined(TEST_HAS_NO_INT128) assert(test<__int128_t>()); #endif // !defined(TEST_HAS_NO_INT128) @@ -46,7 +46,7 @@ int main(int, char**) static_assert(test(), ""); static_assert(test(), ""); static_assert(test(), ""); - static_assert(test(), ""); + static_assert(test(), ""); #if !defined(TEST_HAS_NO_INT128) static_assert(test<__int128_t>(), ""); #endif // !defined(TEST_HAS_NO_INT128) diff --git a/libcxx/test/libcxx/containers/sequences/vector/exception_safety_exceptions_disabled.pass.cpp b/libcxx/test/libcxx/containers/sequences/vector/exception_safety_exceptions_disabled.pass.cpp index dc771116cfa6a..df562474c185a 100644 --- a/libcxx/test/libcxx/containers/sequences/vector/exception_safety_exceptions_disabled.pass.cpp +++ b/libcxx/test/libcxx/containers/sequences/vector/exception_safety_exceptions_disabled.pass.cpp @@ -41,8 +41,8 @@ int main(int, char**) { // Create a vector containing some number of elements that will // have to be moved when it is resized. 
v.reserve(10); - size_t old_cap = v.capacity(); - for (size_t i = 0; i < v.capacity(); ++i) { + std::size_t old_cap = v.capacity(); + for (std::size_t i = 0; i < v.capacity(); ++i) { v.emplace_back(42); } assert(v.capacity() == old_cap); diff --git a/libcxx/test/libcxx/containers/unord/next_pow2.pass.cpp b/libcxx/test/libcxx/containers/unord/next_pow2.pass.cpp index 5146f41dbe49a..4aa87d30b3f0b 100644 --- a/libcxx/test/libcxx/containers/unord/next_pow2.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/next_pow2.pass.cpp @@ -31,7 +31,7 @@ is_power_of_two(unsigned long n) return __builtin_popcount(n) == 1; } -void test_next_pow2_val(size_t n) +void test_next_pow2_val(std::size_t n) { std::size_t npow2 = std::__next_hash_pow2(n); assert(is_power_of_two(npow2) && npow2 > n); diff --git a/libcxx/test/libcxx/containers/unord/next_prime.pass.cpp b/libcxx/test/libcxx/containers/unord/next_prime.pass.cpp index 642e5fa1d63f5..d19ab1ebb505a 100644 --- a/libcxx/test/libcxx/containers/unord/next_prime.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/next_prime.pass.cpp @@ -22,7 +22,7 @@ #include "test_macros.h" bool -is_prime(size_t n) +is_prime(std::size_t n) { switch (n) { @@ -30,7 +30,7 @@ is_prime(size_t n) case 1: return false; } - for (size_t i = 2; i*i <= n; ++i) + for (std::size_t i = 2; i*i <= n; ++i) { if (n % i == 0) return false; diff --git a/libcxx/test/libcxx/containers/unord/non_const_comparator.verify.cpp b/libcxx/test/libcxx/containers/unord/non_const_comparator.verify.cpp index 0452d080a7124..a5d529dacef4c 100644 --- a/libcxx/test/libcxx/containers/unord/non_const_comparator.verify.cpp +++ b/libcxx/test/libcxx/containers/unord/non_const_comparator.verify.cpp @@ -17,7 +17,7 @@ struct BadHash { template - size_t operator()(T const& t) { + std::size_t operator()(T const& t) { return std::hash{}(t); } }; diff --git a/libcxx/test/libcxx/containers/unord/unord.set/missing_hash_specialization.fail.cpp b/libcxx/test/libcxx/containers/unord/unord.set/missing_hash_specialization.fail.cpp index 45be37d1c86de..f492b760edf26 100644 --- a/libcxx/test/libcxx/containers/unord/unord.set/missing_hash_specialization.fail.cpp +++ b/libcxx/test/libcxx/containers/unord/unord.set/missing_hash_specialization.fail.cpp @@ -28,7 +28,7 @@ struct BadHashNoCopy { BadHashNoCopy(BadHashNoCopy const&) = delete; template - size_t operator()(T const&) const { return 0; } + std::size_t operator()(T const&) const { return 0; } }; struct BadHashNoCall { @@ -39,7 +39,7 @@ struct BadHashNoCall { struct GoodHashNoDefault { explicit GoodHashNoDefault(void*) {} template - size_t operator()(T const&) const { return 0; } + std::size_t operator()(T const&) const { return 0; } }; int main(int, char**) { diff --git a/libcxx/test/libcxx/debug/containers.multithread.pass.cpp b/libcxx/test/libcxx/debug/containers.multithread.pass.cpp index a389c31b2142e..eb2373eb412a5 100644 --- a/libcxx/test/libcxx/debug/containers.multithread.pass.cpp +++ b/libcxx/test/libcxx/debug/containers.multithread.pass.cpp @@ -32,14 +32,14 @@ Container makeContainer(int size) { template void ThreadUseIter() { - const size_t maxRounds = 7; + const std::size_t maxRounds = 7; struct TestRunner{ void operator()() { - for (size_t count = 0; count < maxRounds; count++) { - const size_t containerCount = 11; + for (std::size_t count = 0; count < maxRounds; count++) { + const std::size_t containerCount = 11; std::vector containers; std::vector iterators; - for (size_t containerIndex = 0; containerIndex < containerCount; containerIndex++) { + for (std::size_t 
containerIndex = 0; containerIndex < containerCount; containerIndex++) { containers.push_back(makeContainer(3)); Container& c = containers.back(); iterators.push_back(c.begin()); @@ -50,12 +50,12 @@ void ThreadUseIter() { }; TestRunner r; - const size_t threadCount = 4; + const std::size_t threadCount = 4; std::vector threads; - for (size_t count = 0; count < threadCount; count++) + for (std::size_t count = 0; count < threadCount; count++) threads.emplace_back(r); r(); - for (size_t count = 0; count < threadCount; count++) + for (std::size_t count = 0; count < threadCount; count++) threads[count].join(); } diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.pass.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.pass.cpp index cca85103bcb12..da35465c5295b 100644 --- a/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.pass.cpp +++ b/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.pass.cpp @@ -29,15 +29,15 @@ static const bool UsingAlignedNew = true; #endif #ifdef __STDCPP_DEFAULT_NEW_ALIGNMENT__ -static const size_t MaxAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__; +static const std::size_t MaxAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__; #else -static const size_t MaxAligned = std::alignment_of::value; +static const std::size_t MaxAligned = std::alignment_of::value; #endif -static const size_t OverAligned = MaxAligned * 2; +static const std::size_t OverAligned = MaxAligned * 2; -template +template struct TEST_ALIGNAS(Align) AlignedType { char data; static int constructed; @@ -45,11 +45,11 @@ struct TEST_ALIGNAS(Align) AlignedType { AlignedType(AlignedType const&) { ++constructed; } ~AlignedType() { --constructed; } }; -template +template int AlignedType::constructed = 0; -template +template void test_aligned() { typedef AlignedType T; T::constructed = 0; diff --git a/libcxx/test/libcxx/fuzzing/nth_element.pass.cpp b/libcxx/test/libcxx/fuzzing/nth_element.pass.cpp index 5e491e76dfa22..45f4aed6fa412 100644 --- a/libcxx/test/libcxx/fuzzing/nth_element.pass.cpp +++ b/libcxx/test/libcxx/fuzzing/nth_element.pass.cpp @@ -18,7 +18,7 @@ // Use the first element as a position into the data extern "C" int LLVMFuzzerTestOneInput(const std::uint8_t *data, std::size_t size) { if (size <= 1) return 0; - const size_t partition_point = data[0] % size; + const std::size_t partition_point = data[0] % size; std::vector working(data + 1, data + size); const auto partition_iter = working.begin() + partition_point; std::nth_element(working.begin(), partition_iter, working.end()); diff --git a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp index 28c964d4ccab9..7ef943fd30c1d 100644 --- a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp +++ b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp @@ -107,7 +107,7 @@ void operator delete(void* p)TEST_NOEXCEPT { } #ifndef NO_SIZE -void operator delete(void* p, size_t n)TEST_NOEXCEPT { +void operator delete(void* p, std::size_t n)TEST_NOEXCEPT { ::free(p); stats.sized_called++; stats.last_size = n; @@ -123,7 +123,7 @@ void operator delete(void* p, std::align_val_t a)TEST_NOEXCEPT { stats.last_size = -1; } -void operator delete(void* p, size_t n, std::align_val_t a)TEST_NOEXCEPT { +void operator delete(void* p, std::size_t n, std::align_val_t a)TEST_NOEXCEPT { 
std::__libcpp_aligned_free(p); stats.aligned_sized_called++; stats.last_align = static_cast(a); @@ -134,12 +134,12 @@ void operator delete(void* p, size_t n, std::align_val_t a)TEST_NOEXCEPT { void test_libcpp_dealloc() { void* p = nullptr; #ifdef __STDCPP_DEFAULT_NEW_ALIGNMENT__ - size_t over_align_val = __STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2; + std::size_t over_align_val = __STDCPP_DEFAULT_NEW_ALIGNMENT__ * 2; #else - size_t over_align_val = TEST_ALIGNOF(std::max_align_t) * 2; + std::size_t over_align_val = TEST_ALIGNOF(std::max_align_t) * 2; #endif - size_t under_align_val = TEST_ALIGNOF(int); - size_t with_size_val = 2; + std::size_t under_align_val = TEST_ALIGNOF(int); + std::size_t with_size_val = 2; { std::__libcpp_deallocate_unsized(p, under_align_val); diff --git a/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp b/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp index aecdcd6c787ae..32ef97e0241c8 100644 --- a/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp +++ b/libcxx/test/libcxx/language.support/support.dynamic/new_faligned_allocation.pass.cpp @@ -29,7 +29,7 @@ #include "test_macros.h" -static void test_allocations(size_t size, size_t alignment) { +static void test_allocations(std::size_t size, size_t alignment) { { void* ptr = ::operator new(size, std::align_val_t(alignment)); assert(ptr); diff --git a/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp b/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp index a89a3d69476cd..8df3f8ad8cade 100644 --- a/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp +++ b/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp @@ -63,7 +63,7 @@ int main(int, char**) test(); test(); - test(); + test(); return 0; } diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp b/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp index 37cd7151b3577..2918d4f86afb5 100644 --- a/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp +++ b/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp @@ -54,7 +54,7 @@ namespace test2 { int x; int* begin() const; int* end() const; - constexpr static size_t size() { return 1; } + constexpr static std::size_t size() { return 1; } }; static_assert( std::ranges::forward_range); static_assert( std::ranges::__tiny_range); @@ -107,7 +107,7 @@ namespace test4 { struct EmptyTinyView : std::ranges::view_base { int* begin() const; int* end() const; - constexpr static size_t size() { return 1; } + constexpr static std::size_t size() { return 1; } }; static_assert( std::ranges::forward_range); static_assert( std::ranges::__tiny_range); diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/types.h b/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/types.h index a66b378451542..12b0de9aa6330 100644 --- a/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/types.h +++ b/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/types.h @@ -10,6 +10,7 @@ #define TEST_LIBCXX_RANGES_RANGE_ADAPTORS_RANGE_LAZY_SPLIT_TYPES_H #include +#include #include #include #include "test_iterators.h" @@ -53,7 +54,7 @@ struct ForwardTinyView : std::ranges::view_base { constexpr ForwardTinyView() = default; constexpr forward_iterator begin() const { return forward_iterator(nullptr); } constexpr 
forward_iterator end() const { return forward_iterator(nullptr); } - constexpr static size_t size() { return 1; } + constexpr static std::size_t size() { return 1; } }; static_assert(std::ranges::forward_range); static_assert(std::ranges::view); diff --git a/libcxx/test/libcxx/strings/basic.string/alignof.compile.pass.cpp b/libcxx/test/libcxx/strings/basic.string/alignof.compile.pass.cpp index 1613fd7b5fdaa..504b8c8a551da 100644 --- a/libcxx/test/libcxx/strings/basic.string/alignof.compile.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/alignof.compile.pass.cpp @@ -132,5 +132,5 @@ static_assert(alignof(small_string) == 4, ""); # endif #else -# error "size_t has an unexpected size" +# error "std::size_t has an unexpected size" #endif diff --git a/libcxx/test/libcxx/strings/basic.string/sizeof.compile.pass.cpp b/libcxx/test/libcxx/strings/basic.string/sizeof.compile.pass.cpp index 7a1835d0479f5..4d3eeb1bbce6e 100644 --- a/libcxx/test/libcxx/strings/basic.string/sizeof.compile.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/sizeof.compile.pass.cpp @@ -131,5 +131,5 @@ static_assert(sizeof(small_string) == 12, ""); # endif #else -# error "size_t has an unexpected size" +# error "std::size_t has an unexpected size" #endif diff --git a/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp index 69b45f8a235b9..4bc622f843da6 100644 --- a/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp +++ b/libcxx/test/libcxx/strings/basic.string/string.capacity/max_size.pass.cpp @@ -18,48 +18,48 @@ #include "test_macros.h" // alignment of the string heap buffer is hardcoded to 16 -static const size_t alignment = 16; +static const std::size_t alignment = 16; template TEST_CONSTEXPR_CXX20 void full_size() { std::string str; - assert(str.max_size() == std::numeric_limits::max() - alignment); + assert(str.max_size() == std::numeric_limits::max() - alignment); #ifndef TEST_HAS_NO_CHAR8_T std::u8string u8str; - assert(u8str.max_size() == std::numeric_limits::max() - alignment); + assert(u8str.max_size() == std::numeric_limits::max() - alignment); #endif #ifndef TEST_HAS_NO_WIDE_CHARACTERS std::wstring wstr; - assert(wstr.max_size() == std::numeric_limits::max() / sizeof(wchar_t) - alignment); + assert(wstr.max_size() == std::numeric_limits::max() / sizeof(wchar_t) - alignment); #endif std::u16string u16str; std::u32string u32str; - assert(u16str.max_size() == std::numeric_limits::max() / 2 - alignment); - assert(u32str.max_size() == std::numeric_limits::max() / 4 - alignment); + assert(u16str.max_size() == std::numeric_limits::max() / 2 - alignment); + assert(u32str.max_size() == std::numeric_limits::max() / 4 - alignment); } template TEST_CONSTEXPR_CXX20 void half_size() { std::string str; - assert(str.max_size() == std::numeric_limits::max() / 2 - alignment); + assert(str.max_size() == std::numeric_limits::max() / 2 - alignment); #ifndef TEST_HAS_NO_CHAR8_T std::u8string u8str; - assert(u8str.max_size() == std::numeric_limits::max() / 2 - alignment); + assert(u8str.max_size() == std::numeric_limits::max() / 2 - alignment); #endif #ifndef TEST_HAS_NO_WIDE_CHARACTERS std::wstring wstr; - assert(wstr.max_size() == std::numeric_limits::max() / std::max(2ul, sizeof(wchar_t)) - alignment); + assert(wstr.max_size() == std::numeric_limits::max() / std::max(2ul, sizeof(wchar_t)) - alignment); #endif std::u16string u16str; std::u32string u32str; - assert(u16str.max_size() == 
std::numeric_limits::max() / 2 - alignment); - assert(u32str.max_size() == std::numeric_limits::max() / 4 - alignment); + assert(u16str.max_size() == std::numeric_limits::max() / 2 - alignment); + assert(u32str.max_size() == std::numeric_limits::max() / 4 - alignment); } TEST_CONSTEXPR_CXX20 bool test() { diff --git a/libcxx/test/libcxx/type_traits/is_specialization.verify.cpp b/libcxx/test/libcxx/type_traits/is_specialization.verify.cpp index 0dbdbf56f75c8..2fd1176417538 100644 --- a/libcxx/test/libcxx/type_traits/is_specialization.verify.cpp +++ b/libcxx/test/libcxx/type_traits/is_specialization.verify.cpp @@ -19,4 +19,4 @@ #include // expected-error@+1 {{template template argument has different template parameters than its corresponding template template parameter}} -static_assert(!std::__is_specialization_v, std::array>); +static_assert(!std::__is_specialization_v, std::array>); diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h index cfe7f4eb394a1..204dcacb1152c 100644 --- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.h @@ -78,7 +78,7 @@ struct data { /// The offset of the last code units of the extended grapheme clusters in the input. /// /// The vector has the same number of entries as \ref code_points. - std::vector breaks; + std::vector breaks; }; /// The data for UTF-8. diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp index 9039ed051b612..512d27854e4a0 100644 --- a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp +++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/extended_grapheme_cluster.pass.cpp @@ -63,7 +63,7 @@ constexpr void test(const Data& data) { assert(d.code_points.size() == d.breaks.size()); std::__unicode::__extended_grapheme_cluster_view view{d.input.begin(), d.input.end()}; - for (size_t i = 0; i < d.breaks.size(); ++i) { + for (std::size_t i = 0; i < d.breaks.size(); ++i) { auto r = view.__consume(); assert(r.__code_point_ == d.code_points[i]); assert(r.__last_ == d.input.begin() + d.breaks[i]); diff --git a/libcxx/test/libcxx/utilities/function.objects/unord.hash/murmur2_or_cityhash_ubsan_unsigned_overflow_ignored.pass.cpp b/libcxx/test/libcxx/utilities/function.objects/unord.hash/murmur2_or_cityhash_ubsan_unsigned_overflow_ignored.pass.cpp index 73ad7aa5d03cb..11f0724f990c8 100644 --- a/libcxx/test/libcxx/utilities/function.objects/unord.hash/murmur2_or_cityhash_ubsan_unsigned_overflow_ignored.pass.cpp +++ b/libcxx/test/libcxx/utilities/function.objects/unord.hash/murmur2_or_cityhash_ubsan_unsigned_overflow_ignored.pass.cpp @@ -35,8 +35,8 @@ int main(int, char**) { "00000000000000000000000000000000000000000000000000000000000000000000000", "1237546895+54+4554985416849484213464984765465464654564565645645646546456546546" }; - const size_t NumCases = sizeof(TestCases)/sizeof(TestCases[0]); - for (size_t i=0; i < NumCases; ++i) + const std::size_t NumCases = sizeof(TestCases)/sizeof(TestCases[0]); + for (std::size_t i=0; i < NumCases; ++i) test(TestCases[i].data(), TestCases[i].length()); return 0; diff --git 
a/libcxx/test/libcxx/utilities/intseq/for_each_index_sequence.pass.cpp b/libcxx/test/libcxx/utilities/intseq/for_each_index_sequence.pass.cpp index 77d7ddc3c38be..99345274da29e 100644 --- a/libcxx/test/libcxx/utilities/intseq/for_each_index_sequence.pass.cpp +++ b/libcxx/test/libcxx/utilities/intseq/for_each_index_sequence.pass.cpp @@ -19,7 +19,7 @@ constexpr bool test() { int count = 0; - std::__for_each_index_sequence(std::make_index_sequence<8>(), [&] { count += _Index; }); + std::__for_each_index_sequence(std::make_index_sequence<8>(), [&] { count += _Index; }); assert(count == 28); return true; diff --git a/libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp b/libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp index 6fdd02362386f..58b9db45d37a2 100644 --- a/libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp +++ b/libcxx/test/libcxx/utilities/meta/stress_tests/stress_test_variant_overloads_impl.sh.cpp @@ -42,7 +42,7 @@ #include "test_macros.h" #include "template_cost_testing.h" -template +template struct TestType {}; template @@ -54,7 +54,7 @@ namespace flat_impl { struct OverloadBase { void operator()() const; }; -template +template struct Overload { auto operator()(Tp, Tp) const -> ID; }; @@ -65,7 +65,7 @@ struct AllOverloads : OverloadBase, Bases... {}; template struct MakeOverloads; -template +template struct MakeOverloads > { template using Apply = AllOverloads...>; diff --git a/libcxx/test/libcxx/utilities/variant/variant.variant/variant_size.pass.cpp b/libcxx/test/libcxx/utilities/variant/variant.variant/variant_size.pass.cpp index 4e2d787117bc8..a2dc58bce1b64 100644 --- a/libcxx/test/libcxx/utilities/variant/variant.variant/variant_size.pass.cpp +++ b/libcxx/test/libcxx/utilities/variant/variant.variant/variant_size.pass.cpp @@ -22,13 +22,13 @@ template struct make_variant_imp; -template -struct make_variant_imp> { - template using AlwaysChar = char; +template +struct make_variant_imp> { + template using AlwaysChar = char; using type = std::variant...>; }; -template +template using make_variant_t = typename make_variant_imp>::type; constexpr bool ExpectEqual = diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp index 4a8963edae089..237d1ef115090 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.copy/ranges.copy_n.pass.cpp @@ -23,7 +23,7 @@ #include "almost_satisfies_types.h" #include "test_iterators.h" -template +template concept HasCopyNIt = requires(In in, Count count, Out out) { std::ranges::copy_n(in, count, out); }; static_assert(HasCopyNIt); diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp index 6bb784201b896..7d6770de702bf 100644 --- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.fill/fill_n.pass.cpp @@ -22,7 +22,7 @@ #if TEST_STD_VER > 17 TEST_CONSTEXPR bool test_constexpr() { - const size_t N = 5; + const std::size_t N = 5; int ib[] = {0, 0, 0, 0, 0, 0}; // one bigger than N auto it = std::fill_n(std::begin(ib), N, 5); diff --git 
a/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp
index 3eb361a819d58..13fd1cbf1f33c 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/generate_n.pass.cpp
@@ -31,7 +31,7 @@ struct gen_test
 #if TEST_STD_VER > 17
 TEST_CONSTEXPR bool test_constexpr() {
-    const size_t N = 5;
+    const std::size_t N = 5;
     int ib[] = {0, 0, 0, 0, 0, 0}; // one bigger than N
     auto it = std::generate_n(std::begin(ib), N, gen_test());
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/ranges_generate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/ranges_generate.pass.cpp
index 7cd53dc751139..56fbe58e2dfd0 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/ranges_generate.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/ranges_generate.pass.cpp
@@ -99,7 +99,7 @@ static_assert(!HasGenerateRange, UninvocableGen>);
 static_assert(!HasGenerateRange);
 static_assert(!HasGenerateRange, IntPtrGen>);
-template
+template
 constexpr void test_one(const std::array input, Gen gen, std::array expected) {
   { // (iterator, sentinel) overload.
     auto in = input;
@@ -174,7 +174,7 @@ constexpr bool test() {
   { // (iterator, sentinel) overload.
     int gen_invocations = 0;
     auto gen = [&gen_invocations] { ++gen_invocations; return AssignedOnce(); };
-    constexpr size_t N = 10;
+    constexpr std::size_t N = 10;
     std::array in;
     std::ranges::generate(in.begin(), in.end(), gen);
@@ -185,7 +185,7 @@ constexpr bool test() {
   { // (range) overload.
     int gen_invocations = 0;
     auto gen = [&gen_invocations] { ++gen_invocations; return AssignedOnce(); };
-    constexpr size_t N = 10;
+    constexpr std::size_t N = 10;
     std::array in;
     std::ranges::generate(in, gen);
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/ranges_generate_n.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/ranges_generate_n.pass.cpp
index 6a678322cb156..e6af0116ea0d2 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/ranges_generate_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.generate/ranges_generate_n.pass.cpp
@@ -66,8 +66,8 @@ static_assert(!HasGenerateNIter);
 // !indirectly_writable>
 static_assert(!HasGenerateNIter);
-template
-constexpr void test_one(std::array in, size_t n, Gen gen, std::array expected) {
+template
+constexpr void test_one(std::array in, std::size_t n, Gen gen, std::array expected) {
   assert(n <= N);
   auto begin = Iter(in.data());
@@ -120,8 +120,8 @@ constexpr bool test() {
     int gen_invocations = 0;
     auto gen = [&gen_invocations] { ++gen_invocations; return AssignedOnce(); };
-    constexpr size_t N1 = 10;
-    constexpr size_t N2 = N1 / 2;
+    constexpr std::size_t N1 = 10;
+    constexpr std::size_t N2 = N1 / 2;
     std::array in;
     auto result = std::ranges::generate_n(in.begin(), N2, gen);
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition.pass.cpp
index 6779f78309edb..6d9be0441bfef 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition.pass.cpp
@@ -82,8 +82,8 @@ static_assert(!HasPartitionRange, UnaryPred>);
 static_assert(!HasPartitionRange, UnaryPred>);
 // `partition` isn't a stable algorithm so this function cannot test the exact output.
-template
-constexpr void test_one(std::array input, Pred pred, size_t partition_point) {
+template
+constexpr void test_one(std::array input, Pred pred, std::size_t partition_point) {
   auto neg_pred = [&](int x) { return !pred(x); };
   { // (iterator, sentinel) overload.
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition_copy.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition_copy.pass.cpp
index ba8a3479d707f..7b5b80dd8aaf8 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition_copy.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition_copy.pass.cpp
@@ -118,7 +118,7 @@ static_assert(!HasPartitionCopyRange, int*, Uncopyable*>);
 static_assert(std::is_same_v, std::ranges::in_out_out_result>);
-template
+template
 constexpr void test_one(std::array input, Pred pred, std::array expected_true, std::array expected_false) {
   static_assert(N2 + N3 == N1);
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition_point.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition_point.pass.cpp
index f9dadc28493b4..932e833aa499d 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition_point.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_partition_point.pass.cpp
@@ -76,8 +76,8 @@ static_assert(!HasPartitionPointRange);
 static_assert(!HasPartitionPointRange, IndirectUnaryPredicateNotPredicate>);
 static_assert(!HasPartitionPointRange, IndirectUnaryPredicateNotCopyConstructible>);
-template
-constexpr void test_one(std::array input, Pred pred, size_t partition_point) {
+template
+constexpr void test_one(std::array input, Pred pred, std::size_t partition_point) {
   assert(std::ranges::is_partitioned(input, pred));
   auto begin = Iter(input.data());
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_stable_partition.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_stable_partition.pass.cpp
index b624abc6d5315..5c721059424da 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_stable_partition.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.partitions/ranges_stable_partition.pass.cpp
@@ -84,8 +84,8 @@ static_assert(!HasStablePartitionRange, IndirectUnaryPredicateNotCopyCon
 static_assert(!HasStablePartitionRange, UnaryPred>);
 static_assert(!HasStablePartitionRange, UnaryPred>);
-template
-void test_one(std::array input, Pred pred, size_t partition_point, std::array expected) {
+template
+void test_one(std::array input, Pred pred, std::size_t partition_point, std::array expected) {
   auto neg_pred = [&](int x) { return !pred(x); };
   { // (iterator, sentinel) overload.
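Every hunk in this series makes the same mechanical substitution, so one note on the rationale: the C++ standard only guarantees that <cstddef> declares std::size_t; whether the unqualified ::size_t is also visible in the global namespace is implementation-specific. A minimal sketch of the distinction (not part of the patch; the variable names are illustrative):

    #include <cstddef>  // guaranteed to declare std::size_t

    std::size_t portable_count = sizeof(long);  // always well-formed
    // size_t fragile_count = sizeof(long);     // may or may not compile,
    //                                          // depending on whether the
    //                                          // implementation also injects
    //                                          // ::size_t

Qualifying the name therefore keeps these tests valid on any conforming standard library.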
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.sample/ranges_sample.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.sample/ranges_sample.pass.cpp
index 0a5d83e85b4a4..69db960ff362f 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.sample/ranges_sample.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.sample/ranges_sample.pass.cpp
@@ -36,10 +36,10 @@ class RandGen {
 public:
-  constexpr static size_t min() { return 0; }
-  constexpr static size_t max() { return 255; }
+  constexpr static std::size_t min() { return 0; }
+  constexpr static std::size_t max() { return 255; }
 
-  constexpr size_t operator()() {
+  constexpr std::size_t operator()() {
     flip = !flip;
     return flip;
   }
@@ -54,9 +54,9 @@ static_assert(std::uniform_random_bit_generator);
 LIBCPP_STATIC_ASSERT(!std::__libcpp_random_is_valid_urng::value);
 struct BadGen {
-  constexpr static size_t min() { return 255; }
-  constexpr static size_t max() { return 0; }
-  constexpr size_t operator()() const;
+  constexpr static std::size_t min() { return 255; }
+  constexpr static std::size_t max() { return 0; }
+  constexpr std::size_t operator()() const;
 };
 static_assert(!std::uniform_random_bit_generator);
@@ -148,9 +148,9 @@ static_assert(!HasSampleRange, int**>);
 // !uniform_random_bit_generator>
 static_assert(!HasSampleRange, int*, BadGen>);
-template
-void test_one(std::array in, size_t n, Gen gen) {
-  assert(n <= static_cast(N));
+template
+void test_one(std::array in, std::size_t n, Gen gen) {
+  assert(n <= static_cast(N));
   auto verify_is_subsequence = [&] (auto output) {
     auto sorted_input = in;
@@ -276,18 +276,18 @@ void test_generator() {
 // generator class has a const or non-const invocation operator (or both).
 void test_generators() {
   struct GenBase {
-    constexpr static size_t min() { return 0; }
-    constexpr static size_t max() { return 255; }
+    constexpr static std::size_t min() { return 0; }
+    constexpr static std::size_t max() { return 255; }
   };
   struct NonconstGen : GenBase {
-    size_t operator()() { return 1; }
+    std::size_t operator()() { return 1; }
   };
   struct ConstGen : GenBase {
-    size_t operator()() const { return 1; }
+    std::size_t operator()() const { return 1; }
   };
   struct ConstAndNonconstGen : GenBase {
-    size_t operator()() { return 1; }
-    size_t operator()() const { return 1; }
+    std::size_t operator()() { return 1; }
+    std::size_t operator()() const { return 1; }
   };
   test_generator();
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.shuffle/ranges_shuffle.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.shuffle/ranges_shuffle.pass.cpp
index 6f6be6d0cb934..24237b94502f9 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.shuffle/ranges_shuffle.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.random.shuffle/ranges_shuffle.pass.cpp
@@ -34,10 +34,10 @@ class RandGen {
 public:
-  constexpr static size_t min() { return 0; }
-  constexpr static size_t max() { return 255; }
+  constexpr static std::size_t min() { return 0; }
+  constexpr static std::size_t max() { return 255; }
 
-  constexpr size_t operator()() {
+  constexpr std::size_t operator()() {
     flip = !flip;
     return flip;
   }
@@ -52,9 +52,9 @@ static_assert(std::uniform_random_bit_generator);
 LIBCPP_STATIC_ASSERT(!std::__libcpp_random_is_valid_urng::value);
 struct BadGen {
-  constexpr static size_t min() { return 255; }
-  constexpr static size_t max() { return 0; }
-  constexpr size_t operator()() const;
+  constexpr static std::size_t min() { return 255; }
+  constexpr static std::size_t max() { return 0; }
+  constexpr std::size_t operator()() const;
 };
 static_assert(!std::uniform_random_bit_generator);
@@ -109,7 +109,7 @@ static_assert(!HasShuffleRange);
 // !uniform_random_bit_generator>
 static_assert(!HasShuffleRange, BadGen>);
-template
+template
 void test_one(const std::array input, Gen gen) {
   { // (iterator, sentinel) overload.
     auto shuffled = input;
@@ -219,18 +219,18 @@ void test_generator() {
 // generator class has a const or non-const invocation operator (or both).
 void test_generators() {
   struct GenBase {
-    constexpr static size_t min() { return 0; }
-    constexpr static size_t max() { return 255; }
+    constexpr static std::size_t min() { return 0; }
+    constexpr static std::size_t max() { return 255; }
   };
   struct NonconstGen : GenBase {
-    size_t operator()() { return 1; }
+    std::size_t operator()() { return 1; }
   };
   struct ConstGen : GenBase {
-    size_t operator()() const { return 1; }
+    std::size_t operator()() const { return 1; }
   };
   struct ConstAndNonconstGen : GenBase {
-    size_t operator()() { return 1; }
-    size_t operator()() const { return 1; }
+    std::size_t operator()() { return 1; }
+    std::size_t operator()() const { return 1; }
   };
   test_generator();
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/ranges.reverse.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/ranges.reverse.pass.cpp
index fbeb8a433354f..c1011f3ada649 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/ranges.reverse.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.reverse/ranges.reverse.pass.cpp
@@ -45,7 +45,7 @@ static_assert(!HasReverseR);
 static_assert(!HasReverseR);
 static_assert(!HasReverseR);
-template
+template
 constexpr void test(std::array value, std::array expected) {
   {
     auto val = value;
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges.rotate_copy.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges.rotate_copy.pass.cpp
index b41ff4ed7f217..1f18d787c2f4c 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges.rotate_copy.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges.rotate_copy.pass.cpp
@@ -51,7 +51,7 @@ static_assert(!HasRotateCopyR, OutputIteratorNotInputOrOutp
 static_assert(std::is_same_v, std::ranges::in_out_result>);
 template
-constexpr void test(std::array value, size_t middle, std::array expected) {
+constexpr void test(std::array value, std::size_t middle, std::array expected) {
   {
     std::array out;
     std::same_as> decltype(auto) ret =
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
index c34dac1c4798a..1506ce79b5dbd 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/ranges_rotate.pass.cpp
@@ -68,8 +68,8 @@ static_assert(!HasRotateRange);
 static_assert(!HasRotateRange);
 static_assert(!HasRotateRange);
-template
-constexpr void test_one(const std::array input, size_t mid_index, std::array expected) {
+template
+constexpr void test_one(const std::array input, std::size_t mid_index, std::array expected) {
   assert(mid_index <= N);
   { // (iterator, sentinel) overload.
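The RandGen and BadGen changes above matter beyond style: std::uniform_random_bit_generator (C++20, in <random>) requires min(), max(), and operator() to agree on one unsigned result type, so all three declarations have to be renamed together. A hedged sketch of the constraint; CountingGen is an illustrative type, not one from the test suite:

    #include <cstddef>
    #include <random>

    struct CountingGen {
      std::size_t state = 0;
      // The concept requires min()/max() to be constant expressions whose
      // type matches the result of operator(), with min() < max().
      static constexpr std::size_t min() { return 0; }
      static constexpr std::size_t max() { return 255; }
      constexpr std::size_t operator()() { return state = (state + 1) % 256; }
    };
    static_assert(std::uniform_random_bit_generator<CountingGen>);

BadGen, by contrast, fails the concept because its min() exceeds its max(), which is what the negative static_asserts rely on.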
@@ -159,7 +159,7 @@ constexpr bool test() {
     auto begin = adl::Iterator::TrackSwaps(in.data(), swaps);
     auto end = adl::Iterator::TrackSwaps(in.data() + in.size(), swaps);
-    for (size_t mid = 0; mid != input.size(); ++mid) {
+    for (std::size_t mid = 0; mid != input.size(); ++mid) {
       std::ranges::rotate(begin, begin + mid, end);
       assert(swaps <= expected);
     }
@@ -172,7 +172,7 @@ constexpr bool test() {
     auto end = adl::Iterator::TrackSwaps(in.data() + in.size(), swaps);
     auto range = std::ranges::subrange(begin, end);
-    for (size_t mid = 0; mid != input.size(); ++mid) {
+    for (std::size_t mid = 0; mid != input.size(); ++mid) {
       std::ranges::rotate(range, begin + mid);
       assert(swaps <= expected);
     }
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp
index cba0d3b1b7a95..c19f44a6d7eeb 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.rotate/rotate_copy.pass.cpp
@@ -115,7 +115,7 @@ TEST_CONSTEXPR_CXX20 void test() {
     int const n_ints = sizeof(ints)/sizeof(int);
     int zeros[n_ints] = {0};
-    const size_t N = 2;
+    const std::size_t N = 2;
     const auto middle = std::begin(ints) + N;
     auto it = std::rotate_copy(std::begin(ints), middle, std::end(ints), std::begin(zeros));
     assert(std::distance(std::begin(zeros), it) == n_ints);
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.unique/unique.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.unique/unique.pass.cpp
index 7046d6a2bd408..b145d567f47bf 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.unique/unique.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.unique/unique.pass.cpp
@@ -25,7 +25,7 @@ TEST_CONSTEXPR bool test_constexpr() {
     int ia[] = {0, 1, 1, 3, 4};
     const int expected[] = {0, 1, 3, 4};
-    const size_t N = 4;
+    const std::size_t N = 4;
     auto it = std::unique(std::begin(ia), std::end(ia));
     return it == (std::begin(ia) + N)
diff --git a/libcxx/test/std/algorithms/alg.modifying.operations/alg.unique/unique_pred.pass.cpp b/libcxx/test/std/algorithms/alg.modifying.operations/alg.unique/unique_pred.pass.cpp
index 9f0c695e212dc..91535f081ca8b 100644
--- a/libcxx/test/std/algorithms/alg.modifying.operations/alg.unique/unique_pred.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.modifying.operations/alg.unique/unique_pred.pass.cpp
@@ -25,7 +25,7 @@ TEST_CONSTEXPR bool test_constexpr() {
     int ia[] = {0, 1, 1, 3, 4};
     const int expected[] = {0, 1, 3, 4};
-    const size_t N = 4;
+    const std::size_t N = 4;
     auto it = std::unique(std::begin(ia), std::end(ia), [](int a, int b) {return a == b; });
     return it == (std::begin(ia) + N)
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.adjacent.find/ranges.adjacent_find.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.adjacent.find/ranges.adjacent_find.pass.cpp
index c040e27cc9b0f..d2e0bb5d182d6 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.adjacent.find/ranges.adjacent_find.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.adjacent.find/ranges.adjacent_find.pass.cpp
@@ -51,13 +51,13 @@ static_assert(!HasAdjacentFindR);
 static_assert(!HasAdjacentFindR);
 static_assert(!HasAdjacentFindR>);
-template
+template
 struct Data {
   std::array input;
   int expected;
 };
-template
+template
 constexpr void test(Data d) {
   {
     std::same_as decltype(auto) ret =
diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp
index 6eb5c247a211e..371f6c92f1ed1 100644
--- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/for_each_n.pass.cpp
@@ -25,7 +25,7 @@ TEST_CONSTEXPR bool test_constexpr() {
     int ia[] = {1, 3, 6, 7};
    int expected[] = {3, 5, 8, 9};
-    const size_t N = 4;
+    const std::size_t N = 4;
     auto it = std::for_each_n(std::begin(ia), N, [](int &a) { a += 2; });
     return it == (std::begin(ia) + N)
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/is.heap/ranges_is_heap.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/is.heap/ranges_is_heap.pass.cpp
index f294d9de25033..fc207c9fc7529 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/is.heap/ranges_is_heap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/is.heap/ranges_is_heap.pass.cpp
@@ -72,7 +72,7 @@ static_assert(!HasIsHeapRange);
 // !indirect_strict_weak_order, Proj>>
 static_assert(!HasIsHeapRange>);
-template
+template
 constexpr void test_one(std::array input, bool expected) {
   auto begin = Iter(input.data());
   auto end = Sent(Iter(input.data() + input.size()));
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/is.heap/ranges_is_heap_until.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/is.heap/ranges_is_heap_until.pass.cpp
index e6a74fe0f8d35..f1dc6adbeaae1 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/is.heap/ranges_is_heap_until.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/is.heap/ranges_is_heap_until.pass.cpp
@@ -72,8 +72,8 @@ static_assert(!HasIsHeapUntilRange);
 // !indirect_strict_weak_order, Proj>>
 static_assert(!HasIsHeapUntilRange>);
-template
-constexpr void test_one(std::array input, size_t until_index) {
+template
+constexpr void test_one(std::array input, std::size_t until_index) {
   auto begin = Iter(input.data());
   auto end = Sent(Iter(input.data() + input.size()));
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/make.heap/ranges_make_heap.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/make.heap/ranges_make_heap.pass.cpp
index 59b8283c16c23..5d8086df450ab 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/make.heap/ranges_make_heap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/make.heap/ranges_make_heap.pass.cpp
@@ -58,14 +58,14 @@ static_assert(!HasMakeHeapR, BadComparator>);
 static_assert(!HasMakeHeapR>); // Doesn't satisfy `sortable`.
-template
+template
 constexpr void verify_heap(const std::array& heapified, Iter last, std::array expected) {
   assert(heapified == expected);
   assert(base(last) == heapified.data() + heapified.size());
   assert(std::is_heap(heapified.begin(), heapified.end()));
 }
-template
+template
 constexpr void test_one(const std::array input, std::array expected) {
   { // (iterator, sentinel) overload.
     auto heapified = input;
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/pop.heap/ranges_pop_heap.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/pop.heap/ranges_pop_heap.pass.cpp
index 6ce05db648f1f..e190586fef813 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/pop.heap/ranges_pop_heap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/pop.heap/ranges_pop_heap.pass.cpp
@@ -58,7 +58,7 @@ static_assert(!HasPopHeapR, BadComparator>);
 static_assert(!HasPopHeapR>); // Doesn't satisfy `sortable`.
-template
+template
 constexpr void verify_heap(const std::array& heapified, Iter last, std::array expected) {
   assert(heapified == expected);
   assert(base(last) == heapified.data() + heapified.size());
@@ -66,7 +66,7 @@ constexpr void verify_heap(const std::array& heapified, Iter last, std::ar
   assert(*std::max_element(heapified.begin(), heapified.end()) == heapified.back());
 }
-template
+template
 constexpr void test_one(const std::array input, std::array expected) {
   assert(!input.empty());
   assert(std::is_heap(input.begin(), input.end()));
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/push.heap/ranges_push_heap.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/push.heap/ranges_push_heap.pass.cpp
index e2dc0accea457..331f07755fae4 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/push.heap/ranges_push_heap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/push.heap/ranges_push_heap.pass.cpp
@@ -58,14 +58,14 @@ static_assert(!HasPushHeapR, BadComparator>);
 static_assert(!HasPushHeapR>); // Doesn't satisfy `sortable`.
-template
+template
 constexpr void verify_heap(const std::array& heapified, Iter last, std::array expected) {
   assert(heapified == expected);
   assert(base(last) == heapified.data() + heapified.size());
   assert(std::is_heap(heapified.begin(), heapified.end()));
 }
-template
+template
 constexpr void test_one(const std::array input, std::array expected) {
   if (!input.empty()) {
     assert(std::is_heap(input.begin(), input.end() - 1));
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/sort.heap/ranges_sort_heap.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/sort.heap/ranges_sort_heap.pass.cpp
index abf317a753406..128ff80aadbef 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/sort.heap/ranges_sort_heap.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.heap.operations/sort.heap/ranges_sort_heap.pass.cpp
@@ -59,14 +59,14 @@ static_assert(!HasSortHeapR, BadComparator>);
 static_assert(!HasSortHeapR>); // Doesn't satisfy `sortable`.
-template
+template
 constexpr void verify_sorted(const std::array& sorted, Iter last, std::array expected) {
   assert(sorted == expected);
   assert(base(last) == sorted.data() + sorted.size());
   assert(std::is_sorted(sorted.begin(), sorted.end()));
 }
-template
+template
 constexpr void test_one(const std::array input, std::array expected) {
   assert(std::is_heap(input.begin(), input.end()));
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.nth.element/ranges_nth_element.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.nth.element/ranges_nth_element.pass.cpp
index a648e7df10a10..ad3c8ab699ab4 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.nth.element/ranges_nth_element.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.nth.element/ranges_nth_element.pass.cpp
@@ -64,8 +64,8 @@ static_assert(!HasNthElementR, BadComparator>);
 static_assert(!HasNthElementR>); // Doesn't satisfy `sortable`.
-template
-constexpr void verify_nth(const std::array& partially_sorted, size_t nth_index, Iter last, T expected_nth) {
+template
+constexpr void verify_nth(const std::array& partially_sorted, std::size_t nth_index, Iter last, T expected_nth) {
   // Note that the exact output of `nth_element` is unspecified and may vary between implementations.
   assert(base(last) == partially_sorted.end());
@@ -92,8 +92,8 @@ constexpr void verify_nth(const std::array& partially_sorted, size_t nth_i
   }
 }
-template
-constexpr void test_one(std::array input, size_t nth_index, std::optional expected_nth = {}) {
+template
+constexpr void test_one(std::array input, std::size_t nth_index, std::optional expected_nth = {}) {
   assert(expected_nth || nth_index == N);
   { // (iterator, sentinel) overload.
@@ -126,7 +126,7 @@ constexpr void test_one(std::array input, size_t nth_index, std::optiona
   }
 }
-template
+template
 constexpr void test_all_cases(std::array input) {
   auto sorted = input;
   std::sort(sorted.begin(), sorted.end());
@@ -162,7 +162,7 @@ constexpr void test_iterators() {
   { // nth element is in the right place.
     std::array input = {6, 5, 3, 1, 4, 2};
-    constexpr size_t N = input.size();
+    constexpr std::size_t N = input.size();
     test_one(input, 2, /*expected_nth=*/3);
   }
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/ranges.next_permutation.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/ranges.next_permutation.pass.cpp
index 5e7b8392909a4..77a7799ff11ad 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/ranges.next_permutation.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/ranges.next_permutation.pass.cpp
@@ -50,7 +50,7 @@ static_assert(!HasNextPermutationR);
 static_assert(!HasNextPermutationR);
 static_assert(!HasNextPermutationR>); // not sortable
-constexpr size_t factorial(size_t i) {
+constexpr std::size_t factorial(size_t i) {
   std::array memoized = {1, 1, 2, 6, 24, 120, 720, 5040, 40320};
   return memoized[i];
 }
@@ -87,8 +87,8 @@ constexpr void test_next_permutations(Func call_next_permutation) {
 // lexicographically greater than the previous. If these two conditions hold (the number of permutations is `N!`, and
 // each permutation is lexicographically greater than the previous one), it follows that the
 // `ranges::next_permutation` algorithm works correctly.
-  for (size_t i = 0; i <= current_permutation.size(); ++i) {
-    size_t count = 0;
+  for (std::size_t i = 0; i <= current_permutation.size(); ++i) {
+    std::size_t count = 0;
     bool next_found = true;
     while (next_found) {
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/ranges.prev_permutation.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/ranges.prev_permutation.pass.cpp
index 3ec1af18e8466..a62138ed4f3b4 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/ranges.prev_permutation.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.permutation.generators/ranges.prev_permutation.pass.cpp
@@ -50,7 +50,7 @@ static_assert(!HasPrevPermutationR);
 static_assert(!HasPrevPermutationR);
 static_assert(!HasPrevPermutationR>); // not sortable
-constexpr size_t factorial(size_t i) {
+constexpr std::size_t factorial(size_t i) {
   std::array memoized = {1, 1, 2, 6, 24, 120, 720, 5040, 40320};
   return memoized[i];
 }
@@ -87,8 +87,8 @@ constexpr void test_prev_permutations(Func call_prev_permutation) {
 // lexicographically less than the previous. If these two conditions hold (the number of permutations is `N!`, and
 // each permutation is lexicographically less than the previous one), it follows that the `ranges::prev_permutation`
 // algorithm works correctly.
-  for (size_t i = 0; i <= current_permutation.size(); ++i) {
-    size_t count = 0;
+  for (std::size_t i = 0; i <= current_permutation.size(); ++i) {
+    std::size_t count = 0;
     bool next_found = true;
     while (next_found) {
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/ranges_partial_sort_copy.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/ranges_partial_sort_copy.pass.cpp
index 7a4ebd9c3ca03..532ee9b03efc0 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/ranges_partial_sort_copy.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort.copy/ranges_partial_sort_copy.pass.cpp
@@ -119,17 +119,17 @@ static_assert(!HasPartialSortCopyRange, R>);
 static_assert(std::is_same_v, std::ranges::in_out_result>);
-template
+template
 constexpr void test_one(
-    std::array input, size_t input_size, size_t output_size, std::array sorted) {
+    std::array input, std::size_t input_size, size_t output_size, std::array sorted) {
   assert(input_size <= N);
   assert(output_size <= N + 1); // To support testing the case where output size exceeds input size.
   using ResultT = std::ranges::partial_sort_copy_result;
   // To support testing the case where output size exceeds input size; also makes sure calling `out.data() + int()` is
   // valid.
-  constexpr size_t OutputSize = N + 1;
-  size_t result_size = std::ranges::min(input_size, output_size);
+  constexpr std::size_t OutputSize = N + 1;
+  std::size_t result_size = std::ranges::min(input_size, output_size);
   auto begin = input.data();
   auto end = input.data() + input_size;
@@ -167,13 +167,13 @@ constexpr void test_one(
 }
-template
+template
 constexpr void test_all_subsequences(const std::array input) {
   auto sorted = input;
   std::sort(sorted.begin(), sorted.end());
   // Whole input, increasing output size. Also check the case when `output_size` exceeds input size.
-  for (size_t out_size = 0; out_size <= N + 1; ++out_size) {
+  for (std::size_t out_size = 0; out_size <= N + 1; ++out_size) {
     test_one(input, N, out_size, sorted);
   }
 }
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort/ranges_partial_sort.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort/ranges_partial_sort.pass.cpp
index e70828f055f89..af304662599ca 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort/ranges_partial_sort.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/partial.sort/ranges_partial_sort.pass.cpp
@@ -62,8 +62,8 @@ static_assert(!HasPartialSortR, BadComparator>);
 static_assert(!HasPartialSortR>); // Doesn't satisfy `sortable`.
-template
-constexpr void test_one(std::array input, size_t mid_index, std::array sorted) {
+template
+constexpr void test_one(std::array input, std::size_t mid_index, std::array sorted) {
   { // (iterator, sentinel) overload.
     auto partially_sorted = input;
     auto begin = Iter(partially_sorted.data());
@@ -89,12 +89,12 @@ constexpr void test_one(std::array input, size_t mid_index, std::array
-template
+template
 constexpr void test_all_subsequences(std::array input) {
   auto sorted = input;
   std::sort(sorted.begin(), sorted.end());
-  for (size_t n = 0; n <= N; ++n) {
+  for (std::size_t n = 0; n <= N; ++n) {
     test_one(input, n, sorted);
   }
 }
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/sort/ranges.sort.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/sort/ranges.sort.pass.cpp
index 96f4c1c2f5736..2463bf733196d 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/sort/ranges.sort.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/sort/ranges.sort.pass.cpp
@@ -58,7 +58,7 @@ static_assert(!HasSortR, BadComparator>);
 static_assert(!HasSortR>); // Doesn't satisfy `sortable`.
-template
+template
 constexpr void test_one(std::array input, std::array expected) {
   { // (iterator, sentinel) overload.
     auto sorted = input;
diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/ranges.stable.sort.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/ranges.stable.sort.pass.cpp
index f83d093c12a8b..ea0a3ebdf85b2 100644
--- a/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/ranges.stable.sort.pass.cpp
+++ b/libcxx/test/std/algorithms/alg.sorting/alg.sort/stable.sort/ranges.stable.sort.pass.cpp
@@ -57,7 +57,7 @@ static_assert(!HasStableSortR, BadComparator>);
 static_assert(!HasStableSortR>); // Doesn't satisfy `sortable`.
-template
+template
 void test_one(std::array input, std::array expected) {
   { // (iterator, sentinel) overload.
     auto sorted = input;
diff --git a/libcxx/test/std/algorithms/alg.sorting/sortable_helpers.h b/libcxx/test/std/algorithms/alg.sorting/sortable_helpers.h
index 31385bdea05ad..d8cc781bfe913 100644
--- a/libcxx/test/std/algorithms/alg.sorting/sortable_helpers.h
+++ b/libcxx/test/std/algorithms/alg.sorting/sortable_helpers.h
@@ -9,6 +9,7 @@
 #ifndef SORTABLE_HELPERS_H
 #define SORTABLE_HELPERS_H
+#include
 #include
 #include "test_macros.h"
@@ -108,7 +109,7 @@ struct TracedCopy {
 template
 struct NonBorrowedRange {
   int* data_;
-  size_t size_;
+  std::size_t size_;
   // TODO: some algorithms calls std::__copy
   // std::__copy(contiguous_iterator, sentinel_wrapper>, contiguous_iterator) doesn't seem to work.
@@ -116,7 +117,7 @@ struct NonBorrowedRange {
   // sentinel_wrapper>
   using Sent = std::conditional_t, Iter, sentinel_wrapper>;
-  constexpr NonBorrowedRange(int* d, size_t s) : data_{d}, size_{s} {}
+  constexpr NonBorrowedRange(int* d, std::size_t s) : data_{d}, size_{s} {}
   constexpr Iter begin() const { return Iter{data_}; };
   constexpr Sent end() const { return Sent{Iter{data_ + size_}}; };
diff --git a/libcxx/test/std/algorithms/ranges_robust_against_dangling.pass.cpp b/libcxx/test/std/algorithms/ranges_robust_against_dangling.pass.cpp
index b19f42f2a4079..c71b57ebb1fb6 100644
--- a/libcxx/test/std/algorithms/ranges_robust_against_dangling.pass.cpp
+++ b/libcxx/test/std/algorithms/ranges_robust_against_dangling.pass.cpp
@@ -29,9 +29,9 @@ struct NonBorrowedRange {
   using Sent = sentinel_wrapper;
   int* data_;
-  size_t size_;
+  std::size_t size_;
-  template
+  template
   constexpr explicit NonBorrowedRange(std::array& arr) : data_{arr.data()}, size_{arr.size()} {}
   constexpr Iter begin() const { return data_; };
@@ -109,7 +109,7 @@ constexpr bool test_all() {
   auto out2 = output.begin() + 1;
   int x = 2;
-  size_t count = 1;
+  std::size_t count = 1;
   dangling_1st(std::ranges::find, in, x);
   dangling_1st(std::ranges::find_if, in, unary_pred);
diff --git a/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp b/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp
index 35b9f928b739f..c5e8502ef2de6 100644
--- a/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp
+++ b/libcxx/test/std/algorithms/ranges_robust_against_omitting_invoke.pass.cpp
@@ -69,7 +69,7 @@ constexpr bool test_all() {
   Bar c{Foo{3}};
   Foo x{2};
-  size_t count = 1;
+  std::size_t count = 1;
   test(std::ranges::any_of, in, &Foo::unary_pred, &Bar::val);
   test(std::ranges::all_of, in, &Foo::unary_pred, &Bar::val);
diff --git a/libcxx/test/std/algorithms/robust_against_adl_on_new.pass.cpp b/libcxx/test/std/algorithms/robust_against_adl_on_new.pass.cpp
index 5dcfde3735b96..6bd2abded2463 100644
--- a/libcxx/test/std/algorithms/robust_against_adl_on_new.pass.cpp
+++ b/libcxx/test/std/algorithms/robust_against_adl_on_new.pass.cpp
@@ -19,7 +19,7 @@ struct A {
   static bool isEven(const A& a) { return a.i % 2 == 0; }
 };
-void *operator new(size_t, A*) = delete;
+void *operator new(std::size_t, A*) = delete;
 int main(int, char**)
 {
diff --git a/libcxx/test/std/algorithms/robust_against_proxy_iterators_lifetime_bugs.pass.cpp b/libcxx/test/std/algorithms/robust_against_proxy_iterators_lifetime_bugs.pass.cpp
index 166806e60c77f..3a15d255dc246 100644
--- a/libcxx/test/std/algorithms/robust_against_proxy_iterators_lifetime_bugs.pass.cpp
+++ b/libcxx/test/std/algorithms/robust_against_proxy_iterators_lifetime_bugs.pass.cpp
@@ -547,15 +547,15 @@ class ConstexprIterator {
 #endif // TEST_STD_VER > 17
-template
+template
 class Input {
   using Array = std::array;
-  size_t size_ = 0;
+  std::size_t size_ = 0;
   Array values_ = {};
 public:
-  template
+  template
   TEST_CONSTEXPR_CXX20 Input(std::array from) {
     static_assert(N2 <= N, "");
   TEST_CONSTEXPR_CXX20 typename Array::iterator begin() { return values_.begin(); }
   TEST_CONSTEXPR_CXX20 typename Array::iterator end() { return values_.begin() + size_; }
-  TEST_CONSTEXPR_CXX20 size_t size() const { return size_; }
+  TEST_CONSTEXPR_CXX20 std::size_t size() const { return size_; }
 };
 // TODO: extend `Value` and `Reference` so that it's possible to pass plain integers to all the algorithms.
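Many of the container hunks that follow adjust erase/erase_if tests, which all assert that the count returned by the C++20 std::erase_if is the container's size_type. A minimal usage sketch of that pattern (values chosen arbitrarily, not taken from the tests):

    #include <cassert>
    #include <cstddef>
    #include <deque>

    int main() {
      std::deque<int> d{1, 2, 3, 4, 5};
      // std::erase_if returns the number of removed elements as
      // std::deque<int>::size_type, which converts to std::size_t.
      std::size_t erased = std::erase_if(d, [](int x) { return x % 2 == 0; });
      assert(erased == 2);  // 2 and 4 were removed
      assert(d.size() == 3);
      return 0;
    }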
@@ -624,14 +624,14 @@ TEST_CONSTEXPR_CXX20 std::array, 8> get_sort_te
   return result;
 }
-template
+template
 TEST_CONSTEXPR_CXX20 void test(std::array inputs, Func func) {
   for (auto&& in : inputs) {
     func(in.begin(), in.end());
   }
 }
-template
+template
 TEST_CONSTEXPR_CXX20 void test_n(std::array inputs, Func func) {
   for (auto&& in : inputs) {
     func(in.begin(), in.size());
@@ -698,17 +698,17 @@ TEST_CONSTEXPR_CXX20 bool test() {
   // TODO: is_permutation
   test(simple_in, [&](I b, I e) { (void) std::for_each(b, e, is_neg); });
 #if TEST_STD_VER > 14
-  test_n(simple_in, [&](I b, size_t n) { (void) std::for_each_n(b, n, is_neg); });
+  test_n(simple_in, [&](I b, std::size_t n) { (void) std::for_each_n(b, n, is_neg); });
 #endif
   test(simple_in, [&](I b, I e) { (void) std::copy(b, e, out); });
-  test_n(simple_in, [&](I b, size_t n) { (void) std::copy_n(b, n, out); });
+  test_n(simple_in, [&](I b, std::size_t n) { (void) std::copy_n(b, n, out); });
   test(simple_in, [&](I b, I e) { (void) std::copy_backward(b, e, out + N); });
   test(simple_in, [&](I b, I e) { (void) std::copy_if(b, e, out, is_neg); });
   test(simple_in, [&](I b, I e) { (void) std::move(b, e, out); });
   test(simple_in, [&](I b, I e) { (void) std::move_backward(b, e, out + N); });
   test(simple_in, [&](I b, I e) { (void) std::transform(b, e, out, identity); });
   test(simple_in, [&](I b, I e) { (void) std::generate(b, e, gen); });
-  test_n(simple_in, [&](I b, size_t n) { (void) std::generate_n(b, n, gen); });
+  test_n(simple_in, [&](I b, std::size_t n) { (void) std::generate_n(b, n, gen); });
   test(simple_in, [&](I b, I e) { (void) std::remove_copy(b, e, out, x); });
   test(simple_in, [&](I b, I e) { (void) std::remove_copy_if(b, e, out, is_neg); });
   test(simple_in, [&](I b, I e) { (void) std::replace(b, e, x, y); });
diff --git a/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp b/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp
index 5d0a2859bfcdc..84f49a76a810f 100644
--- a/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp
+++ b/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp
@@ -202,7 +202,7 @@ void f() {
   static_assert(std::is_same_v, ::atomic_intptr_t>);
   static_assert(std::is_same_v, ::atomic_uintptr_t>);
-  static_assert(std::is_same_v, ::atomic_size_t>);
+  static_assert(std::is_same_v, ::atomic_size_t>);
   static_assert(std::is_same_v, ::atomic_ptrdiff_t>);
   static_assert(std::is_same_v, ::atomic_intmax_t>);
   static_assert(std::is_same_v, ::atomic_uintmax_t>);
diff --git a/libcxx/test/std/atomics/types.pass.cpp b/libcxx/test/std/atomics/types.pass.cpp
index ee5cca2237745..e0b617071c04f 100644
--- a/libcxx/test/std/atomics/types.pass.cpp
+++ b/libcxx/test/std/atomics/types.pass.cpp
@@ -148,7 +148,7 @@ int main(int, char**)
     test ();
     test ();
-    test ();
+    test ();
     test ();
     test ();
     test ();
diff --git a/libcxx/test/std/containers/associative/map/map.access/max_size.pass.cpp b/libcxx/test/std/containers/associative/map/map.access/max_size.pass.cpp
index b38cf11463897..e10f20359bb18 100644
--- a/libcxx/test/std/containers/associative/map/map.access/max_size.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.access/max_size.pass.cpp
@@ -31,7 +31,7 @@ int main(int, char**)
       LIBCPP_ASSERT(c.max_size() == 10);
     }
     {
-      typedef limited_allocator A;
+      typedef limited_allocator A;
       typedef std::map, A> C;
       const C::size_type max_dist =
           static_cast(std::numeric_limits::max());
diff --git a/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp
index 3fd051f790619..c8201e2899711 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/copy_assign.pass.cpp
@@ -42,12 +42,12 @@ class counting_allocatorT {
     template bool operator==(const counting_allocatorT& other) const noexcept { return foo == other.foo; }
     template bool operator!=(const counting_allocatorT& other) const noexcept { return foo != other.foo; }
-    T* allocate(size_t n) const {
+    T* allocate(std::size_t n) const {
         ca_allocs.push_back(foo);
         void * const pv = ::malloc(n * sizeof(T));
        return static_cast(pv);
     }
-    void deallocate(T* p, size_t) const noexcept {
+    void deallocate(T* p, std::size_t) const noexcept {
        ca_deallocs.push_back(foo);
        free(p);
     }
@@ -65,12 +65,12 @@ class counting_allocatorF {
     template bool operator==(const counting_allocatorF& other) const noexcept { return foo == other.foo; }
     template bool operator!=(const counting_allocatorF& other) const noexcept { return foo != other.foo; }
-    T* allocate(size_t n) const {
+    T* allocate(std::size_t n) const {
        ca_allocs.push_back(foo);
        void * const pv = ::malloc(n * sizeof(T));
        return static_cast(pv);
     }
-    void deallocate(T* p, size_t) const noexcept {
+    void deallocate(T* p, std::size_t) const noexcept {
        ca_deallocs.push_back(foo);
        free(p);
     }
diff --git a/libcxx/test/std/containers/associative/map/map.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/associative/map/map.cons/move_alloc.pass.cpp
index bcfafe86449c3..63630164b2e64 100644
--- a/libcxx/test/std/containers/associative/map/map.cons/move_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.cons/move_alloc.pass.cpp
@@ -166,7 +166,7 @@ int main(int, char**)
             V(3, 2),
             V(3, 3)
         };
-        const size_t num = sizeof(a1)/sizeof(a1[0]);
+        const std::size_t num = sizeof(a1)/sizeof(a1[0]);
        assert(Counter_base::gConstructed == num);
         M m1(I(a1), I(a1+num), C(), A());
diff --git a/libcxx/test/std/containers/associative/map/map.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/associative/map/map.erasure/erase_if.pass.cpp
index ee55466244407..6dfbbfea848af 100644
--- a/libcxx/test/std/containers/associative/map/map.erasure/erase_if.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.erasure/erase_if.pass.cpp
@@ -30,7 +30,7 @@ M make (Init vals)
 }
 template
-void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) {
+void test0(Init vals, Pred p, Init expected, std::size_t expected_erased_count) {
   M s = make(vals);
   ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p)));
   assert(expected_erased_count == std::erase_if(s, p));
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/extract_iterator.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/extract_iterator.pass.cpp
index b036531a0efce..6cdfac774a2c3 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/extract_iterator.pass.cpp
@@ -22,7 +22,7 @@ template
 void test(Container& c)
 {
-    size_t sz = c.size();
+    std::size_t sz = c.size();
     auto some_key = c.cbegin()->first;
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/extract_key.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/extract_key.pass.cpp
index 80e7e1fc69c7d..c0ffa343497e2 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/extract_key.pass.cpp
@@ -22,8 +22,8 @@ template
 void test(Container& c, KeyTypeIter first, KeyTypeIter last)
 {
-    size_t sz = c.size();
-    assert((size_t)std::distance(first, last) == sz);
+    std::size_t sz = c.size();
+    assert((std::size_t)std::distance(first, last) == sz);
     for (KeyTypeIter copy = first; copy != last; ++copy)
     {
diff --git a/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type_hint.pass.cpp
index 1d09443255a3d..3ef005480e8c2 100644
--- a/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/map/map.modifiers/insert_node_type_hint.pass.cpp
@@ -38,7 +38,7 @@ void test(Container& c) {
         typename Container::node_type node = nf(i, i + 1);
         assert(!node.empty());
-        size_t prev = c.size();
+        std::size_t prev = c.size();
        auto it = c.insert(c.end(), std::move(node));
        assert(node.empty());
        assert(prev + 1 == c.size());
diff --git a/libcxx/test/std/containers/associative/multimap/max_size.pass.cpp b/libcxx/test/std/containers/associative/multimap/max_size.pass.cpp
index a4537c3d1b297..f7b8b5e071be9 100644
--- a/libcxx/test/std/containers/associative/multimap/max_size.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/max_size.pass.cpp
@@ -31,7 +31,7 @@ int main(int, char**)
       LIBCPP_ASSERT(c.max_size() == 10);
     }
     {
-      typedef limited_allocator A;
+      typedef limited_allocator A;
       typedef std::multimap, A> C;
       const C::size_type max_dist =
           static_cast(std::numeric_limits::max());
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.cons/move_alloc.pass.cpp
index a4edd473c6294..e246842ea7d4e 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.cons/move_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.cons/move_alloc.pass.cpp
@@ -165,7 +165,7 @@ int main(int, char**)
             V(3, 2),
             V(3, 3)
        };
-        const size_t num = sizeof(a1)/sizeof(a1[0]);
+        const std::size_t num = sizeof(a1)/sizeof(a1[0]);
        assert(Counter_base::gConstructed == num);
         M m1(I(a1), I(a1+num), C(), A());
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.erasure/erase_if.pass.cpp
index f270c4e9f9355..89ab932f47b00 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.erasure/erase_if.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.erasure/erase_if.pass.cpp
@@ -30,7 +30,7 @@ M make (Init vals)
 }
 template
-void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) {
+void test0(Init vals, Pred p, Init expected, std::size_t expected_erased_count) {
   M s = make(vals);
   ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p)));
   assert(expected_erased_count == std::erase_if(s, p));
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_iterator.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_iterator.pass.cpp
index c5f5b4d095905..c4751d81ec953 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_iterator.pass.cpp
@@ -22,7 +22,7 @@ template
 void test(Container& c)
 {
-    size_t sz = c.size();
+    std::size_t sz = c.size();
     auto some_key = c.cbegin()->first;
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_key.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_key.pass.cpp
index 891e78cdc4915..b558f4e3f1207 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/extract_key.pass.cpp
@@ -22,8 +22,8 @@ template
 void test(Container& c, KeyTypeIter first, KeyTypeIter last)
 {
-    size_t sz = c.size();
-    assert((size_t)std::distance(first, last) == sz);
+    std::size_t sz = c.size();
+    assert((std::size_t)std::distance(first, last) == sz);
     for (KeyTypeIter copy = first; copy != last; ++copy)
     {
diff --git a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type_hint.pass.cpp
index d4ba115a3fe23..68e6b8b795f9d 100644
--- a/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/multimap/multimap.modifiers/insert_node_type_hint.pass.cpp
@@ -37,7 +37,7 @@ void test(Container& c) {
        typename Container::node_type node = nf(i, i + 1);
        assert(!node.empty());
-        size_t prev = c.size();
+        std::size_t prev = c.size();
        auto it = c.insert(c.end(), std::move(node));
        assert(node.empty());
        assert(prev + 1 == c.size());
diff --git a/libcxx/test/std/containers/associative/multiset/extract_iterator.pass.cpp b/libcxx/test/std/containers/associative/multiset/extract_iterator.pass.cpp
index bce568ece6a18..ac8c5e3b3d459 100644
--- a/libcxx/test/std/containers/associative/multiset/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/extract_iterator.pass.cpp
@@ -22,7 +22,7 @@ template
 void test(Container& c)
 {
-    size_t sz = c.size();
+    std::size_t sz = c.size();
     for (auto first = c.cbegin(); first != c.cend();)
     {
diff --git a/libcxx/test/std/containers/associative/multiset/extract_key.pass.cpp b/libcxx/test/std/containers/associative/multiset/extract_key.pass.cpp
index d92d4ed9bf2a9..bad5d1bb3d394 100644
--- a/libcxx/test/std/containers/associative/multiset/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/extract_key.pass.cpp
@@ -22,8 +22,8 @@ template
 void test(Container& c, KeyTypeIter first, KeyTypeIter last)
 {
-    size_t sz = c.size();
-    assert((size_t)std::distance(first, last) == sz);
+    std::size_t sz = c.size();
+    assert((std::size_t)std::distance(first, last) == sz);
     for (KeyTypeIter copy = first; copy != last; ++copy)
     {
diff --git a/libcxx/test/std/containers/associative/multiset/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/associative/multiset/insert_node_type_hint.pass.cpp
index 00d0b67eea496..e5c628e8509bc 100644
--- a/libcxx/test/std/containers/associative/multiset/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/insert_node_type_hint.pass.cpp
@@ -36,7 +36,7 @@ void test(Container& c) {
        typename Container::node_type node = nf(i);
        assert(!node.empty());
-        size_t prev = c.size();
+        std::size_t prev = c.size();
        auto it = c.insert(c.end(), std::move(node));
        assert(prev + 1 == c.size());
        assert(*it == i);
diff --git a/libcxx/test/std/containers/associative/multiset/max_size.pass.cpp b/libcxx/test/std/containers/associative/multiset/max_size.pass.cpp
index 5986df295545a..09faf8f9b97e6 100644
--- a/libcxx/test/std/containers/associative/multiset/max_size.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/max_size.pass.cpp
@@ -30,7 +30,7 @@ int main(int, char**)
      LIBCPP_ASSERT(c.max_size() == 10);
    }
    {
-      typedef limited_allocator A;
+      typedef limited_allocator A;
      typedef std::multiset, A> C;
      const C::size_type max_dist =
          static_cast(std::numeric_limits::max());
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.cons/move_alloc.pass.cpp
index d279dba40a197..090be76483be2 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.cons/move_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.cons/move_alloc.pass.cpp
@@ -160,7 +160,7 @@ int main(int, char**)
            V(3),
            V(3)
        };
-        const size_t num = sizeof(a1)/sizeof(a1[0]);
+        const std::size_t num = sizeof(a1)/sizeof(a1[0]);
        assert(Counter_base::gConstructed == num);
        M m1(I(a1), I(a1+num), C(), A());
diff --git a/libcxx/test/std/containers/associative/multiset/multiset.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/associative/multiset/multiset.erasure/erase_if.pass.cpp
index f39797551b7e9..476f4f54e28e1 100644
--- a/libcxx/test/std/containers/associative/multiset/multiset.erasure/erase_if.pass.cpp
+++ b/libcxx/test/std/containers/associative/multiset/multiset.erasure/erase_if.pass.cpp
@@ -20,7 +20,7 @@
 #include "min_allocator.h"
 template
-void test0(S s, Pred p, S expected, size_t expected_erased_count) {
+void test0(S s, Pred p, S expected, std::size_t expected_erased_count) {
   ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p)));
   assert(expected_erased_count == std::erase_if(s, p));
   assert(s == expected);
diff --git a/libcxx/test/std/containers/associative/set/extract_iterator.pass.cpp b/libcxx/test/std/containers/associative/set/extract_iterator.pass.cpp
index 91b97fcd92450..c6ef11301d7a6 100644
--- a/libcxx/test/std/containers/associative/set/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/extract_iterator.pass.cpp
@@ -22,7 +22,7 @@ template
 void test(Container& c)
 {
-    size_t sz = c.size();
+    std::size_t sz = c.size();
     for (auto first = c.cbegin(); first != c.cend();)
     {
diff --git a/libcxx/test/std/containers/associative/set/extract_key.pass.cpp b/libcxx/test/std/containers/associative/set/extract_key.pass.cpp
index 90ebbb7bc56ce..f37665b4a2a28 100644
--- a/libcxx/test/std/containers/associative/set/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/extract_key.pass.cpp
@@ -22,8 +22,8 @@ template
 void test(Container& c, KeyTypeIter first, KeyTypeIter last)
 {
-    size_t sz = c.size();
-    assert((size_t)std::distance(first, last) == sz);
+    std::size_t sz = c.size();
+    assert((std::size_t)std::distance(first, last) == sz);
     for (KeyTypeIter copy = first; copy != last; ++copy)
     {
diff --git a/libcxx/test/std/containers/associative/set/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/associative/set/insert_node_type_hint.pass.cpp
index d58cbe3a27c9d..6f4cbe97e2ab6 100644
--- a/libcxx/test/std/containers/associative/set/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/insert_node_type_hint.pass.cpp
@@ -37,7 +37,7 @@ void test(Container& c) {
        typename Container::node_type node = nf(i);
        assert(!node.empty());
-        size_t prev = c.size();
+        std::size_t prev = c.size();
        auto it = c.insert(c.end(), std::move(node));
        assert(node.empty());
        assert(prev + 1 == c.size());
diff --git a/libcxx/test/std/containers/associative/set/max_size.pass.cpp b/libcxx/test/std/containers/associative/set/max_size.pass.cpp
index e37bfe7142b06..f921378df21d1 100644
--- a/libcxx/test/std/containers/associative/set/max_size.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/max_size.pass.cpp
@@ -30,7 +30,7 @@ int main(int, char**)
      LIBCPP_ASSERT(c.max_size() == 10);
    }
    {
-      typedef limited_allocator A;
+      typedef limited_allocator A;
      typedef std::set, A> C;
      const C::size_type max_dist =
          static_cast(std::numeric_limits::max());
diff --git a/libcxx/test/std/containers/associative/set/set.cons/move_alloc.pass.cpp b/libcxx/test/std/containers/associative/set/set.cons/move_alloc.pass.cpp
index 59d536ef5c7a0..886cad9fa2123 100644
--- a/libcxx/test/std/containers/associative/set/set.cons/move_alloc.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.cons/move_alloc.pass.cpp
@@ -160,7 +160,7 @@ int main(int, char**)
            V(3),
            V(3)
        };
-        const size_t num = sizeof(a1)/sizeof(a1[0]);
+        const std::size_t num = sizeof(a1)/sizeof(a1[0]);
        assert(Counter_base::gConstructed == num);
        M m1(I(a1), I(a1+num), C(), A());
diff --git a/libcxx/test/std/containers/associative/set/set.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/associative/set/set.erasure/erase_if.pass.cpp
index 4c9095562a5f7..2e0f026376ac5 100644
--- a/libcxx/test/std/containers/associative/set/set.erasure/erase_if.pass.cpp
+++ b/libcxx/test/std/containers/associative/set/set.erasure/erase_if.pass.cpp
@@ -20,7 +20,7 @@
 #include "min_allocator.h"
 template
-void test0(S s, Pred p, S expected, size_t expected_erased_count) {
+void test0(S s, Pred p, S expected, std::size_t expected_erased_count) {
   ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p)));
   assert(expected_erased_count == std::erase_if(s, p));
   assert(s == expected);
diff --git a/libcxx/test/std/containers/check_consecutive.h b/libcxx/test/std/containers/check_consecutive.h
index 5f549c6c4ec27..0fe46e7f19557 100644
--- a/libcxx/test/std/containers/check_consecutive.h
+++ b/libcxx/test/std/containers/check_consecutive.h
@@ -18,9 +18,9 @@
 // Check consecutive equal values in an unordered_multiset iterator
 template
-void CheckConsecutiveValues(Iter pos, Iter end, typename Iter::value_type value, size_t count)
+void CheckConsecutiveValues(Iter pos, Iter end, typename Iter::value_type value, std::size_t count)
 {
-    for ( size_t i = 0; i < count; ++i )
+    for ( std::size_t i = 0; i < count; ++i )
     {
         assert(pos != end);
         assert(*pos == value);
diff --git a/libcxx/test/std/containers/sequences/array/contiguous.pass.cpp b/libcxx/test/std/containers/sequences/array/contiguous.pass.cpp
index fec48b325e90e..85e6ed7ae9aee 100644
--- a/libcxx/test/std/containers/sequences/array/contiguous.pass.cpp
+++ b/libcxx/test/std/containers/sequences/array/contiguous.pass.cpp
@@ -19,7 +19,7 @@ template
 TEST_CONSTEXPR_CXX14 void assert_contiguous(Container const& c)
 {
-    for (size_t i = 0; i < c.size(); ++i)
+    for (std::size_t i = 0; i < c.size(); ++i)
         assert(*(c.begin() + i) == *(std::addressof(*c.begin()) + i));
 }
diff --git a/libcxx/test/std/containers/sequences/array/size_and_alignment.pass.cpp b/libcxx/test/std/containers/sequences/array/size_and_alignment.pass.cpp
index 242215689a81a..6fbc844a11eac 100644
--- a/libcxx/test/std/containers/sequences/array/size_and_alignment.pass.cpp
+++ b/libcxx/test/std/containers/sequences/array/size_and_alignment.pass.cpp
@@ -25,12 +25,12 @@
 #include "test_macros.h"
-template
+template
 struct MyArray {
   T elems[Size];
 };
-template
+template
 void
 test() {
   typedef T CArrayT[Size == 0 ? 1 : Size];
   typedef std::array ArrayT;
diff --git a/libcxx/test/std/containers/sequences/deque/abi.compile.pass.cpp b/libcxx/test/std/containers/sequences/deque/abi.compile.pass.cpp
index 36a76b2e11d61..37e87d59503ee 100644
--- a/libcxx/test/std/containers/sequences/deque/abi.compile.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/abi.compile.pass.cpp
@@ -82,5 +82,5 @@ static_assert(TEST_ALIGNOF(std::deque >) == 4, "");
 static_assert(TEST_ALIGNOF(std::deque >) == 2, "");
 #else
-#  error size_t has an unexpected size
+#  error std::size_t has an unexpected size
 #endif
diff --git a/libcxx/test/std/containers/sequences/deque/deque.capacity/max_size.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.capacity/max_size.pass.cpp
index 230a46519b107..7e2f72eb1e7e0 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.capacity/max_size.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.capacity/max_size.pass.cpp
@@ -27,7 +27,7 @@ int main(int, char**) {
     LIBCPP_ASSERT(c.max_size() == 10);
   }
   {
-    typedef limited_allocator A;
+    typedef limited_allocator A;
     typedef std::deque C;
     const C::size_type max_dist =
        static_cast(std::numeric_limits::max());
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/default_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/default_noexcept.pass.cpp
index 5bb3dc99d1191..88672fa044fe2 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/default_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/default_noexcept.pass.cpp
@@ -27,7 +27,7 @@ struct some_alloc
 {
     typedef T value_type;
     some_alloc(const some_alloc&);
-    void allocate(size_t);
+    void allocate(std::size_t);
 };
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/dtor_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/dtor_noexcept.pass.cpp
index 8dfcebd0e1d62..88fce25a1f7e3 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/dtor_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/dtor_noexcept.pass.cpp
@@ -25,7 +25,7 @@ struct some_alloc
     typedef T value_type;
     some_alloc(const some_alloc&);
     ~some_alloc() noexcept(false);
-    void allocate(size_t);
+    void allocate(std::size_t);
 };
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/move_assign_noexcept.pass.cpp
index a87c8e00ab2fb..cbefbf6dae912 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/move_assign_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/move_assign_noexcept.pass.cpp
@@ -29,7 +29,7 @@ struct some_alloc
 {
     typedef T value_type;
     some_alloc(const some_alloc&);
-    void allocate(size_t);
+    void allocate(std::size_t);
 };
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/deque/deque.cons/move_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.cons/move_noexcept.pass.cpp
index d90dd810a499c..add19ec61c8cc 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.cons/move_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.cons/move_noexcept.pass.cpp
@@ -27,7 +27,7 @@ struct some_alloc
 {
     typedef T value_type;
     some_alloc(const some_alloc&);
-    void allocate(size_t);
+    void allocate(std::size_t);
 };
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/deque/deque.erasure/erase.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.erasure/erase.pass.cpp
index 4c5a14e82374b..656884a345fa1 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.erasure/erase.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.erasure/erase.pass.cpp
@@ -21,7 +21,7 @@
 #include "min_allocator.h"
 template
-void test0(S s, U val, S expected, size_t expected_erased_count) {
+void test0(S s, U val, S expected, std::size_t expected_erased_count) {
   ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase(s, val)));
   assert(expected_erased_count == std::erase(s, val));
   assert(s == expected);
diff --git a/libcxx/test/std/containers/sequences/deque/deque.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.erasure/erase_if.pass.cpp
index 7f6e4c5ed3c4b..57657e4b48c86 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.erasure/erase_if.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.erasure/erase_if.pass.cpp
@@ -20,7 +20,7 @@
 #include "min_allocator.h"
 template
-void test0(S s, Pred p, S expected, size_t expected_erased_count) {
+void test0(S s, Pred p, S expected, std::size_t expected_erased_count) {
   ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p)));
   assert(expected_erased_count == std::erase_if(s, p));
   assert(s == expected);
diff --git a/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter_iter.invalidation.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter_iter.invalidation.pass.cpp
index 4c82b51a06b4a..0ac48aaa33d7b 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter_iter.invalidation.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.modifiers/erase_iter_iter.invalidation.pass.cpp
@@ -21,7 +21,7 @@
 #include "test_macros.h"
 template
-void del_at_start(C c, size_t num)
+void del_at_start(C c, std::size_t num)
 {
     typename C::iterator first = c.begin();
     typename C::iterator last = first + num;
@@ -41,7 +41,7 @@ void del_at_start(C c, size_t num)
 }
 template
-void del_at_end(C c, size_t num)
+void del_at_end(C c, std::size_t num)
 {
     typename C::iterator last = c.end();
     typename C::iterator first = last - num;
@@ -69,7 +69,7 @@ int main(int, char**)
     while (queue.size() > 1)
     {
-        for (size_t i = 1; i < queue.size(); ++i)
+        for (std::size_t i = 1; i < queue.size(); ++i)
         {
             del_at_start(queue, i);
             del_at_end  (queue, i);
diff --git a/libcxx/test/std/containers/sequences/deque/deque.special/swap_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/deque/deque.special/swap_noexcept.pass.cpp
index 4ce47f0374ce6..d6257b13aaaec 100644
--- a/libcxx/test/std/containers/sequences/deque/deque.special/swap_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/sequences/deque/deque.special/swap_noexcept.pass.cpp
@@ -34,7 +34,7 @@ struct some_alloc
     some_alloc() {}
     some_alloc(const some_alloc&);
-    void allocate(size_t);
+    void allocate(std::size_t);
     void deallocate(void*, unsigned) {}
     typedef std::true_type propagate_on_container_swap;
@@ -47,7 +47,7 @@ struct some_alloc2
     some_alloc2() {}
     some_alloc2(const some_alloc2&);
-    void allocate(size_t);
+    void allocate(std::size_t);
     void deallocate(void*, unsigned) {}
     typedef std::false_type propagate_on_container_swap;
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default_noexcept.pass.cpp
index 772a985eeb559..b131cc9cdcafc 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/default_noexcept.pass.cpp
@@ -27,7 +27,7 @@ struct some_alloc
 {
     typedef T value_type;
     some_alloc(const some_alloc&);
-    void allocate(size_t);
+    void allocate(std::size_t);
 };
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/dtor_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/dtor_noexcept.pass.cpp
index ae6fd62b60190..db8f0d22e62f7 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/dtor_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/dtor_noexcept.pass.cpp
@@ -25,7 +25,7 @@ struct some_alloc
     typedef T value_type;
     some_alloc(const some_alloc&);
     ~some_alloc() noexcept(false);
-    void allocate(size_t);
+    void allocate(std::size_t);
 };
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_assign_noexcept.pass.cpp
index 63168c40ae6f5..f361e373d23ff 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_assign_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_assign_noexcept.pass.cpp
@@ -29,7 +29,7 @@ struct some_alloc
 {
     typedef T value_type;
     some_alloc(const some_alloc&);
-    void allocate(size_t);
+    void allocate(std::size_t);
 };
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_noexcept.pass.cpp
index edd3449b06785..f132f0ed0b25d 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_noexcept.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/move_noexcept.pass.cpp
@@ -27,7 +27,7 @@ struct some_alloc
 {
     typedef T value_type;
     some_alloc(const some_alloc&);
-    void allocate(size_t);
+    void allocate(std::size_t);
 };
 int main(int, char**)
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size.pass.cpp
index ca3931d107d55..3c27a6dfa34f1 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.cons/size.pass.cpp
@@ -37,8 +37,8 @@ int main(int, char**)
 {
     { // test that the ctor is explicit
         typedef std::forward_list C;
-        static_assert((std::is_constructible::value), "");
-        static_assert((!std::is_convertible::value), "");
+        static_assert((std::is_constructible::value), "");
+        static_assert((!std::is_convertible::value), "");
     }
     {
         typedef DefaultOnly T;
diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase.pass.cpp
index e6eb6f426153a..df9aa4139f736 100644
--- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase.pass.cpp
+++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase.pass.cpp
@@ -21,7 +21,7 @@
 #include "min_allocator.h"
 template
-void test0(S s, U val, S expected, size_t expected_erased_count) {
+void test0(S s, U val, S expected,
std::size_t expected_erased_count) { ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase(s, val))); assert(expected_erased_count == std::erase(s, val)); assert(s == expected); diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase_if.pass.cpp index 21de03aa4c622..756febd1a1f17 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.erasure/erase_if.pass.cpp @@ -20,7 +20,7 @@ #include "min_allocator.h" template -void test0(S s, Pred p, S expected, size_t expected_erased_count) { +void test0(S s, Pred p, S expected, std::size_t expected_erased_count) { ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p))); assert(expected_erased_count == std::erase_if(s, p)); assert(s == expected); diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_lvalue.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_lvalue.pass.cpp index cb30b03150353..6e73b2fd73726 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_lvalue.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_lvalue.pass.cpp @@ -68,7 +68,7 @@ int main(int, char**) { c1.merge(c2); assert(c2.empty()); - for (size_t i = 0; i < 3; ++i) { + for (std::size_t i = 0; i < 3; ++i) { assert(to[i] == *io[i]); #if TEST_STD_VER >= 11 assert(to[i] == ro[i].get()); diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_lvalue_pred.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_lvalue_pred.pass.cpp index d8df4eb261f5b..fddf9f9dc0f46 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_lvalue_pred.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_lvalue_pred.pass.cpp @@ -68,7 +68,7 @@ int main(int, char**) { c1.merge(c2, std::greater()); assert(c2.empty()); - for (size_t i = 0; i < 3; ++i) { + for (std::size_t i = 0; i < 3; ++i) { assert(to[i] == *io[i]); #if TEST_STD_VER >= 11 assert(to[i] == ro[i].get()); diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_rvalue.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_rvalue.pass.cpp index f6db68669b491..3a7a8744ad46a 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_rvalue.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_rvalue.pass.cpp @@ -65,7 +65,7 @@ int main(int, char**) { c1.merge(std::move(c2)); assert(c2.empty()); - for (size_t i = 0; i < 3; ++i) { + for (std::size_t i = 0; i < 3; ++i) { assert(to[i] == *io[i]); assert(to[i] == ro[i].get()); assert(to[i] == *po[i]); diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_rvalue_pred.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_rvalue_pred.pass.cpp index 1b45946affbdf..f1ddbfb620d29 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_rvalue_pred.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/merge_rvalue_pred.pass.cpp @@ -65,7 +65,7 @@ int main(int, char**) { c1.merge(std::move(c2), std::greater()); assert(c2.empty()); - for (size_t i = 0; i < 3; ++i) { + for (std::size_t i = 0; i < 3; ++i) { 
assert(to[i] == *io[i]); assert(to[i] == ro[i].get()); assert(to[i] == *po[i]); diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp index 7142c8afe77eb..7e26b8b48a82d 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/remove_if.pass.cpp @@ -120,7 +120,7 @@ int main(int, char**) int a2[] = { 2, 3, 5, 8, 11}; std::forward_list c(a1, a1 + 7); do_remove_if(c, std::ref(c.front()), 2); - for (size_t i = 0; i < 5; ++i) + for (std::size_t i = 0; i < 5; ++i) { assert(!c.empty()); assert(c.front() == a2[i]); diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp index bfadd6febc017..aed12da1c6c4a 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/unique_pred.pass.cpp @@ -105,7 +105,7 @@ int main(int, char**) int a2[] = {1, 2, 3, 5, 2, 11}; std::forward_list c1(a1, a1 + 8); do_unique(c1, std::ref(c1.front()), 2); - for (size_t i = 0; i < 6; ++i) + for (std::size_t i = 0; i < 6; ++i) { assert(!c1.empty()); assert(c1.front() == a2[i]); diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/swap_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/swap_noexcept.pass.cpp index e9450ecaa388d..b4568837a2e01 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/swap_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.spec/swap_noexcept.pass.cpp @@ -35,7 +35,7 @@ struct some_alloc some_alloc() {} some_alloc(const some_alloc&); - void allocate(size_t); + void allocate(std::size_t); void deallocate(void*, unsigned) {} typedef std::true_type propagate_on_container_swap; @@ -48,7 +48,7 @@ struct some_alloc2 some_alloc2() {} some_alloc2(const some_alloc2&); - void allocate(size_t); + void allocate(std::size_t); void deallocate(void*, unsigned) {} typedef std::false_type propagate_on_container_swap; diff --git a/libcxx/test/std/containers/sequences/forwardlist/max_size.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/max_size.pass.cpp index 08d21d641163f..c5145fde52af7 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/max_size.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/max_size.pass.cpp @@ -28,7 +28,7 @@ int main(int, char**) LIBCPP_ASSERT(c.max_size() == 10); } { - typedef limited_allocator A; + typedef limited_allocator A; typedef std::forward_list C; const C::size_type max_dist = static_cast(std::numeric_limits::max()); diff --git a/libcxx/test/std/containers/sequences/list/list.capacity/max_size.pass.cpp b/libcxx/test/std/containers/sequences/list/list.capacity/max_size.pass.cpp index 8560a6a87f05f..c39fefe1cb11f 100644 --- a/libcxx/test/std/containers/sequences/list/list.capacity/max_size.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.capacity/max_size.pass.cpp @@ -27,7 +27,7 @@ int main(int, char**) { LIBCPP_ASSERT(c.max_size() == 10); } { - typedef limited_allocator A; + typedef limited_allocator A; typedef std::list C; const C::size_type max_dist = static_cast(std::numeric_limits::max()); diff --git 
a/libcxx/test/std/containers/sequences/list/list.cons/default_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/default_noexcept.pass.cpp index d34efac592bf7..f21225777af5e 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/default_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/default_noexcept.pass.cpp @@ -27,7 +27,7 @@ struct some_alloc { typedef T value_type; some_alloc(const some_alloc&); - void allocate(size_t); + void allocate(std::size_t); }; int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/list/list.cons/dtor_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/dtor_noexcept.pass.cpp index a6c9a83f32f0a..ae7e264c64a3a 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/dtor_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/dtor_noexcept.pass.cpp @@ -25,7 +25,7 @@ struct some_alloc typedef T value_type; some_alloc(const some_alloc&); ~some_alloc() noexcept(false); - void allocate(size_t); + void allocate(std::size_t); }; int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/list/list.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/move_assign_noexcept.pass.cpp index daa2410d77df3..9df8413bb19c4 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/move_assign_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/move_assign_noexcept.pass.cpp @@ -29,7 +29,7 @@ struct some_alloc { typedef T value_type; some_alloc(const some_alloc&); - void allocate(size_t); + void allocate(std::size_t); }; int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/list/list.cons/move_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/list/list.cons/move_noexcept.pass.cpp index 6c1c89c84a934..5c0b4c2b36d25 100644 --- a/libcxx/test/std/containers/sequences/list/list.cons/move_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.cons/move_noexcept.pass.cpp @@ -27,7 +27,7 @@ struct some_alloc { typedef T value_type; some_alloc(const some_alloc&); - void allocate(size_t); + void allocate(std::size_t); }; int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp b/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp index 6b08308cf6d86..977cf24d1e11a 100644 --- a/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.erasure/erase.pass.cpp @@ -21,7 +21,7 @@ #include "min_allocator.h" template -void test0(S s, U val, S expected, size_t expected_erased_count) { +void test0(S s, U val, S expected, std::size_t expected_erased_count) { ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase(s, val))); assert(expected_erased_count == std::erase(s, val)); assert(s == expected); diff --git a/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp index b6670cbaee35e..db8fd8b21189b 100644 --- a/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.erasure/erase_if.pass.cpp @@ -20,7 +20,7 @@ #include "min_allocator.h" template -void test0(S s, Pred p, S expected, size_t expected_erased_count) { +void test0(S s, Pred p, S expected, std::size_t expected_erased_count) { ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p))); 
assert(expected_erased_count == std::erase_if(s, p)); assert(s == expected); diff --git a/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp index b8503ef5eca45..c57d81ba718da 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/remove_if.pass.cpp @@ -77,7 +77,7 @@ int main(int, char**) std::list c(a1, a1 + 7); c.remove_if(std::ref(c.front())); assert(c.size() == 5); - for (size_t i = 0; i < c.size(); ++i) + for (std::size_t i = 0; i < c.size(); ++i) { assert(c.front() == a2[i]); c.pop_front(); diff --git a/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp b/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp index a33cad8e1a221..e10725438ba0f 100644 --- a/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.ops/unique_pred.pass.cpp @@ -59,7 +59,7 @@ int main(int, char**) c.unique(std::ref(c.front())); #endif assert(c.size() == 6); - for (size_t i = 0; i < c.size(); ++i) + for (std::size_t i = 0; i < c.size(); ++i) { assert(c.front() == a2[i]); c.pop_front(); diff --git a/libcxx/test/std/containers/sequences/list/list.special/swap_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/list/list.special/swap_noexcept.pass.cpp index 228d3ef15cb99..ae76236c5d5cc 100644 --- a/libcxx/test/std/containers/sequences/list/list.special/swap_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/list/list.special/swap_noexcept.pass.cpp @@ -34,7 +34,7 @@ struct some_alloc some_alloc() {} some_alloc(const some_alloc&); - void allocate(size_t); + void allocate(std::size_t); void deallocate(void*, unsigned) {} typedef std::true_type propagate_on_container_swap; @@ -47,7 +47,7 @@ struct some_alloc2 some_alloc2() {} some_alloc2(const some_alloc2&); - void allocate(size_t); + void allocate(std::size_t); void deallocate(void*, unsigned) {} typedef std::false_type propagate_on_container_swap; diff --git a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_iter_iter.pass.cpp index d4756cd0fee63..d3e1297aeec92 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_iter_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_iter_iter.pass.cpp @@ -57,7 +57,7 @@ TEST_CONSTEXPR_CXX20 bool tests() { std::vector v(100); while(v.size() < v.capacity()) v.push_back(false); - size_t sz = v.size(); + std::size_t sz = v.size(); bool a[] = {1, 0, 0, 1, 1}; const unsigned N = sizeof(a)/sizeof(a[0]); std::vector::iterator i = v.insert(v.cbegin() + 10, forward_iterator(a), @@ -76,7 +76,7 @@ TEST_CONSTEXPR_CXX20 bool tests() std::vector v(100); while(v.size() < v.capacity()) v.push_back(false); v.pop_back(); v.pop_back(); v.pop_back(); - size_t sz = v.size(); + std::size_t sz = v.size(); bool a[] = {1, 0, 0, 1, 1}; const unsigned N = sizeof(a)/sizeof(a[0]); std::vector::iterator i = v.insert(v.cbegin() + 10, forward_iterator(a), diff --git a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_size_value.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_size_value.pass.cpp index feea1888b70a3..484c57eadb653 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_size_value.pass.cpp +++ 
b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_size_value.pass.cpp @@ -36,7 +36,7 @@ TEST_CONSTEXPR_CXX20 bool tests() { std::vector v(100); while(v.size() < v.capacity()) v.push_back(false); - size_t sz = v.size(); + std::size_t sz = v.size(); std::vector::iterator i = v.insert(v.cbegin() + 10, 5, 1); assert(v.size() == sz + 5); assert(i == v.begin() + 10); @@ -52,7 +52,7 @@ TEST_CONSTEXPR_CXX20 bool tests() std::vector v(100); while(v.size() < v.capacity()) v.push_back(false); v.pop_back(); v.pop_back(); - size_t sz = v.size(); + std::size_t sz = v.size(); std::vector::iterator i = v.insert(v.cbegin() + 10, 5, 1); assert(v.size() == sz + 5); assert(i == v.begin() + 10); diff --git a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_value.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_value.pass.cpp index b9a926eef5ec7..bb59de3b185a9 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/insert_iter_value.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/insert_iter_value.pass.cpp @@ -35,7 +35,7 @@ TEST_CONSTEXPR_CXX20 bool tests() { std::vector v(100); while(v.size() < v.capacity()) v.push_back(false); - size_t sz = v.size(); + std::size_t sz = v.size(); std::vector::iterator i = v.insert(v.cbegin() + 10, 1); assert(v.size() == sz + 1); assert(i == v.begin() + 10); @@ -50,7 +50,7 @@ TEST_CONSTEXPR_CXX20 bool tests() std::vector v(100); while(v.size() < v.capacity()) v.push_back(false); v.pop_back(); v.pop_back(); - size_t sz = v.size(); + std::size_t sz = v.size(); std::vector::iterator i = v.insert(v.cbegin() + 10, 1); assert(v.size() == sz + 1); assert(i == v.begin() + 10); diff --git a/libcxx/test/std/containers/sequences/vector/contiguous.pass.cpp b/libcxx/test/std/containers/sequences/vector/contiguous.pass.cpp index ad18e65bb8eac..3c992ba9bb594 100644 --- a/libcxx/test/std/containers/sequences/vector/contiguous.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/contiguous.pass.cpp @@ -20,7 +20,7 @@ template TEST_CONSTEXPR_CXX20 void test_contiguous(const C &c) { - for ( size_t i = 0; i < c.size(); ++i ) + for ( std::size_t i = 0; i < c.size(); ++i ) assert ( *(c.begin() + static_cast(i)) == *(std::addressof(*c.begin()) + i)); } diff --git a/libcxx/test/std/containers/sequences/vector/vector.capacity/max_size.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.capacity/max_size.pass.cpp index 3f8d62e2f2d6b..33abaa04b1207 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.capacity/max_size.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.capacity/max_size.pass.cpp @@ -28,7 +28,7 @@ TEST_CONSTEXPR_CXX20 bool test() { LIBCPP_ASSERT(c.max_size() == 10); } { - typedef limited_allocator A; + typedef limited_allocator A; typedef std::vector C; const C::size_type max_dist = static_cast(std::numeric_limits::max()); diff --git a/libcxx/test/std/containers/sequences/vector/vector.capacity/reserve.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.capacity/reserve.pass.cpp index 387657cc3d2c1..b8548ad72d437 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.capacity/reserve.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.capacity/reserve.pass.cpp @@ -51,7 +51,7 @@ TEST_CONSTEXPR_CXX20 bool tests() { #ifndef TEST_HAS_NO_EXCEPTIONS if (!TEST_IS_CONSTANT_EVALUATED) { std::vector v; - size_t sz = v.max_size() + 1; + std::size_t sz = v.max_size() + 1; try { v.reserve(sz); @@ -64,8 +64,8 @@ TEST_CONSTEXPR_CXX20 bool tests() { if 
(!TEST_IS_CONSTANT_EVALUATED) { std::vector v(10, 42); int* previous_data = v.data(); - size_t previous_capacity = v.capacity(); - size_t sz = v.max_size() + 1; + std::size_t previous_capacity = v.capacity(); + std::size_t sz = v.max_size() + 1; try { v.reserve(sz); diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_iter_iter.pass.cpp index 4daa320673ba2..9b52885b9bf8d 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/assign_iter_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.cons/assign_iter_iter.pass.cpp @@ -71,7 +71,7 @@ TEST_CONSTEXPR_CXX20 bool test() { std::vector dst(10); - size_t n = dst.capacity() * 2; + std::size_t n = dst.capacity() * 2; std::vector src(n); dst.assign(It(src.data()), It(src.data() + src.size())); diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/default_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/default_noexcept.pass.cpp index 3bd4a169d5f92..054ea6430f82a 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/default_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.cons/default_noexcept.pass.cpp @@ -27,7 +27,7 @@ struct some_alloc { typedef T value_type; some_alloc(const some_alloc&); - void allocate(size_t); + void allocate(std::size_t); }; TEST_CONSTEXPR_CXX20 bool tests() { diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/dtor_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/dtor_noexcept.pass.cpp index 793475955506d..1b98b7598b97b 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/dtor_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.cons/dtor_noexcept.pass.cpp @@ -25,7 +25,7 @@ struct some_alloc typedef T value_type; some_alloc(const some_alloc&); ~some_alloc() noexcept(false); - void allocate(size_t); + void allocate(std::size_t); }; TEST_CONSTEXPR_CXX20 bool tests() diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/move_assign_noexcept.pass.cpp index fdd74a3f566b7..f466809bf2dd0 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/move_assign_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.cons/move_assign_noexcept.pass.cpp @@ -29,7 +29,7 @@ struct some_alloc { typedef T value_type; some_alloc(const some_alloc&); - void allocate(size_t); + void allocate(std::size_t); }; template @@ -39,7 +39,7 @@ struct some_alloc2 some_alloc2() {} some_alloc2(const some_alloc2&); - void allocate(size_t); + void allocate(std::size_t); void deallocate(void*, unsigned) {} typedef std::false_type propagate_on_container_move_assignment; @@ -53,7 +53,7 @@ struct some_alloc3 some_alloc3() {} some_alloc3(const some_alloc3&); - void allocate(size_t); + void allocate(std::size_t); void deallocate(void*, unsigned) {} typedef std::false_type propagate_on_container_move_assignment; diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/move_noexcept.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/move_noexcept.pass.cpp index 30e9f46dee615..c2bfa5234065d 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.cons/move_noexcept.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.cons/move_noexcept.pass.cpp @@ -27,7 +27,7 @@ struct some_alloc { typedef T 
value_type; some_alloc(const some_alloc&); - void allocate(size_t); + void allocate(std::size_t); }; int main(int, char**) diff --git a/libcxx/test/std/containers/sequences/vector/vector.erasure/erase.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.erasure/erase.pass.cpp index 01a43a9570830..88b27d9634c54 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.erasure/erase.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.erasure/erase.pass.cpp @@ -21,7 +21,7 @@ #include "min_allocator.h" template -TEST_CONSTEXPR_CXX20 void test0(S s, U val, S expected, size_t expected_erased_count) { +TEST_CONSTEXPR_CXX20 void test0(S s, U val, S expected, std::size_t expected_erased_count) { ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase(s, val))); assert(expected_erased_count == std::erase(s, val)); assert(s == expected); diff --git a/libcxx/test/std/containers/sequences/vector/vector.erasure/erase_if.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.erasure/erase_if.pass.cpp index 3da8eca862a28..015824a944571 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.erasure/erase_if.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.erasure/erase_if.pass.cpp @@ -20,7 +20,7 @@ #include "min_allocator.h" template -TEST_CONSTEXPR_CXX20 void test0(S s, Pred p, S expected, size_t expected_erased_count) { +TEST_CONSTEXPR_CXX20 void test0(S s, Pred p, S expected, std::size_t expected_erased_count) { ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p))); assert(expected_erased_count == std::erase_if(s, p)); assert(s == expected); diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/emplace_extra.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/emplace_extra.pass.cpp index 8c0caa74a9322..f186a0d97ae39 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/emplace_extra.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/emplace_extra.pass.cpp @@ -77,7 +77,7 @@ TEST_CONSTEXPR_CXX20 bool tests() { { std::vector v; v.reserve(8); - size_t old_capacity = v.capacity(); + std::size_t old_capacity = v.capacity(); assert(old_capacity >= 8); v.resize(4); // keep the existing capacity diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp index fe18d2da3b9e5..934b85ce01c67 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_iter_iter.pass.cpp @@ -68,7 +68,7 @@ TEST_CONSTEXPR_CXX20 bool tests() typedef std::vector V; V v(100); while(v.size() < v.capacity()) v.push_back(0); // force reallocation - size_t sz = v.size(); + std::size_t sz = v.size(); int a[] = {1, 2, 3, 4, 5}; const unsigned N = sizeof(a)/sizeof(a[0]); V::iterator i = v.insert(v.cbegin() + 10, forward_iterator(a), @@ -87,7 +87,7 @@ TEST_CONSTEXPR_CXX20 bool tests() typedef std::vector V; V v(100); v.reserve(128); // force no reallocation - size_t sz = v.size(); + std::size_t sz = v.size(); int a[] = {1, 2, 3, 4, 5}; const unsigned N = sizeof(a)/sizeof(a[0]); V::iterator i = v.insert(v.cbegin() + 10, forward_iterator(a), diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_lvalue.pass.cpp 
b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_lvalue.pass.cpp index 8985435e11c9b..b5582df5bd2c4 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_lvalue.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_lvalue.pass.cpp @@ -36,7 +36,7 @@ TEST_CONSTEXPR_CXX20 bool test() { assert(v[j] == 0); } { - const size_t n = 100; + const std::size_t n = 100; std::vector v(n); v.reserve(n + 1); const int lvalue = 1; @@ -47,7 +47,7 @@ TEST_CONSTEXPR_CXX20 bool test() { assert(v.size() == n + 1); assert(is_contiguous_container_asan_correct(v)); assert(it == v.begin() + n); - for (size_t i = 0; i < n; ++i) { + for (std::size_t i = 0; i < n; ++i) { assert(v[i] == 0); } assert(v[n] == lvalue); @@ -55,7 +55,7 @@ TEST_CONSTEXPR_CXX20 bool test() { { std::vector v(100); while(v.size() < v.capacity()) v.push_back(0); // force reallocation - size_t sz = v.size(); + std::size_t sz = v.size(); const int lvalue = 1; std::vector::iterator i = v.insert(v.cbegin() + 10, lvalue); assert(v.size() == sz + 1); @@ -72,7 +72,7 @@ TEST_CONSTEXPR_CXX20 bool test() { std::vector v(100); while(v.size() < v.capacity()) v.push_back(0); v.pop_back(); v.pop_back(); // force no reallocation - size_t sz = v.size(); + std::size_t sz = v.size(); const int lvalue = 1; std::vector::iterator i = v.insert(v.cbegin() + 10, lvalue); assert(v.size() == sz + 1); diff --git a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_size_value.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_size_value.pass.cpp index f6e447b2ff294..8325fcfd0670f 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_size_value.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.modifiers/insert_iter_size_value.pass.cpp @@ -38,7 +38,7 @@ TEST_CONSTEXPR_CXX20 bool tests() { std::vector v(100); while(v.size() < v.capacity()) v.push_back(0); // force reallocation - size_t sz = v.size(); + std::size_t sz = v.size(); std::vector::iterator i = v.insert(v.cbegin() + 10, 5, 1); assert(v.size() == sz + 5); assert(is_contiguous_container_asan_correct(v)); @@ -54,7 +54,7 @@ TEST_CONSTEXPR_CXX20 bool tests() { std::vector v(100); v.reserve(128); // force no reallocation - size_t sz = v.size(); + std::size_t sz = v.size(); std::vector::iterator i = v.insert(v.cbegin() + 10, 5, 1); assert(v.size() == sz + 5); assert(is_contiguous_container_asan_correct(v)); diff --git a/libcxx/test/std/containers/sequences/vector/vector.special/swap_noexcept.compile.pass.cpp b/libcxx/test/std/containers/sequences/vector/vector.special/swap_noexcept.compile.pass.cpp index 830d05c7012db..d38a00c789f8e 100644 --- a/libcxx/test/std/containers/sequences/vector/vector.special/swap_noexcept.compile.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector/vector.special/swap_noexcept.compile.pass.cpp @@ -35,7 +35,7 @@ struct some_alloc some_alloc() {} some_alloc(const some_alloc&); - void allocate(size_t); + void allocate(std::size_t); void deallocate(void*, unsigned) {} typedef std::true_type propagate_on_container_swap; @@ -48,7 +48,7 @@ struct some_alloc2 some_alloc2() {} some_alloc2(const some_alloc2&); - void allocate(size_t); + void allocate(std::size_t); void deallocate(void*, unsigned) {} typedef std::false_type propagate_on_container_swap; diff --git a/libcxx/test/std/containers/unord/unord.map/bucket.pass.cpp b/libcxx/test/std/containers/unord/unord.map/bucket.pass.cpp index 
0f2e7216b61eb..14e5b2fe0ad36 100644 --- a/libcxx/test/std/containers/unord/unord.map/bucket.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/bucket.pass.cpp @@ -37,9 +37,9 @@ int main(int, char**) P(2, "four"), }; const C c(std::begin(a), std::end(a)); - size_t bc = c.bucket_count(); + std::size_t bc = c.bucket_count(); assert(bc >= 5); - for (size_t i = 0; i < 13; ++i) + for (std::size_t i = 0; i < 13; ++i) LIBCPP_ASSERT(c.bucket(i) == i % bc); } #if TEST_STD_VER >= 11 @@ -57,9 +57,9 @@ int main(int, char**) P(2, "four"), }; const C c(std::begin(a), std::end(a)); - size_t bc = c.bucket_count(); + std::size_t bc = c.bucket_count(); assert(bc >= 5); - for (size_t i = 0; i < 13; ++i) + for (std::size_t i = 0; i < 13; ++i) LIBCPP_ASSERT(c.bucket(i) == i % bc); } #endif diff --git a/libcxx/test/std/containers/unord/unord.map/compare.pass.cpp b/libcxx/test/std/containers/unord/unord.map/compare.pass.cpp index 12364fa2dd3fe..200107cddf12a 100644 --- a/libcxx/test/std/containers/unord/unord.map/compare.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/compare.pass.cpp @@ -30,7 +30,7 @@ namespace std template <> struct hash { - size_t operator()(Key const &) const {return 0;} + std::size_t operator()(Key const &) const {return 0;} }; } diff --git a/libcxx/test/std/containers/unord/unord.map/erase_if.pass.cpp b/libcxx/test/std/containers/unord/unord.map/erase_if.pass.cpp index 54a8b9b3d3067..329ead0811944 100644 --- a/libcxx/test/std/containers/unord/unord.map/erase_if.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/erase_if.pass.cpp @@ -31,7 +31,7 @@ M make (Init vals) } template -void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) { +void test0(Init vals, Pred p, Init expected, std::size_t expected_erased_count) { M s = make(vals); ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p))); assert(expected_erased_count == std::erase_if(s, p)); diff --git a/libcxx/test/std/containers/unord/unord.map/max_size.pass.cpp b/libcxx/test/std/containers/unord/unord.map/max_size.pass.cpp index 7c2ec58de57f4..33c009b67dffd 100644 --- a/libcxx/test/std/containers/unord/unord.map/max_size.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/max_size.pass.cpp @@ -32,7 +32,7 @@ int main(int, char**) LIBCPP_ASSERT(c.max_size() == 10); } { - typedef limited_allocator A; + typedef limited_allocator A; typedef std::unordered_map, std::equal_to, A> C; const C::size_type max_dist = diff --git a/libcxx/test/std/containers/unord/unord.map/rehash.pass.cpp b/libcxx/test/std/containers/unord/unord.map/rehash.pass.cpp index c8f079fb61780..737e40d6c41cc 100644 --- a/libcxx/test/std/containers/unord/unord.map/rehash.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/rehash.pass.cpp @@ -22,7 +22,7 @@ #include "min_allocator.h" template -void rehash_postcondition(const C& c, size_t n) +void rehash_postcondition(const C& c, std::size_t n) { assert(c.bucket_count() >= c.size() / c.max_load_factor() && c.bucket_count() >= n); } diff --git a/libcxx/test/std/containers/unord/unord.map/reserve.pass.cpp b/libcxx/test/std/containers/unord/unord.map/reserve.pass.cpp index 622a9691e4575..358da35526306 100644 --- a/libcxx/test/std/containers/unord/unord.map/reserve.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/reserve.pass.cpp @@ -31,14 +31,14 @@ void test(const C& c) assert(c.at(4) == "four"); } -void reserve_invariant(size_t n) // LWG #2156 +void reserve_invariant(std::size_t n) // LWG #2156 { - for (size_t i = 0; i < n; ++i) + for (std::size_t i = 0; 
i < n; ++i) { - std::unordered_map c; + std::unordered_map c; c.reserve(n); - size_t buckets = c.bucket_count(); - for (size_t j = 0; j < i; ++j) + std::size_t buckets = c.bucket_count(); + for (std::size_t j = 0; j < i; ++j) { c[i] = i; assert(buckets == c.bucket_count()); diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.pass.cpp index 1d0b18bf1d47e..42cc9d1408378 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.pass.cpp @@ -28,7 +28,7 @@ struct TemplateConstructor }; bool operator==(const TemplateConstructor&, const TemplateConstructor&) { return false; } -struct Hash { size_t operator() (const TemplateConstructor &) const { return 0; } }; +struct Hash { std::size_t operator() (const TemplateConstructor &) const { return 0; } }; int main(int, char**) { diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_iterator.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_iterator.pass.cpp index 2ce98884c3317..ac241c07d972d 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_iterator.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_iterator.pass.cpp @@ -22,7 +22,7 @@ template void test(Container& c) { - size_t sz = c.size(); + std::size_t sz = c.size(); auto some_key = c.cbegin()->first; diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_key.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_key.pass.cpp index e1c893f3daf51..78721db16e0be 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_key.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/extract_key.pass.cpp @@ -24,8 +24,8 @@ template void test(Container& c, KeyTypeIter first, KeyTypeIter last) { - size_t sz = c.size(); - assert((size_t)std::distance(first, last) == sz); + std::size_t sz = c.size(); + assert((std::size_t)std::distance(first, last) == sz); for (KeyTypeIter copy = first; copy != last; ++copy) { diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type_hint.pass.cpp index 821eec9192fd3..8677810187fb0 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type_hint.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_node_type_hint.pass.cpp @@ -38,7 +38,7 @@ void test(Container& c) { typename Container::node_type node = nf(i, i + 1); assert(!node.empty()); - size_t prev = c.size(); + std::size_t prev = c.size(); auto it = c.insert(c.end(), std::move(node)); assert(node.empty()); assert(prev + 1 == c.size()); diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_or_assign.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_or_assign.pass.cpp index 1b7c9e00da470..df2138395adf2 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_or_assign.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_or_assign.pass.cpp @@ -52,7 +52,7 @@ class Moveable {return int_ == x.int_ && double_ == x.double_;} bool 
operator<(const Moveable& x) const {return int_ < x.int_ || (int_ == x.int_ && double_ < x.double_);} - size_t hash () const { return std::hash()(int_) + std::hash()(double_); } + std::size_t hash () const { return std::hash()(int_) + std::hash()(double_); } int get() const {return int_;} bool moved() const {return int_ == -1;} @@ -60,7 +60,7 @@ class Moveable namespace std { template <> struct hash { - size_t operator () (const Moveable &m) const { return m.hash(); } + std::size_t operator () (const Moveable &m) const { return m.hash(); } }; } diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/merge.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/merge.pass.cpp index 9953dc8e675c7..e5c29b13dcf12 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/merge.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/merge.pass.cpp @@ -40,7 +40,7 @@ struct throw_hasher throw_hasher(bool& should_throw) : should_throw_(should_throw) {} - size_t operator()(const T& p) const + std::size_t operator()(const T& p) const { if (should_throw_) throw 0; @@ -95,7 +95,7 @@ int main(int, char**) struct hasher { hasher() = default; - size_t operator()(const Counter& p) const + std::size_t operator()(const Counter& p) const { return std::hash>()(p); } diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try.emplace.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try.emplace.pass.cpp index bf2c317f712eb..b9c82390ab3a5 100644 --- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try.emplace.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try.emplace.pass.cpp @@ -51,7 +51,7 @@ class Moveable {return int_ == x.int_ && double_ == x.double_;} bool operator<(const Moveable& x) const {return int_ < x.int_ || (int_ == x.int_ && double_ < x.double_);} - size_t hash () const { return std::hash()(int_) + std::hash()(double_); } + std::size_t hash () const { return std::hash()(int_) + std::hash()(double_); } int get() const {return int_;} bool moved() const {return int_ == -1;} @@ -59,7 +59,7 @@ class Moveable namespace std { template <> struct hash { - size_t operator () (const Moveable &m) const { return m.hash(); } + std::size_t operator () (const Moveable &m) const { return m.hash(); } }; } diff --git a/libcxx/test/std/containers/unord/unord.multimap/bucket.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/bucket.pass.cpp index 80c97ddc99cee..034408740315a 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/bucket.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/bucket.pass.cpp @@ -37,9 +37,9 @@ int main(int, char**) P(2, "four"), }; const C c(std::begin(a), std::end(a)); - size_t bc = c.bucket_count(); + std::size_t bc = c.bucket_count(); assert(bc >= 7); - for (size_t i = 0; i < 13; ++i) + for (std::size_t i = 0; i < 13; ++i) LIBCPP_ASSERT(c.bucket(i) == i % bc); } #if TEST_STD_VER >= 11 @@ -57,9 +57,9 @@ int main(int, char**) P(2, "four"), }; const C c(std::begin(a), std::end(a)); - size_t bc = c.bucket_count(); + std::size_t bc = c.bucket_count(); assert(bc >= 7); - for (size_t i = 0; i < 13; ++i) + for (std::size_t i = 0; i < 13; ++i) LIBCPP_ASSERT(c.bucket(i) == i % bc); } #endif diff --git a/libcxx/test/std/containers/unord/unord.multimap/erase_if.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/erase_if.pass.cpp index 2da3b175a7ae1..90a6cae088c4f 100644 --- 
a/libcxx/test/std/containers/unord/unord.multimap/erase_if.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/erase_if.pass.cpp @@ -31,7 +31,7 @@ M make (Init vals) } template -void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) { +void test0(Init vals, Pred p, Init expected, std::size_t expected_erased_count) { M s = make(vals); ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p))); assert(expected_erased_count == std::erase_if(s, p)); diff --git a/libcxx/test/std/containers/unord/unord.multimap/max_size.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/max_size.pass.cpp index d03cf67ae7c69..182f5a3693b03 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/max_size.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/max_size.pass.cpp @@ -33,7 +33,7 @@ int main(int, char**) LIBCPP_ASSERT(c.max_size() == 10); } { - typedef limited_allocator A; + typedef limited_allocator A; typedef std::unordered_multimap, std::equal_to, A> C; diff --git a/libcxx/test/std/containers/unord/unord.multimap/rehash.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/rehash.pass.cpp index 99538e32d8276..33d2bc0ed107b 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/rehash.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/rehash.pass.cpp @@ -26,7 +26,7 @@ #include "min_allocator.h" template -void rehash_postcondition(const C& c, size_t n) +void rehash_postcondition(const C& c, std::size_t n) { assert(c.bucket_count() >= c.size() / c.max_load_factor() && c.bucket_count() >= n); } diff --git a/libcxx/test/std/containers/unord/unord.multimap/reserve.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/reserve.pass.cpp index 2d3d8f2983942..b658539c401e5 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/reserve.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/reserve.pass.cpp @@ -46,16 +46,16 @@ void test(const C& c) assert(c.find(4)->second == "four"); } -void reserve_invariant(size_t n) // LWG #2156 +void reserve_invariant(std::size_t n) // LWG #2156 { - for (size_t i = 0; i < n; ++i) + for (std::size_t i = 0; i < n; ++i) { - std::unordered_multimap c; + std::unordered_multimap c; c.reserve(n); - size_t buckets = c.bucket_count(); - for (size_t j = 0; j < i; ++j) + std::size_t buckets = c.bucket_count(); + for (std::size_t j = 0; j < i; ++j) { - c.insert(std::unordered_multimap::value_type(i,i)); + c.insert(std::unordered_multimap::value_type(i,i)); assert(buckets == c.bucket_count()); } } diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_const_iter.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_const_iter.pass.cpp index b237e3a26b9fb..24e9249bd78b2 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_const_iter.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/erase_const_iter.pass.cpp @@ -31,7 +31,7 @@ struct TemplateConstructor }; bool operator==(const TemplateConstructor&, const TemplateConstructor&) { return false; } -struct Hash { size_t operator() (const TemplateConstructor &) const { return 0; } }; +struct Hash { std::size_t operator() (const TemplateConstructor &) const { return 0; } }; int main(int, char**) { diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_iterator.pass.cpp 
b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_iterator.pass.cpp index 1b9e741539e03..8905dc2c119c4 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_iterator.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_iterator.pass.cpp @@ -22,7 +22,7 @@ template void test(Container& c) { - size_t sz = c.size(); + std::size_t sz = c.size(); auto some_key = c.cbegin()->first; diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_key.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_key.pass.cpp index 293e0c0a378df..3f2f69f031b7b 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_key.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/extract_key.pass.cpp @@ -22,8 +22,8 @@ template void test(Container& c, KeyTypeIter first, KeyTypeIter last) { - size_t sz = c.size(); - assert((size_t)std::distance(first, last) == sz); + std::size_t sz = c.size(); + assert((std::size_t)std::distance(first, last) == sz); for (KeyTypeIter copy = first; copy != last; ++copy) { diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type_hint.pass.cpp index 5b8cd8e39baba..461f7a33d35d0 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type_hint.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/insert_node_type_hint.pass.cpp @@ -37,7 +37,7 @@ void test(Container& c) { typename Container::node_type node = nf(i, i + 1); assert(!node.empty()); - size_t prev = c.size(); + std::size_t prev = c.size(); auto it = c.insert(c.end(), std::move(node)); assert(node.empty()); assert(prev + 1 == c.size()); diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/merge.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/merge.pass.cpp index 04b112682e841..e9c0c3fb75a2a 100644 --- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/merge.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/merge.pass.cpp @@ -40,7 +40,7 @@ struct throw_hasher throw_hasher(bool& should_throw) : should_throw_(should_throw) {} - size_t operator()(const T& p) const + std::size_t operator()(const T& p) const { if (should_throw_) throw 0; @@ -95,7 +95,7 @@ int main(int, char**) struct hasher { hasher() = default; - size_t operator()(const Counter& p) const + std::size_t operator()(const Counter& p) const { return std::hash>()(p); } diff --git a/libcxx/test/std/containers/unord/unord.multiset/bucket.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/bucket.pass.cpp index 5436c1e942208..dd487106c4358 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/bucket.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/bucket.pass.cpp @@ -35,9 +35,9 @@ int main(int, char**) P(2) }; const C c(std::begin(a), std::end(a)); - size_t bc = c.bucket_count(); + std::size_t bc = c.bucket_count(); assert(bc >= 7); - for (size_t i = 0; i < 13; ++i) + for (std::size_t i = 0; i < 13; ++i) LIBCPP_ASSERT(c.bucket(i) == i % bc); } #if TEST_STD_VER >= 11 @@ -55,9 +55,9 @@ int main(int, char**) P(2) }; const C c(std::begin(a), 
std::end(a)); - size_t bc = c.bucket_count(); + std::size_t bc = c.bucket_count(); assert(bc >= 7); - for (size_t i = 0; i < 13; ++i) + for (std::size_t i = 0; i < 13; ++i) LIBCPP_ASSERT(c.bucket(i) == i % bc); } #endif diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_const_iter.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_const_iter.pass.cpp index 2dee50143df8f..ffdcde80d14d3 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/erase_const_iter.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/erase_const_iter.pass.cpp @@ -27,7 +27,7 @@ struct TemplateConstructor }; bool operator==(const TemplateConstructor&, const TemplateConstructor&) { return false; } -struct Hash { size_t operator() (const TemplateConstructor &) const { return 0; } }; +struct Hash { std::size_t operator() (const TemplateConstructor &) const { return 0; } }; int main(int, char**) { diff --git a/libcxx/test/std/containers/unord/unord.multiset/erase_if.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/erase_if.pass.cpp index a7fe88deb2fb4..7bdc4f871571f 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/erase_if.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/erase_if.pass.cpp @@ -32,7 +32,7 @@ M make (Init vals) } template -void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) { +void test0(Init vals, Pred p, Init expected, std::size_t expected_erased_count) { M s = make(vals); ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p))); assert(expected_erased_count == std::erase_if(s, p)); diff --git a/libcxx/test/std/containers/unord/unord.multiset/extract_iterator.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/extract_iterator.pass.cpp index 153e56d54bd8d..d88e33bddd70d 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/extract_iterator.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/extract_iterator.pass.cpp @@ -22,7 +22,7 @@ template void test(Container& c) { - size_t sz = c.size(); + std::size_t sz = c.size(); for (auto first = c.cbegin(); first != c.cend();) { diff --git a/libcxx/test/std/containers/unord/unord.multiset/extract_key.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/extract_key.pass.cpp index 3c4de6ee2d04c..ec1ace86b5661 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/extract_key.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/extract_key.pass.cpp @@ -22,8 +22,8 @@ template void test(Container& c, KeyTypeIter first, KeyTypeIter last) { - size_t sz = c.size(); - assert((size_t)std::distance(first, last) == sz); + std::size_t sz = c.size(); + assert((std::size_t)std::distance(first, last) == sz); for (KeyTypeIter copy = first; copy != last; ++copy) { diff --git a/libcxx/test/std/containers/unord/unord.multiset/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/insert_node_type_hint.pass.cpp index ff1fc987815f9..2706899b19a95 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/insert_node_type_hint.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/insert_node_type_hint.pass.cpp @@ -36,7 +36,7 @@ void test(Container& c) { typename Container::node_type node = nf(i); assert(!node.empty()); - size_t prev = c.size(); + std::size_t prev = c.size(); auto it = c.insert(c.end(), std::move(node)); assert(prev + 1 == c.size()); assert(*it == i); diff --git a/libcxx/test/std/containers/unord/unord.multiset/max_size.pass.cpp 
b/libcxx/test/std/containers/unord/unord.multiset/max_size.pass.cpp index d08cdb6221fd3..47b165f230007 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/max_size.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/max_size.pass.cpp @@ -32,7 +32,7 @@ int main(int, char**) LIBCPP_ASSERT(c.max_size() == 10); } { - typedef limited_allocator A; + typedef limited_allocator A; typedef std::unordered_multiset, std::equal_to, A> C; diff --git a/libcxx/test/std/containers/unord/unord.multiset/merge.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/merge.pass.cpp index fc986ec232e82..00435d8df6a06 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/merge.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/merge.pass.cpp @@ -40,7 +40,7 @@ struct throw_hasher throw_hasher(bool& should_throw) : should_throw_(should_throw) {} - size_t operator()(const T& p) const + std::size_t operator()(const T& p) const { if (should_throw_) throw 0; @@ -95,7 +95,7 @@ int main(int, char**) struct hasher { hasher() = default; - size_t operator()(const Counter& p) const { return std::hash>()(p); } + std::size_t operator()(const Counter& p) const { return std::hash>()(p); } }; { typedef std::unordered_multiset, std::hash>, std::equal_to>> first_set_type; diff --git a/libcxx/test/std/containers/unord/unord.multiset/rehash.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/rehash.pass.cpp index 8c6699b18e247..1ef3bdcb99d0b 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/rehash.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/rehash.pass.cpp @@ -21,7 +21,7 @@ #include "min_allocator.h" template -void rehash_postcondition(const C& c, size_t n) +void rehash_postcondition(const C& c, std::size_t n) { assert(c.bucket_count() >= c.size() / c.max_load_factor() && c.bucket_count() >= n); } diff --git a/libcxx/test/std/containers/unord/unord.multiset/reserve.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/reserve.pass.cpp index 54eada5a76989..557a9241b4de1 100644 --- a/libcxx/test/std/containers/unord/unord.multiset/reserve.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.multiset/reserve.pass.cpp @@ -30,14 +30,14 @@ void test(const C& c) assert(c.count(4) == 1); } -void reserve_invariant(size_t n) // LWG #2156 +void reserve_invariant(std::size_t n) // LWG #2156 { - for (size_t i = 0; i < n; ++i) + for (std::size_t i = 0; i < n; ++i) { - std::unordered_multiset c; + std::unordered_multiset c; c.reserve(n); - size_t buckets = c.bucket_count(); - for (size_t j = 0; j < i; ++j) + std::size_t buckets = c.bucket_count(); + for (std::size_t j = 0; j < i; ++j) { c.insert(i); assert(buckets == c.bucket_count()); diff --git a/libcxx/test/std/containers/unord/unord.set/bucket.pass.cpp b/libcxx/test/std/containers/unord/unord.set/bucket.pass.cpp index bcaa77b4aa1fb..969c3f6b93e74 100644 --- a/libcxx/test/std/containers/unord/unord.set/bucket.pass.cpp +++ b/libcxx/test/std/containers/unord/unord.set/bucket.pass.cpp @@ -35,9 +35,9 @@ int main(int, char**) P(2) }; const C c(std::begin(a), std::end(a)); - size_t bc = c.bucket_count(); + std::size_t bc = c.bucket_count(); assert(bc >= 5); - for (size_t i = 0; i < 13; ++i) + for (std::size_t i = 0; i < 13; ++i) LIBCPP_ASSERT(c.bucket(i) == i % bc); } #if TEST_STD_VER >= 11 @@ -54,9 +54,9 @@ int main(int, char**) P(2) }; const C c(std::begin(a), std::end(a)); - size_t bc = c.bucket_count(); + std::size_t bc = c.bucket_count(); assert(bc >= 5); - for (size_t i = 0; i < 13; ++i) + for (std::size_t i = 
             LIBCPP_ASSERT(c.bucket(i) == i % bc);
     }
 #endif
diff --git a/libcxx/test/std/containers/unord/unord.set/erase_const_iter.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_const_iter.pass.cpp
index cc709132d9102..4a66329d57177 100644
--- a/libcxx/test/std/containers/unord/unord.set/erase_const_iter.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_const_iter.pass.cpp
@@ -27,7 +27,7 @@ struct TemplateConstructor
 };

 bool operator==(const TemplateConstructor&, const TemplateConstructor&) { return false; }

-struct Hash { size_t operator() (const TemplateConstructor &) const { return 0; } };
+struct Hash { std::size_t operator() (const TemplateConstructor &) const { return 0; } };

 int main(int, char**) {
diff --git a/libcxx/test/std/containers/unord/unord.set/erase_if.pass.cpp b/libcxx/test/std/containers/unord/unord.set/erase_if.pass.cpp
index a2c7325b1c470..ba5f3bfe181fc 100644
--- a/libcxx/test/std/containers/unord/unord.set/erase_if.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/erase_if.pass.cpp
@@ -32,7 +32,7 @@ M make (Init vals)
 }

 template
-void test0(Init vals, Pred p, Init expected, size_t expected_erased_count) {
+void test0(Init vals, Pred p, Init expected, std::size_t expected_erased_count) {
   M s = make(vals);
   ASSERT_SAME_TYPE(typename M::size_type, decltype(std::erase_if(s, p)));
   assert(expected_erased_count == std::erase_if(s, p));
diff --git a/libcxx/test/std/containers/unord/unord.set/extract_iterator.pass.cpp b/libcxx/test/std/containers/unord/unord.set/extract_iterator.pass.cpp
index d4a0a7c8ce464..f6382502e6354 100644
--- a/libcxx/test/std/containers/unord/unord.set/extract_iterator.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/extract_iterator.pass.cpp
@@ -22,7 +22,7 @@ template
 void test(Container& c)
 {
-    size_t sz = c.size();
+    std::size_t sz = c.size();

     for (auto first = c.cbegin(); first != c.cend();)
     {
diff --git a/libcxx/test/std/containers/unord/unord.set/extract_key.pass.cpp b/libcxx/test/std/containers/unord/unord.set/extract_key.pass.cpp
index e0d48567a7c3e..3bfb6c358c8b6 100644
--- a/libcxx/test/std/containers/unord/unord.set/extract_key.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/extract_key.pass.cpp
@@ -22,8 +22,8 @@ template
 void test(Container& c, KeyTypeIter first, KeyTypeIter last)
 {
-    size_t sz = c.size();
-    assert((size_t)std::distance(first, last) == sz);
+    std::size_t sz = c.size();
+    assert((std::size_t)std::distance(first, last) == sz);

     for (KeyTypeIter copy = first; copy != last; ++copy)
     {
diff --git a/libcxx/test/std/containers/unord/unord.set/insert_node_type_hint.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_node_type_hint.pass.cpp
index 3bb7d4fb53370..b4b3d8822592c 100644
--- a/libcxx/test/std/containers/unord/unord.set/insert_node_type_hint.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/insert_node_type_hint.pass.cpp
@@ -37,7 +37,7 @@ void test(Container& c)
     {
         typename Container::node_type node = nf(i);
         assert(!node.empty());
-        size_t prev = c.size();
+        std::size_t prev = c.size();
         auto it = c.insert(c.end(), std::move(node));
         assert(node.empty());
         assert(prev + 1 == c.size());
diff --git a/libcxx/test/std/containers/unord/unord.set/max_size.pass.cpp b/libcxx/test/std/containers/unord/unord.set/max_size.pass.cpp
index aeb1354da5621..289c4d3231749 100644
--- a/libcxx/test/std/containers/unord/unord.set/max_size.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/max_size.pass.cpp
@@ -30,7 +30,7 @@ int main(int, char**)
     LIBCPP_ASSERT(c.max_size() == 10);
   }
   {
-    typedef limited_allocator A;
+    typedef limited_allocator A;
     typedef std::unordered_set, std::equal_to, A> C;
     const C::size_type max_dist = static_cast(std::numeric_limits::max());
diff --git a/libcxx/test/std/containers/unord/unord.set/merge.pass.cpp b/libcxx/test/std/containers/unord/unord.set/merge.pass.cpp
index ef06ca8bdbf73..c1b0e174cc0c1 100644
--- a/libcxx/test/std/containers/unord/unord.set/merge.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/merge.pass.cpp
@@ -40,7 +40,7 @@ struct throw_hasher
     throw_hasher(bool& should_throw) : should_throw_(should_throw) {}

-    size_t operator()(const T& p) const
+    std::size_t operator()(const T& p) const
     {
         if (should_throw_)
             throw 0;
@@ -95,7 +95,7 @@ int main(int, char**)
     struct hasher
     {
         hasher() = default;
-        size_t operator()(const Counter& p) const { return std::hash>()(p); }
+        std::size_t operator()(const Counter& p) const { return std::hash>()(p); }
     };
     {
         typedef std::unordered_set, std::hash>, std::equal_to>> first_set_type;
diff --git a/libcxx/test/std/containers/unord/unord.set/rehash.pass.cpp b/libcxx/test/std/containers/unord/unord.set/rehash.pass.cpp
index e45327ad7ec51..a0482fc2db706 100644
--- a/libcxx/test/std/containers/unord/unord.set/rehash.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/rehash.pass.cpp
@@ -21,7 +21,7 @@
 #include "min_allocator.h"

 template
-void rehash_postcondition(const C& c, size_t n)
+void rehash_postcondition(const C& c, std::size_t n)
 {
     assert(c.bucket_count() >= c.size() / c.max_load_factor() && c.bucket_count() >= n);
 }
diff --git a/libcxx/test/std/containers/unord/unord.set/reserve.pass.cpp b/libcxx/test/std/containers/unord/unord.set/reserve.pass.cpp
index 7ea358e889d48..afcccf9e03795 100644
--- a/libcxx/test/std/containers/unord/unord.set/reserve.pass.cpp
+++ b/libcxx/test/std/containers/unord/unord.set/reserve.pass.cpp
@@ -30,14 +30,14 @@ void test(const C& c)
     assert(c.count(4) == 1);
 }

-void reserve_invariant(size_t n) // LWG #2156
+void reserve_invariant(std::size_t n) // LWG #2156
 {
-    for (size_t i = 0; i < n; ++i)
+    for (std::size_t i = 0; i < n; ++i)
     {
-        std::unordered_set c;
+        std::unordered_set c;
         c.reserve(n);
-        size_t buckets = c.bucket_count();
-        for (size_t j = 0; j < i; ++j)
+        std::size_t buckets = c.bucket_count();
+        for (std::size_t j = 0; j < i; ++j)
         {
             c.insert(i);
             assert(buckets == c.bucket_count());
diff --git a/libcxx/test/std/containers/views/views.span/span.cons/assign.pass.cpp b/libcxx/test/std/containers/views/views.span/span.cons/assign.pass.cpp
index 7481987862ed5..3f358116b8c6f 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/assign.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/assign.pass.cpp
@@ -241,8 +241,8 @@ int main(int, char**)
         {arr + 1, arr + 3}  // same size as s2
         };

-    for (size_t i = 0; i < std::size(spans); ++i)
-        for (size_t j = i; j < std::size(spans); ++j)
+    for (std::size_t i = 0; i < std::size(spans); ++i)
+        for (std::size_t j = i; j < std::size(spans); ++j)
             assert((doAssign(spans[i], spans[j])));
     }

@@ -255,8 +255,8 @@ int main(int, char**)
         spanType{arr + 2, arr + 4}
         };

-    for (size_t i = 0; i < std::size(spans); ++i)
-        for (size_t j = i; j < std::size(spans); ++j)
+    for (std::size_t i = 0; i < std::size(spans); ++i)
+        for (std::size_t j = i; j < std::size(spans); ++j)
             assert((doAssign(spans[i], spans[j])));
     }

@@ -275,8 +275,8 @@ int main(int, char**)
         {strs + 3, strs + 3}
         };

-    for (size_t i = 0; i < std::size(spans); ++i)
-        for (size_t j = i; j < std::size(spans); ++j)
+    for (std::size_t i = 0; i < std::size(spans); ++i)
+        for (std::size_t j = i; j < std::size(spans); ++j)
             assert((doAssign(spans[i], spans[j])));
     }

@@ -288,8 +288,8 @@ int main(int, char**)
         spanType{strs + 2, strs + 3}
         };

-    for (size_t i = 0; i < std::size(spans); ++i)
-        for (size_t j = i; j < std::size(spans); ++j)
+    for (std::size_t i = 0; i < std::size(spans); ++i)
+        for (std::size_t j = i; j < std::size(spans); ++j)
             assert((doAssign(spans[i], spans[j])));
     }
diff --git a/libcxx/test/std/containers/views/views.span/span.cons/iterator_len.pass.cpp b/libcxx/test/std/containers/views/views.span/span.cons/iterator_len.pass.cpp
index f197330331411..fbbd3d6ff4044 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/iterator_len.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/iterator_len.pass.cpp
@@ -20,23 +20,23 @@
 #include
 #include

-template
+template
 constexpr void test_constructibility() {
   struct Other {};
-  static_assert(std::is_constructible_v, int*, size_t>);
-  static_assert(!std::is_constructible_v, const int*, size_t>);
-  static_assert(std::is_constructible_v, int*, size_t>);
-  static_assert(std::is_constructible_v, const int*, size_t>);
-  static_assert(!std::is_constructible_v, volatile int*, size_t>);
-  static_assert(!std::is_constructible_v, const volatile int*, size_t>);
-  static_assert(!std::is_constructible_v, volatile int*, size_t>);
-  static_assert(!std::is_constructible_v, const volatile int*, size_t>);
-  static_assert(!std::is_constructible_v, const int*, size_t>);
-  static_assert(!std::is_constructible_v, const volatile int*, size_t>);
+  static_assert(std::is_constructible_v, int*, std::size_t>);
+  static_assert(!std::is_constructible_v, const int*, std::size_t>);
+  static_assert(std::is_constructible_v, int*, std::size_t>);
+  static_assert(std::is_constructible_v, const int*, std::size_t>);
+  static_assert(!std::is_constructible_v, volatile int*, std::size_t>);
+  static_assert(!std::is_constructible_v, const volatile int*, std::size_t>);
+  static_assert(!std::is_constructible_v, volatile int*, std::size_t>);
+  static_assert(!std::is_constructible_v, const volatile int*, std::size_t>);
+  static_assert(!std::is_constructible_v, const int*, std::size_t>);
+  static_assert(!std::is_constructible_v, const volatile int*, std::size_t>);
   static_assert(
-      !std::is_constructible_v, double*, size_t>); // iterator type differs from span type
-  static_assert(!std::is_constructible_v, size_t, size_t>);
-  static_assert(!std::is_constructible_v, Other*, size_t>); // unrelated iterator type
+      !std::is_constructible_v, double*, std::size_t>); // iterator type differs from span type
+  static_assert(!std::is_constructible_v, std::size_t, size_t>);
+  static_assert(!std::is_constructible_v, Other*, std::size_t>); // unrelated iterator type
 }

 template
diff --git a/libcxx/test/std/containers/views/views.span/span.cons/iterator_len.verify.cpp b/libcxx/test/std/containers/views/views.span/span.cons/iterator_len.verify.cpp
index dc287cbe83114..3836c97e94c6d 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/iterator_len.verify.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/iterator_len.verify.cpp
@@ -17,8 +17,8 @@
 #include
 #include

-template
-std::span createImplicitSpan(T* ptr, size_t len) {
+template
+std::span createImplicitSpan(T* ptr, std::size_t len) {
   return {ptr, len}; // expected-error {{chosen constructor is explicit in copy-initialization}}
 }
diff --git a/libcxx/test/std/containers/views/views.span/span.cons/iterator_sentinel.pass.cpp b/libcxx/test/std/containers/views/views.span/span.cons/iterator_sentinel.pass.cpp
index 335d2b850f6df..c2d650bfaffa5 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/iterator_sentinel.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/iterator_sentinel.pass.cpp
@@ -34,7 +34,7 @@ constexpr bool test_ctor() {
   return true;
 }

-template
+template
 constexpr void test_constructibility() {
   static_assert(std::is_constructible_v, int*, int*>);
   static_assert(!std::is_constructible_v, const int*, const int*>);
diff --git a/libcxx/test/std/containers/views/views.span/span.cons/iterator_sentinel.verify.cpp b/libcxx/test/std/containers/views/views.span/span.cons/iterator_sentinel.verify.cpp
index d0d81df313d37..a31aa2af7b9dd 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/iterator_sentinel.verify.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/iterator_sentinel.verify.cpp
@@ -18,7 +18,7 @@
 #include
 #include

-template
+template
 std::span createImplicitSpan(T* first, T* last) {
   return {first, last}; // expected-error {{chosen constructor is explicit in copy-initialization}}
 }
diff --git a/libcxx/test/std/containers/views/views.span/span.cons/range.pass.cpp b/libcxx/test/std/containers/views/views.span/span.cons/range.pass.cpp
index d4847549785be..71fee430f8f42 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/range.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/range.pass.cpp
@@ -22,7 +22,7 @@
 #include "test_iterators.h"

-template
+template
 constexpr void test_from_range() {
   T val[3]{};
   std::span s{val};
diff --git a/libcxx/test/std/containers/views/views.span/span.cons/span.fail.cpp b/libcxx/test/std/containers/views/views.span/span.cons/span.fail.cpp
index c3c3631c5f3d9..784530ebcb106 100644
--- a/libcxx/test/std/containers/views/views.span/span.cons/span.fail.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.cons/span.fail.cpp
@@ -23,7 +23,7 @@
 #include "test_macros.h"

-template
+template
 std::span createImplicitSpan(std::span s) {
   return {s}; // expected-error {{chosen constructor is explicit in copy-initialization}}
 }
diff --git a/libcxx/test/std/containers/views/views.span/span.elem/op_idx.pass.cpp b/libcxx/test/std/containers/views/views.span/span.elem/op_idx.pass.cpp
index fdafbfc73e129..e46fd267ef5cc 100644
--- a/libcxx/test/std/containers/views/views.span/span.elem/op_idx.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.elem/op_idx.pass.cpp
@@ -21,7 +21,7 @@

 template
-constexpr bool testConstexprSpan(Span sp, size_t idx)
+constexpr bool testConstexprSpan(Span sp, std::size_t idx)
 {
     LIBCPP_ASSERT(noexcept(sp[idx]));

@@ -32,7 +32,7 @@ constexpr bool testConstexprSpan(Span sp, size_t idx)

 template
-void testRuntimeSpan(Span sp, size_t idx)
+void testRuntimeSpan(Span sp, std::size_t idx)
 {
     LIBCPP_ASSERT(noexcept(sp[idx]));
diff --git a/libcxx/test/std/containers/views/views.span/span.iterators/end.pass.cpp b/libcxx/test/std/containers/views/views.span/span.iterators/end.pass.cpp
index d531537bb195d..d6aaf74d1058c 100644
--- a/libcxx/test/std/containers/views/views.span/span.iterators/end.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.iterators/end.pass.cpp
@@ -34,7 +34,7 @@ constexpr bool testConstexprSpan(Span s)
         ret = ret && (&*( e-1) == last);
     }

-    ret = ret && (static_cast(e - s.begin()) == s.size());
+    ret = ret && (static_cast(e - s.begin()) == s.size());
     return ret;
 }

@@ -53,7 +53,7 @@ void testRuntimeSpan(Span s)
         assert(&*( e-1) == last);
     }

-    assert(static_cast(e - s.begin()) == s.size());
+    assert(static_cast(e - s.begin()) == s.size());
 }
diff --git a/libcxx/test/std/containers/views/views.span/span.iterators/rend.pass.cpp b/libcxx/test/std/containers/views/views.span/span.iterators/rend.pass.cpp
index c2bd13aaf612b..ffec15c6b2492 100644
--- a/libcxx/test/std/containers/views/views.span/span.iterators/rend.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.iterators/rend.pass.cpp
@@ -32,7 +32,7 @@ constexpr bool testConstexprSpan(Span s)
         ret = ret && (e != s.rbegin());
     }

-    ret = ret && (static_cast(e - s.rbegin()) == s.size());
+    ret = ret && (static_cast(e - s.rbegin()) == s.size());
     return ret;
 }

@@ -49,7 +49,7 @@ void testRuntimeSpan(Span s)
         assert(e != s.rbegin());
     }

-    assert(static_cast(e - s.rbegin()) == s.size());
+    assert(static_cast(e - s.rbegin()) == s.size());
 }
diff --git a/libcxx/test/std/containers/views/views.span/span.objectrep/as_writable_bytes.verify.cpp b/libcxx/test/std/containers/views/views.span/span.objectrep/as_writable_bytes.verify.cpp
index d32d5912c7843..45485b339752c 100644
--- a/libcxx/test/std/containers/views/views.span/span.objectrep/as_writable_bytes.verify.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.objectrep/as_writable_bytes.verify.cpp
@@ -36,7 +36,7 @@ void f() {
   std::as_writable_bytes(std::span());          // expected-error {{no matching function for call to 'as_writable_bytes'}}
   std::as_writable_bytes(std::span());          // expected-error {{no matching function for call to 'as_writable_bytes'}}
   std::as_writable_bytes(std::span());          // expected-error {{no matching function for call to 'as_writable_bytes'}}
-  std::as_writable_bytes(std::span());          // expected-error {{no matching function for call to 'as_writable_bytes'}}
+  std::as_writable_bytes(std::span());          // expected-error {{no matching function for call to 'as_writable_bytes'}}
   std::as_writable_bytes(std::span (iArr2, 1)); // expected-error {{no matching function for call to 'as_writable_bytes'}}
   std::as_writable_bytes(std::span(iArr2 + 5, 1)); // expected-error {{no matching function for call to 'as_writable_bytes'}}
diff --git a/libcxx/test/std/containers/views/views.span/span.obs/size.pass.cpp b/libcxx/test/std/containers/views/views.span/span.obs/size.pass.cpp
index 5be7b9850670d..fa41d7803e97e 100644
--- a/libcxx/test/std/containers/views/views.span/span.obs/size.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.obs/size.pass.cpp
@@ -21,7 +21,7 @@

 template
-constexpr bool testConstexprSpan(Span sp, size_t sz)
+constexpr bool testConstexprSpan(Span sp, std::size_t sz)
 {
     ASSERT_NOEXCEPT(sp.size());
     return sp.size() == sz;
@@ -29,7 +29,7 @@ constexpr bool testConstexprSpan(Span sp, size_t sz)

 template
-void testRuntimeSpan(Span sp, size_t sz)
+void testRuntimeSpan(Span sp, std::size_t sz)
 {
     ASSERT_NOEXCEPT(sp.size());
     assert(sp.size() == sz);
diff --git a/libcxx/test/std/containers/views/views.span/span.obs/size_bytes.pass.cpp b/libcxx/test/std/containers/views/views.span/span.obs/size_bytes.pass.cpp
index 9ce6d2679cc52..c25eaaa2017bf 100644
--- a/libcxx/test/std/containers/views/views.span/span.obs/size_bytes.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.obs/size_bytes.pass.cpp
@@ -22,18 +22,18 @@

 template
-constexpr bool testConstexprSpan(Span sp, size_t sz)
+constexpr bool testConstexprSpan(Span sp, std::size_t sz)
 {
     ASSERT_NOEXCEPT(sp.size_bytes());
-    return (size_t) sp.size_bytes() == sz * sizeof(typename Span::element_type);
+    return (std::size_t) sp.size_bytes() == sz * sizeof(typename Span::element_type);
 }

 template
-void testRuntimeSpan(Span sp, size_t sz)
+void testRuntimeSpan(Span sp, std::size_t sz)
 {
     ASSERT_NOEXCEPT(sp.size_bytes());
-    assert((size_t) sp.size_bytes() == sz * sizeof(typename Span::element_type));
+    assert((std::size_t) sp.size_bytes() == sz * sizeof(typename Span::element_type));
 }

 struct A{};
diff --git a/libcxx/test/std/containers/views/views.span/span.sub/first.pass.cpp b/libcxx/test/std/containers/views/views.span/span.sub/first.pass.cpp
index 37baea2c7c34a..a354ade2247ae 100644
--- a/libcxx/test/std/containers/views/views.span/span.sub/first.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.sub/first.pass.cpp
@@ -23,7 +23,7 @@
 #include "test_macros.h"

-template
+template
 constexpr bool testConstexprSpan(Span sp)
 {
     LIBCPP_ASSERT((noexcept(sp.template first())));
@@ -43,7 +43,7 @@ constexpr bool testConstexprSpan(Span sp)
 }

-template
+template
 void testRuntimeSpan(Span sp)
 {
     LIBCPP_ASSERT((noexcept(sp.template first())));
diff --git a/libcxx/test/std/containers/views/views.span/span.sub/last.pass.cpp b/libcxx/test/std/containers/views/views.span/span.sub/last.pass.cpp
index 1cddaa15952cf..017c805af3ed5 100644
--- a/libcxx/test/std/containers/views/views.span/span.sub/last.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.sub/last.pass.cpp
@@ -23,7 +23,7 @@
 #include "test_macros.h"

-template
+template
 constexpr bool testConstexprSpan(Span sp)
 {
     LIBCPP_ASSERT((noexcept(sp.template last())));
@@ -43,7 +43,7 @@ constexpr bool testConstexprSpan(Span sp)
 }

-template
+template
 void testRuntimeSpan(Span sp)
 {
     LIBCPP_ASSERT((noexcept(sp.template last())));
diff --git a/libcxx/test/std/containers/views/views.span/span.sub/subspan.pass.cpp b/libcxx/test/std/containers/views/views.span/span.sub/subspan.pass.cpp
index bc5731396699b..401363090e6e5 100644
--- a/libcxx/test/std/containers/views/views.span/span.sub/subspan.pass.cpp
+++ b/libcxx/test/std/containers/views/views.span/span.sub/subspan.pass.cpp
@@ -24,7 +24,7 @@
 #include "test_macros.h"

-template
+template
 constexpr bool testConstexprSpan(Span sp)
 {
     LIBCPP_ASSERT((noexcept(sp.template subspan())));
@@ -43,7 +43,7 @@ constexpr bool testConstexprSpan(Span sp)
         && std::equal(s1.begin(), s1.end(), sp.begin() + Offset);
 }

-template
+template
 constexpr bool testConstexprSpan(Span sp)
 {
     LIBCPP_ASSERT((noexcept(sp.template subspan())));
@@ -63,7 +63,7 @@ constexpr bool testConstexprSpan(Span sp)
 }

-template
+template
 void testRuntimeSpan(Span sp)
 {
     LIBCPP_ASSERT((noexcept(sp.template subspan())));
@@ -82,7 +82,7 @@ void testRuntimeSpan(Span sp)
 }

-template
+template
 void testRuntimeSpan(Span sp)
 {
     LIBCPP_ASSERT((noexcept(sp.template subspan())));
diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp
index 025bc0b74c8c9..83e3009ce4f83 100644
--- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp
+++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp
@@ -34,7 +34,7 @@ void my_free(void*)

 struct test : std::strstreambuf
 {
-    test(void* (*palloc_arg)(size_t), void (*pfree_arg)(void*))
+    test(void* (*palloc_arg)(std::size_t), void (*pfree_arg)(void*))
         : std::strstreambuf(palloc_arg, pfree_arg) {}
     virtual int_type overflow(int_type c) {return std::strstreambuf::overflow(c);}
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/allocate.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/allocate.pass.cpp
index 297235efc48c5..1b33f796aac23 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/allocate.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/allocate.pass.cpp
@@ -36,7 +36,7 @@

 namespace ex = std::experimental::pmr;

-template
+template
 void testForSizeAndAlign() {
     struct T { alignas(Align) char data[S]; };
     TestResource R;
@@ -52,7 +52,7 @@ void testForSizeAndAlign() {
 }

 #ifndef TEST_HAS_NO_EXCEPTIONS
-template
+template
 void testAllocForSizeThrows() {
     struct T { char data[S]; };
     using Alloc = ex::polymorphic_allocator;
@@ -61,14 +61,14 @@ void testAllocForSizeThrows() {
     Alloc a(&R);

     // Test that allocating exactly the max size does not throw.
-    size_t maxSize = Traits::max_size(a);
+    std::size_t maxSize = Traits::max_size(a);
     try {
         a.allocate(maxSize);
     } catch (...) {
         assert(false);
     }

-    size_t sizeTypeMax = std::numeric_limits::max();
+    std::size_t sizeTypeMax = std::numeric_limits::max();
     if (maxSize != sizeTypeMax)
     {
         // Test that allocating size_t(~0) throws bad_array_new_length.
@@ -79,7 +79,7 @@ void testAllocForSizeThrows() {
     }

     // Test that allocating even one more than the max size does throw.
-    size_t overSize = maxSize + 1;
+    std::size_t overSize = maxSize + 1;
     try {
         a.allocate(overSize);
         assert(false);
diff --git a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp
index 0084d37a50839..4bdccf8a7dc80 100644
--- a/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.polymorphic.allocator.class/memory.polymorphic.allocator.mem/deallocate.pass.cpp
@@ -34,7 +34,7 @@

 namespace ex = std::experimental::pmr;

-template
+template
 void testForSizeAndAlign() {
     struct T { alignas(Align) char data[S]; };
diff --git a/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp
index 44b00cbb47a2a..d645fc1941f9f 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.global/new_delete_resource.pass.cpp
@@ -32,10 +32,10 @@ namespace ex = std::experimental::pmr;
 struct assert_on_compare : public ex::memory_resource
 {
 protected:
-    void * do_allocate(size_t, size_t) override
+    void * do_allocate(std::size_t, size_t) override
     { assert(false); return nullptr; }

-    void do_deallocate(void *, size_t, size_t) override
+    void do_deallocate(void *, std::size_t, size_t) override
     { assert(false); }

     bool do_is_equal(ex::memory_resource const &) const noexcept override
diff --git a/libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp b/libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp
index 8ac9b8bfe6979..fd38ffeaf0b94 100644
--- a/libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp
+++ b/libcxx/test/std/experimental/memory/memory.resource.global/null_memory_resource.pass.cpp
@@ -32,10 +32,10 @@ namespace ex = std::experimental::pmr;
 struct assert_on_compare : public ex::memory_resource
 {
 protected:
-    void * do_allocate(size_t, size_t) override
+    void * do_allocate(std::size_t, size_t) override
     { assert(false); return nullptr; }

-    void do_deallocate(void *, size_t, size_t) override
+    void do_deallocate(void *, std::size_t, size_t) override
     { assert(false); }

     bool do_is_equal(ex::memory_resource const &) const noexcept override
diff --git a/libcxx/test/std/experimental/simd/simd.cons/broadcast.pass.cpp b/libcxx/test/std/experimental/simd/simd.cons/broadcast.pass.cpp
index 02dd00379691d..edf6ba5005921 100644
--- a/libcxx/test/std/experimental/simd/simd.cons/broadcast.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.cons/broadcast.pass.cpp
@@ -73,7 +73,7 @@ void compile_unsigned() {
 template
 void test_broadcast() {
   SimdType a(3);
-  for (size_t i = 0; i < a.size(); i++) {
+  for (std::size_t i = 0; i < a.size(); i++) {
     assert(a[i] == 3);
   }
 }
diff --git a/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp b/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp
index 5049d9b0de9a0..b28e1af29a89e 100644
--- a/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp
+++ b/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp
@@ -38,8 +38,8 @@ template
 void supported_simd128_ctor(...) = delete;

 struct identity {
-  template
-  int operator()(std::integral_constant) const {
+  template
+  int operator()(std::integral_constant) const {
     return value;
   }
 };
@@ -52,9 +52,9 @@ void compile_generator() {
 }

 struct limited_identity {
-  template
+  template
   typename std::conditional::type
-  operator()(std::integral_constant) const {
+  operator()(std::integral_constant) const {
     return value;
   }
 };
diff --git a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/hash.pass.cpp b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/hash.pass.cpp
index 01db1a8e91fef..65208ffd8b1fb 100644
--- a/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/hash.pass.cpp
+++ b/libcxx/test/std/experimental/utilities/propagate_const/propagate_const.nonmembers/hash.pass.cpp
@@ -24,7 +24,7 @@ template <> struct hash
 {
   typedef X first_argument_type;

-  size_t operator()(const first_argument_type&) const
+  std::size_t operator()(const first_argument_type&) const
   {
     return 99;
   }
diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.compare.pass.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.compare.pass.cpp
index 365aa1cce9a1d..504616c106652 100644
--- a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.compare.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.compare.pass.cpp
@@ -131,7 +131,7 @@ void test_compare_basic()
     auto h2 = hash_value(p2);
     assert((h1 == h2) == (p1 == p2)); // check signature
-    ASSERT_SAME_TYPE(size_t, decltype(hash_value(p1)));
+    ASSERT_SAME_TYPE(std::size_t, decltype(hash_value(p1)));
     ASSERT_NOEXCEPT(hash_value(p1));
   }
   { // check std::hash
     auto h1 = std::hash()(p1);
     auto h2 = std::hash()(p2);
     assert((h1 == h2) == (p1 == p2)); // check signature
-    ASSERT_SAME_TYPE(size_t, decltype(std::hash()(p1)));
+    ASSERT_SAME_TYPE(std::size_t, decltype(std::hash()(p1)));
     ASSERT_NOEXCEPT(std::hash()(p1));
   }
 }
diff --git a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp
index 1f3003a0945d7..179c2cd464819 100644
--- a/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp
+++ b/libcxx/test/std/input.output/iostream.format/output.streams/ostream.formatted/ostream.inserters.arithmetic/minus1.pass.cpp
@@ -54,7 +54,7 @@ void test_hex(const char *expected)
     ss << std::hex << static_cast(-1);

     std::string str = ss.str();
-    for (size_t i = 0; i < str.size(); ++i )
+    for (std::size_t i = 0; i < str.size(); ++i )
         str[i] = static_cast(std::toupper(str[i]));

     assert(str == expected);
diff --git a/libcxx/test/std/iterators/iterator.container/data.pass.cpp b/libcxx/test/std/iterators/iterator.container/data.pass.cpp
index 72c4b656fdba5..1a6af4a95054d 100644
--- a/libcxx/test/std/iterators/iterator.container/data.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.container/data.pass.cpp
@@ -54,7 +54,7 @@ void test_container( std::initializer_list& c)
     assert ( std::data(c) == c.begin());
 }

-template
+template
 void test_const_array( const T (&array)[Sz] )
 {
     ASSERT_NOEXCEPT(std::data(array));
diff --git a/libcxx/test/std/iterators/iterator.container/empty.pass.cpp b/libcxx/test/std/iterators/iterator.container/empty.pass.cpp
index 1e1c0e42cbcc3..1a00e88d22b15 100644
--- a/libcxx/test/std/iterators/iterator.container/empty.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.container/empty.pass.cpp
@@ -53,7 +53,7 @@ void test_container( std::initializer_list& c )
     assert ( std::empty(c) == (c.size() == 0));
 }

-template
+template
 void test_const_array( const T (&array)[Sz] )
 {
     ASSERT_NOEXCEPT(std::empty(array));
diff --git a/libcxx/test/std/iterators/iterator.container/size.pass.cpp b/libcxx/test/std/iterators/iterator.container/size.pass.cpp
index 697119a3239f9..975d3e80c2b78 100644
--- a/libcxx/test/std/iterators/iterator.container/size.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.container/size.pass.cpp
@@ -54,7 +54,7 @@ void test_container( std::initializer_list& c )
     assert ( std::size(c) == c.size());
 }

-template
+template
 void test_const_array( const T (&array)[Sz] )
 {
     ASSERT_NOEXCEPT(std::size(array));
diff --git a/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp b/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp
index 779e9ed8aa9dc..9be44094a099c 100644
--- a/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp
@@ -65,7 +65,7 @@ void test_container(std::initializer_list& c)
     assert ( std::ssize(c) == static_cast(c.size()));
 }

-template
+template
 void test_const_array(const T (&array)[Sz])
 {
     ASSERT_NOEXCEPT(std::ssize(array));
diff --git a/libcxx/test/std/language.support/support.coroutines/coroutine.handle/coroutine.handle.hash/hash.pass.cpp b/libcxx/test/std/language.support/support.coroutines/coroutine.handle/coroutine.handle.hash/hash.pass.cpp
index 6d5b9e98c4e46..1e3aed28800f1 100644
--- a/libcxx/test/std/language.support/support.coroutines/coroutine.handle/coroutine.handle.hash/hash.pass.cpp
+++ b/libcxx/test/std/language.support/support.coroutines/coroutine.handle/coroutine.handle.hash/hash.pass.cpp
@@ -30,8 +30,8 @@ template
 void do_test(int *LHSVal, int *RHSVal) {
-  [[maybe_unused]] const size_t ExpectLHS = std::hash{}(LHSVal);
-  [[maybe_unused]] const size_t ExpectRHS = std::hash{}(RHSVal);
+  [[maybe_unused]] const std::size_t ExpectLHS = std::hash{}(LHSVal);
+  [[maybe_unused]] const std::size_t ExpectRHS = std::hash{}(RHSVal);
   const C LHS = C::from_address(LHSVal);
   const C RHS = C::from_address(RHSVal);
   const std::hash h;
@@ -40,7 +40,7 @@ void do_test(int *LHSVal, int *RHSVal) {
   LIBCPP_ASSERT(h(RHS) == ExpectRHS);
   assert((h(LHS) == h(RHS)) == (LHSVal == RHSVal));
   {
-    ASSERT_SAME_TYPE(decltype(h(LHS)), size_t);
+    ASSERT_SAME_TYPE(decltype(h(LHS)), std::size_t);
     ASSERT_NOEXCEPT(std::hash{}(LHS));
   }
 }
diff --git a/libcxx/test/std/language.support/support.runtime/cstdlib.pass.cpp b/libcxx/test/std/language.support/support.runtime/cstdlib.pass.cpp
index c4c8ddd318094..823417f8a418e 100644
--- a/libcxx/test/std/language.support/support.runtime/cstdlib.pass.cpp
+++ b/libcxx/test/std/language.support/support.runtime/cstdlib.pass.cpp
@@ -78,7 +78,7 @@ void test_abs() {
     static_assert(!has_abs::value, "");
     static_assert(!has_abs::value, "");
     static_assert(!has_abs::value, "");
-    static_assert(!has_abs::value, "");
+    static_assert(!has_abs::value, "");

     TEST_DIAGNOSTIC_POP
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/classic_table.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/classic_table.pass.cpp
index 7f85dee329a67..8fe8080bde98f 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/classic_table.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/classic_table.pass.cpp
@@ -26,7 +26,7 @@ int main(int, char**)
     typedef F::mask mask;
     const mask *p = F::classic_table();

-    for ( size_t i = 0; i < 128; ++i ) // values above 128 are not consistent
+    for ( std::size_t i = 0; i < 128; ++i ) // values above 128 are not consistent
     {
         bool expect_cntrl = (i < 32 || 126 < i);
diff --git a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/table_size.pass.cpp b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/table_size.pass.cpp
index 4017b94eed278..57fab12796078 100644
--- a/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/table_size.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.ctype/facet.ctype.special/facet.ctype.char.statics/table_size.pass.cpp
@@ -24,7 +24,7 @@ int main(int, char**)
 {
     typedef std::ctype F;
-    const size_t* G = &F::table_size;
+    const std::size_t* G = &F::table_size;
     assert(*G >= 256);

     return 0;
diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/test_neg_one.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/test_neg_one.pass.cpp
index 5aa2fe6bbbe9f..5eb7c92daec2a 100644
--- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/test_neg_one.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/test_neg_one.pass.cpp
@@ -84,7 +84,7 @@ void test_negate() {
     ++value;
     std::string std_str = make_neg_string(value);
     const char* str = std_str.data();
-    size_t size = std_str.size();
+    std::size_t size = std_str.size();
     std::ios_base::iostate err = ios.goodbit;
     cpp17_input_iterator iter =
         f.get(cpp17_input_iterator(str),
@@ -102,7 +102,7 @@ void test_negate() {
     ++value;
     std::string std_str = make_neg_string(value);
     const char* str = std_str.data();
-    size_t size = std_str.size();
+    std::size_t size = std_str.size();
     std::ios_base::iostate err = ios.goodbit;
     cpp17_input_iterator iter =
         f.get(cpp17_input_iterator(str),
@@ -118,7 +118,7 @@ void test_negate() {
     T value = std::numeric_limits::max();
     std::string std_str = make_neg_string(value);
     const char* str = std_str.data();
-    size_t size = std_str.size();
+    std::size_t size = std_str.size();
     std::ios_base::iostate err = ios.goodbit;
     cpp17_input_iterator iter =
         f.get(cpp17_input_iterator(str),
@@ -134,7 +134,7 @@ void test_negate() {
     std::string std_str = make_neg_string(std::numeric_limits::max());
     std_str.back()++;
     const char* str = std_str.data();
-    size_t size = std_str.size();
+    std::size_t size = std_str.size();
     std::ios_base::iostate err = ios.goodbit;
     cpp17_input_iterator iter =
         f.get(cpp17_input_iterator(str),
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_length.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_length.pass.cpp
index 53172e7d630ba..bf2f8af6e8428 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_length.pass.cpp
@@ -26,7 +26,7 @@
 #include "test_macros.h"

-template
+template
 struct TestHelper;

 template
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_max_length.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_max_length.pass.cpp
index ac3daf40987b2..e9c19e51b24e6 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_max_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_max_length.pass.cpp
@@ -25,7 +25,7 @@
 #include "test_macros.h"

-template
+template
 struct TestHelper;

 template
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp
index 2a6936aee10ca..24b1d22881b47 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf16_out.pass.cpp
@@ -28,7 +28,7 @@
 #include "test_macros.h"

-template
+template
 struct TestHelper;
 template struct TestHelper {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp
index 37cc10aa90b7a..0d52894282bd0 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_length.pass.cpp
@@ -26,7 +26,7 @@
 #include "test_macros.h"

-template
+template
 struct TestHelper;

 template
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp
index 6279dceeb2eb5..32fdde30153ec 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_max_length.pass.cpp
@@ -25,7 +25,7 @@
 #include "test_macros.h"

-template
+template
 struct TestHelper;

 template
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp
index 7d124c982e5ec..22636ca08980c 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_out.pass.cpp
@@ -28,7 +28,7 @@
 #include "test_macros.h"

-template
+template
 struct TestHelper;

 template
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp
index d7776b3a2d899..24eb2dff6182c 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_in.pass.cpp
@@ -28,7 +28,7 @@
 #include "test_macros.h"

-template
+template
 struct TestHelper;
 template struct TestHelper {
diff --git a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp
index c208dc33b0f2a..b9eeb6adc9c86 100644
--- a/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp
+++ b/libcxx/test/std/localization/locale.stdcvt/codecvt_utf8_utf16_out.pass.cpp
@@ -28,7 +28,7 @@
 #include "test_macros.h"

-template
+template
 struct TestHelper;
 template struct TestHelper {
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp
index e1b1f69e81deb..2861e4a52036e 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/converted.pass.cpp
@@ -22,7 +22,7 @@
 #include "test_macros.h"

-template
+template
 struct TestHelper;
 template struct TestHelper {
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp
index 157b7454153bd..ac614b087b90a 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/from_bytes.pass.cpp
@@ -25,7 +25,7 @@
 #include "test_macros.h"

-template
+template
 struct TestHelper;
 template struct TestHelper {
diff --git a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp
index 52c5897634d04..4f3b9318c2f35 100644
--- a/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp
+++ b/libcxx/test/std/localization/locales/locale.convenience/conversions/conversions.string/to_bytes.pass.cpp
@@ -25,7 +25,7 @@
 #include "test_macros.h"

-template
+template
 struct TestHelper;
 template struct TestHelper {
diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp
index c768d0fa9de26..1d4699c18d8ea 100644
--- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp
+++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp
@@ -43,7 +43,7 @@ int main(int, char**)
     static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}}
     static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}}
     static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}}
-    static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}}
+    static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}}
     static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}}
     static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}}
diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp
index 7aa1265838dbb..0da878d0b1f17 100644
--- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp
@@ -121,7 +121,7 @@ int main(int, char**)
     static_assert(test());
    static_assert(test());
    static_assert(test());
-    static_assert(test());
+    static_assert(test());

     test();
     test();
@@ -137,7 +137,7 @@ int main(int, char**)
     test();
     test();
     test();
-    test();
+    test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp
index 7b8d67c6152f0..41d5ed1e06017 100644
--- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp
@@ -119,7 +119,7 @@ int main(int, char**)
     static_assert(test());
     static_assert(test());
     static_assert(test());
-    static_assert(test());
+    static_assert(test());

     test();
     test();
@@ -136,7 +136,7 @@ int main(int, char**)
     test();
     test();
     test();
-    test();
+    test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp
index 899d272f56d7c..53fdec6bd2076 100644
--- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp
@@ -123,7 +123,7 @@ int main(int, char**)
     static_assert(test());
     static_assert(test());
     static_assert(test());
-    static_assert(test());
+    static_assert(test());

     test();
     test();
@@ -139,7 +139,7 @@ int main(int, char**)
     test();
     test();
     test();
-    test();
+    test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp
index 2c33bda72982c..044a87fa41352 100644
--- a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp
@@ -121,7 +121,7 @@ int main(int, char**)
     static_assert(test());
     static_assert(test());
     static_assert(test());
-    static_assert(test());
+    static_assert(test());

     test();
     test();
@@ -137,7 +137,7 @@ int main(int, char**)
     test();
     test();
     test();
-    test();
+    test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp
index f9def0a4d3407..06a8a6d6cc879 100644
--- a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp
@@ -118,7 +118,7 @@ int main(int, char**)
     static_assert(test());
     static_assert(test());
     static_assert(test());
-    static_assert(test());
+    static_assert(test());

     test();
     test();
@@ -134,7 +134,7 @@ int main(int, char**)
     test();
     test();
     test();
-    test();
+    test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp
index b76731eeb0469..5a30e57c409a6 100644
--- a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp
@@ -117,7 +117,7 @@ int main(int, char**)
     static_assert(test());
     static_assert(test());
     static_assert(test());
-    static_assert(test());
+    static_assert(test());

     test();
     test();
@@ -133,7 +133,7 @@ int main(int, char**)
     test();
     test();
     test();
-    test();
+    test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp
index c1a9a4a0ebb5f..1698e31086ea4 100644
--- a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp
@@ -122,7 +122,7 @@ int main(int, char**)
     static_assert(test());
     static_assert(test());
     static_assert(test());
-    static_assert(test());
+    static_assert(test());

     test();
     test();
@@ -138,7 +138,7 @@ int main(int, char**)
     test();
     test();
     test();
-    test();
+    test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp
index 639144309454c..91fbab8bbed84 100644
--- a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp
@@ -119,7 +119,7 @@ int main(int, char**)
     static_assert(test());
     static_assert(test());
     static_assert(test());
-    static_assert(test());
+    static_assert(test());

     test();
     test();
@@ -135,7 +135,7 @@ int main(int, char**)
     test();
     test();
     test();
-    test();
+    test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp
index 6b996ed5177a9..00e60ff588176 100644
--- a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp
@@ -129,7 +129,7 @@ int main(int, char**)
     static_assert(test());
     static_assert(test());
     static_assert(test());
-    static_assert(test());
+    static_assert(test());

     test();
     test();
@@ -145,7 +145,7 @@ int main(int, char**)
     test();
     test();
     test();
-    test();
+    test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
index 34859ae922197..5fe63e47403c4 100644
--- a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
@@ -118,7 +118,7 @@ int main(int, char**)
     static_assert(test());
     static_assert(test());
     static_assert(test());
-    static_assert(test());
+    static_assert(test());

     test();
     test();
@@ -135,7 +135,7 @@ int main(int, char**)
     test();
     test();
     test();
-    test();
+    test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
index a6851e72a5ad6..1b63540cd8c45 100644
--- a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
@@ -119,7 +119,7 @@ int main(int, char**)
     static_assert(test());
     static_assert(test());
     static_assert(test());
-    static_assert(test());
+    static_assert(test());

     test();
     test();
@@ -136,7 +136,7 @@ int main(int, char**)
     test();
     test();
     test();
-    test();
+    test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/c.math/cmath.pass.cpp b/libcxx/test/std/numerics/c.math/cmath.pass.cpp
index c867472662150..e2062cc7ecfd9 100644
--- a/libcxx/test/std/numerics/c.math/cmath.pass.cpp
+++ b/libcxx/test/std/numerics/c.math/cmath.pass.cpp
@@ -133,7 +133,7 @@ void test_abs()
     static_assert(!has_abs::value, "");
     static_assert(!has_abs::value, "");
     static_assert(!has_abs::value, "");
-    static_assert(!has_abs::value, "");
+    static_assert(!has_abs::value, "");

     TEST_DIAGNOSTIC_POP
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/deduct.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/deduct.pass.cpp
index 80ac2ae0fd636..e664b6acd5fd9 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/deduct.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/deduct.pass.cpp
@@ -64,7 +64,7 @@ int main(int, char**)
   { // From (indirect_array)
     std::valarray v = {1, 2, 3, 4, 5};
-    std::valarray i = {1, 2, 3};
+    std::valarray i = {1, 2, 3};
     std::valarray v2 = v[i];
     static_assert(std::is_same_v>);
   }
diff --git a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/size.pass.cpp b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/size.pass.cpp
index 4c87bd4e816ca..21d4ee15f57c6 100644
--- a/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/size.pass.cpp
+++ b/libcxx/test/std/numerics/numarray/template.valarray/valarray.cons/size.pass.cpp
@@ -21,7 +21,7 @@ struct S {
     S() : x(1) {}
     ~S() { ++cnt_dtor; }
     int x;
-    static size_t cnt_dtor;
+    static std::size_t cnt_dtor;
 };

 size_t S::cnt_dtor = 0;
diff --git a/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp b/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp
index 69f25fcbc3c6b..bef2dd7866964 100644
--- a/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan.pass.cpp
@@ -57,33 +57,33 @@ test()
     }
 }

-constexpr size_t triangle(size_t n) { return n*(n+1)/2; }
+constexpr std::size_t triangle(size_t n) { return n*(n+1)/2; }

 // Basic sanity
 TEST_CONSTEXPR_CXX20 void
 basic_tests()
 {
     {
-    std::array v;
+    std::array v;
     std::fill(v.begin(), v.end(), 3);
-    std::exclusive_scan(v.begin(), v.end(), v.begin(), size_t{50});
-    for (size_t i = 0; i < v.size(); ++i)
+    std::exclusive_scan(v.begin(), v.end(), v.begin(), std::size_t{50});
+    for (std::size_t i = 0; i < v.size(); ++i)
         assert(v[i] == 50 + i * 3);
     }

     {
-    std::array v;
+    std::array v;
     std::iota(v.begin(), v.end(), 0);
-    std::exclusive_scan(v.begin(), v.end(), v.begin(), size_t{30});
-    for (size_t i = 0; i < v.size(); ++i)
+    std::exclusive_scan(v.begin(), v.end(), v.begin(), std::size_t{30});
+    for (std::size_t i = 0; i < v.size(); ++i)
         assert(v[i] == 30 + triangle(i-1));
     }

     {
-    std::array v;
+    std::array v;
     std::iota(v.begin(), v.end(), 1);
-    std::exclusive_scan(v.begin(), v.end(), v.begin(), size_t{40});
-    for (size_t i = 0; i < v.size(); ++i)
+    std::exclusive_scan(v.begin(), v.end(), v.begin(), std::size_t{40});
+    for (std::size_t i = 0; i < v.size(); ++i)
        assert(v[i] == 40 + triangle(i));
     }
diff --git a/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp b/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp
index 53698b1d7b2e1..ca2de4aec1600 100644
--- a/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/exclusive.scan/exclusive_scan_init_op.pass.cpp
@@ -77,13 +77,13 @@ test()
     {
     std::array v;
     std::iota(v.begin(), v.end(), static_cast(1));
-    std::array res;
+    std::array res;
     std::exclusive_scan(v.begin(), v.end(), res.begin(), 1, std::multiplies<>());
     assert(res.size() == 10);

-    size_t j = 1;
+    std::size_t j = 1;
     assert(res[0] == 1);
-    for (size_t i = 1; i < v.size(); ++i)
+    for (std::size_t i = 1; i < v.size(); ++i)
     {
         j *= i;
         assert(res[i] == j);
diff --git a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp
index 6e005060cbda5..299d085d01a06 100644
--- a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan.pass.cpp
@@ -58,38 +58,38 @@ test()
     }
 }

-constexpr size_t triangle(size_t n) { return n*(n+1)/2; }
+constexpr std::size_t triangle(size_t n) { return n*(n+1)/2; }

 // Basic sanity
 TEST_CONSTEXPR_CXX20 void
 basic_tests()
 {
     {
-    std::array v;
+    std::array v;
     std::fill(v.begin(), v.end(), 3);
     std::inclusive_scan(v.begin(), v.end(), v.begin());
-    for (size_t i = 0; i < v.size(); ++i)
+    for (std::size_t i = 0; i < v.size(); ++i)
         assert(v[i] == (i+1) * 3);
     }

     {
-    std::array v;
+    std::array v;
     std::iota(v.begin(), v.end(), 0);
     std::inclusive_scan(v.begin(), v.end(), v.begin());
-    for (size_t i = 0; i < v.size(); ++i)
+    for (std::size_t i = 0; i < v.size(); ++i)
         assert(v[i] == triangle(i));
     }

     {
-    std::array v;
+    std::array v;
     std::iota(v.begin(), v.end(), 1);
     std::inclusive_scan(v.begin(), v.end(), v.begin());
-    for (size_t i = 0; i < v.size(); ++i)
+    for (std::size_t i = 0; i < v.size(); ++i)
         assert(v[i] == triangle(i + 1));
     }

     {
-    std::array v, res;
+    std::array v, res;
     std::inclusive_scan(v.begin(), v.end(), res.begin());
     assert(res.empty());
     }
diff --git a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp
index 5e33394f7483e..87bea923eed3f 100644
--- a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op.pass.cpp
@@ -62,38 +62,38 @@ test()
     }
 }

-constexpr size_t triangle(size_t n) { return n*(n+1)/2; }
+constexpr std::size_t triangle(size_t n) { return n*(n+1)/2; }

 // Basic sanity
 TEST_CONSTEXPR_CXX20 void
 basic_tests()
 {
     {
-    std::array v;
+    std::array v;
     std::fill(v.begin(), v.end(), 3);
     std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>());
-    for (size_t i = 0; i < v.size(); ++i)
+    for (std::size_t i = 0; i < v.size(); ++i)
         assert(v[i] == (i+1) * 3);
     }

     {
-    std::array v;
+    std::array v;
     std::iota(v.begin(), v.end(), 0);
     std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>());
-    for (size_t i = 0; i < v.size(); ++i)
+    for (std::size_t i = 0; i < v.size(); ++i)
         assert(v[i] == triangle(i));
     }

     {
-    std::array v;
+    std::array v;
     std::iota(v.begin(), v.end(), 1);
     std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>());
-    for (size_t i = 0; i < v.size(); ++i)
+    for (std::size_t i = 0; i < v.size(); ++i)
         assert(v[i] == triangle(i + 1));
     }

     {
-    std::array v, res;
+    std::array v, res;
     std::inclusive_scan(v.begin(), v.end(), res.begin(), std::plus<>());
     assert(res.empty());
     }
diff --git a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp
index f7c2be2c66316..2e21d38f47344 100644
--- a/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/inclusive.scan/inclusive_scan_op_init.pass.cpp
@@ -62,39 +62,39 @@ test()
     }
 }

-constexpr size_t triangle(size_t n) { return n*(n+1)/2; }
+constexpr std::size_t triangle(size_t n) { return n*(n+1)/2; }

 // Basic sanity
 TEST_CONSTEXPR_CXX20 void
 basic_tests()
 {
     {
-    std::array v;
+    std::array v;
     std::fill(v.begin(), v.end(), 3);
-    std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), size_t{50});
-    for (size_t i = 0; i < v.size(); ++i)
+    std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), std::size_t{50});
+    for (std::size_t i = 0; i < v.size(); ++i)
        assert(v[i] == 50 + (i+1) * 3);
     }

     {
-    std::array v;
+    std::array v;
     std::iota(v.begin(), v.end(), 0);
-    std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), size_t{40});
-    for (size_t i = 0; i < v.size(); ++i)
+    std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), std::size_t{40});
+    for (std::size_t i = 0; i < v.size(); ++i)
         assert(v[i] == 40 + triangle(i));
     }

     {
-    std::array v;
+    std::array v;
     std::iota(v.begin(), v.end(), 1);
-    std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), size_t{30});
-    for (size_t i = 0; i < v.size(); ++i)
+    std::inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), std::size_t{30});
+    for (std::size_t i = 0; i < v.size(); ++i)
         assert(v[i] == 30 + triangle(i + 1));
     }

     {
-    std::array v, res;
-    std::inclusive_scan(v.begin(), v.end(), res.begin(), std::plus<>(), size_t{40});
+    std::array v, res;
+    std::inclusive_scan(v.begin(), v.end(), res.begin(), std::plus<>(), std::size_t{40});
     assert(res.empty());
     }

@@ -102,13 +102,13 @@ basic_tests()
     {
     std::array v;
     std::iota(v.begin(), v.end(), static_cast(1));
-    std::array res;
-    std::inclusive_scan(v.begin(), v.end(), res.begin(), std::multiplies<>(), size_t{1});
+    std::array res;
+    std::inclusive_scan(v.begin(), v.end(), res.begin(), std::multiplies<>(), std::size_t{1});
     assert(res.size() == 10);

-    size_t j = 1;
+    std::size_t j = 1;
     assert(res[0] == 1);
-    for (size_t i = 1; i < v.size(); ++i)
+    for (std::size_t i = 1; i < v.size(); ++i)
     {
         j *= i + 1;
         assert(res[i] == j);
diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp
index 1862045a4ec90..03f3bdaf2d561 100644
--- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp
@@ -138,7 +138,7 @@ int main(int, char**)
 //  int_test();
     signed_test();
-    unsigned_test();
+    unsigned_test();

     return 0;
 }
diff --git a/libcxx/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp b/libcxx/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp
index 9ab546a06d85b..52272205307a9 100644
--- a/libcxx/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp
b/libcxx/test/std/numerics/numeric.ops/transform.exclusive.scan/transform_exclusive_scan_init_bop_uop.pass.cpp @@ -88,39 +88,39 @@ test() } } -constexpr size_t triangle(size_t n) { return n*(n+1)/2; } +constexpr std::size_t triangle(size_t n) { return n*(n+1)/2; } // Basic sanity TEST_CONSTEXPR_CXX20 void basic_tests() { { - std::array v; + std::array v; std::fill(v.begin(), v.end(), 3); - std::transform_exclusive_scan(v.begin(), v.end(), v.begin(), size_t{50}, std::plus<>(), add_one{}); - for (size_t i = 0; i < v.size(); ++i) + std::transform_exclusive_scan(v.begin(), v.end(), v.begin(), std::size_t{50}, std::plus<>(), add_one{}); + for (std::size_t i = 0; i < v.size(); ++i) assert(v[i] == 50 + i * 4); } { - std::array v; + std::array v; std::iota(v.begin(), v.end(), 0); - std::transform_exclusive_scan(v.begin(), v.end(), v.begin(), size_t{30}, std::plus<>(), add_one{}); - for (size_t i = 0; i < v.size(); ++i) + std::transform_exclusive_scan(v.begin(), v.end(), v.begin(), std::size_t{30}, std::plus<>(), add_one{}); + for (std::size_t i = 0; i < v.size(); ++i) assert(v[i] == 30 + triangle(i - 1) + i); } { - std::array v; + std::array v; std::iota(v.begin(), v.end(), 1); - std::transform_exclusive_scan(v.begin(), v.end(), v.begin(), size_t{40}, std::plus<>(), add_one{}); - for (size_t i = 0; i < v.size(); ++i) + std::transform_exclusive_scan(v.begin(), v.end(), v.begin(), std::size_t{40}, std::plus<>(), add_one{}); + for (std::size_t i = 0; i < v.size(); ++i) assert(v[i] == 40 + triangle(i) + i); } { - std::array v, res; - std::transform_exclusive_scan(v.begin(), v.end(), res.begin(), size_t{40}, std::plus<>(), add_one{}); + std::array v, res; + std::transform_exclusive_scan(v.begin(), v.end(), res.begin(), std::size_t{40}, std::plus<>(), add_one{}); assert(res.empty()); } @@ -128,13 +128,13 @@ basic_tests() { std::array v; std::iota(v.begin(), v.end(), static_cast(1)); - std::array res; - std::transform_exclusive_scan(v.begin(), v.end(), res.begin(), size_t{1}, std::multiplies<>(), add_one{}); + std::array res; + std::transform_exclusive_scan(v.begin(), v.end(), res.begin(), std::size_t{1}, std::multiplies<>(), add_one{}); assert(res.size() == 10); - size_t j = 1; + std::size_t j = 1; assert(res[0] == 1); - for (size_t i = 1; i < res.size(); ++i) + for (std::size_t i = 1; i < res.size(); ++i) { j *= i + 1; assert(res[i] == j); diff --git a/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp b/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp index d4a95bf582978..80ead01e9a795 100644 --- a/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp +++ b/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop.pass.cpp @@ -77,38 +77,38 @@ test() } } -constexpr size_t triangle(size_t n) { return n*(n+1)/2; } +constexpr std::size_t triangle(size_t n) { return n*(n+1)/2; } // Basic sanity TEST_CONSTEXPR_CXX20 void basic_tests() { { - std::array v; + std::array v; std::fill(v.begin(), v.end(), 3); std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), add_one{}); - for (size_t i = 0; i < v.size(); ++i) + for (std::size_t i = 0; i < v.size(); ++i) assert(v[i] == (i+1) * 4); } { - std::array v; + std::array v; std::iota(v.begin(), v.end(), 0); std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), add_one{}); - for (size_t i = 0; i < v.size(); ++i) + for (std::size_t i = 
0; i < v.size(); ++i) assert(v[i] == triangle(i) + i + 1); } { - std::array v; + std::array v; std::iota(v.begin(), v.end(), 1); std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), add_one{}); - for (size_t i = 0; i < v.size(); ++i) + for (std::size_t i = 0; i < v.size(); ++i) assert(v[i] == triangle(i + 1) + i + 1); } { - std::array v, res; + std::array v, res; std::transform_inclusive_scan(v.begin(), v.end(), res.begin(), std::plus<>(), add_one{}); assert(res.empty()); } diff --git a/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp b/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp index 7d89c5b320468..18be676c7a54d 100644 --- a/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp +++ b/libcxx/test/std/numerics/numeric.ops/transform.inclusive.scan/transform_inclusive_scan_bop_uop_init.pass.cpp @@ -90,39 +90,39 @@ test() } } -constexpr size_t triangle(size_t n) { return n*(n+1)/2; } +constexpr std::size_t triangle(size_t n) { return n*(n+1)/2; } // Basic sanity TEST_CONSTEXPR_CXX20 void basic_tests() { { - std::array v; + std::array v; std::fill(v.begin(), v.end(), 3); - std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), add_one{}, size_t{50}); - for (size_t i = 0; i < v.size(); ++i) + std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), add_one{}, std::size_t{50}); + for (std::size_t i = 0; i < v.size(); ++i) assert(v[i] == 50 + (i + 1) * 4); } { - std::array v; + std::array v; std::iota(v.begin(), v.end(), 0); - std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), add_one{}, size_t{30}); - for (size_t i = 0; i < v.size(); ++i) + std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), add_one{}, std::size_t{30}); + for (std::size_t i = 0; i < v.size(); ++i) assert(v[i] == 30 + triangle(i) + i + 1); } { - std::array v; + std::array v; std::iota(v.begin(), v.end(), 1); - std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), add_one{}, size_t{40}); - for (size_t i = 0; i < v.size(); ++i) + std::transform_inclusive_scan(v.begin(), v.end(), v.begin(), std::plus<>(), add_one{}, std::size_t{40}); + for (std::size_t i = 0; i < v.size(); ++i) assert(v[i] == 40 + triangle(i + 1) + i + 1); } { - std::array v, res; - std::transform_inclusive_scan(v.begin(), v.end(), res.begin(), std::plus<>(), add_one{}, size_t{1}); + std::array v, res; + std::transform_inclusive_scan(v.begin(), v.end(), res.begin(), std::plus<>(), add_one{}, std::size_t{1}); assert(res.empty()); } @@ -130,13 +130,13 @@ basic_tests() { std::array v; std::iota(v.begin(), v.end(), static_cast(1)); - std::array res; - std::transform_inclusive_scan(v.begin(), v.end(), res.begin(), std::multiplies<>(), add_one{}, size_t{1}); + std::array res; + std::transform_inclusive_scan(v.begin(), v.end(), res.begin(), std::multiplies<>(), add_one{}, std::size_t{1}); assert(res.size() == 10); - size_t j = 2; + std::size_t j = 2; assert(res[0] == 2); - for (size_t i = 1; i < res.size(); ++i) + for (std::size_t i = 1; i < res.size(); ++i) { j *= i + 2; assert(res[i] == j); diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp index c24d1a4437931..e11b235b7646d 100644 --- 
a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval.pass.cpp @@ -40,7 +40,7 @@ test1() G g; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); const int N = 1000000; std::vector u; @@ -60,7 +60,7 @@ test1() typedef std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else @@ -103,7 +103,7 @@ test2() G g; double b[] = {10, 14, 16, 17}; double p[] = {0, 62.5, 12.5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); const int N = 1000000; std::vector u; @@ -123,7 +123,7 @@ test2() typedef std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else @@ -166,7 +166,7 @@ test3() G g; double b[] = {10, 14, 16, 17}; double p[] = {25, 0, 12.5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); const int N = 1000000; std::vector u; @@ -186,7 +186,7 @@ test3() typedef std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else @@ -229,7 +229,7 @@ test4() G g; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 0}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); const int N = 1000000; std::vector u; @@ -249,7 +249,7 @@ test4() typedef std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else @@ -292,7 +292,7 @@ test5() G g; double b[] = {10, 14, 16, 17}; double p[] = {25, 0, 0}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); const int N = 100000; std::vector u; @@ -312,7 +312,7 @@ test5() typedef std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else @@ -355,7 +355,7 @@ test6() G g; double b[] = {10, 14, 16, 17}; double p[] = {0, 25, 0}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); const int N = 100000; std::vector u; @@ -375,7 +375,7 @@ test6() typedef std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else @@ -418,7 +418,7 @@ test7() G g; double b[] = {10, 14, 16, 17}; double p[] = {0, 0, 1}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); const int N = 100000; std::vector u; @@ -438,7 +438,7 @@ test7() typedef 
std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else @@ -481,7 +481,7 @@ test8() G g; double b[] = {10, 14, 16}; double p[] = {75, 25}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); const int N = 100000; std::vector u; @@ -501,7 +501,7 @@ test8() typedef std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else @@ -544,7 +544,7 @@ test9() G g; double b[] = {10, 14, 16}; double p[] = {0, 25}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); const int N = 100000; std::vector u; @@ -564,7 +564,7 @@ test9() typedef std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else @@ -607,7 +607,7 @@ test10() G g; double b[] = {10, 14, 16}; double p[] = {1, 0}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); const int N = 100000; std::vector u; @@ -627,7 +627,7 @@ test10() typedef std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else @@ -670,7 +670,7 @@ test11() G g; double b[] = {10, 14}; double p[] = {1}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); const int N = 100000; std::vector u; @@ -690,7 +690,7 @@ test11() typedef std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp index d9f2628a3f03f..d3964ce18e1e1 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/eval_param.pass.cpp @@ -42,7 +42,7 @@ int main(int, char**) G g; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d; P pa(b, b+Np+1, p); const int N = 1000000; @@ -63,7 +63,7 @@ int main(int, char**) typedef std::vector::iterator I; I lb = std::lower_bound(u.begin(), u.end(), b[i]); I ub = std::lower_bound(u.begin(), u.end(), b[i+1]); - const size_t Ni = ub - lb; + const std::size_t Ni = ub - lb; if (prob[i] == 0) assert(Ni == 0); else diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp index db7c0d6d2e312..e3458fe1da46e 100644 --- 
a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/get_param.pass.cpp @@ -25,7 +25,7 @@ int main(int, char**) typedef D::param_type P; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); P pa(b, b+Np+1, p); D d(pa); assert(d.param() == pa); diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp index 72a66c7be7763..f173d6bd809ab 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/io.pass.cpp @@ -35,7 +35,7 @@ int main(int, char**) typedef std::piecewise_constant_distribution<> D; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d1(b, b+Np+1, p); std::ostringstream os; os << d1; diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp index fe0c23f61ecc9..f0bca2ae10acf 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/max.pass.cpp @@ -24,7 +24,7 @@ int main(int, char**) typedef std::piecewise_constant_distribution<> D; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); assert(d.max() == 17); } diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp index 533f3e27017eb..060c4eeaac8ed 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/min.pass.cpp @@ -24,7 +24,7 @@ int main(int, char**) typedef std::piecewise_constant_distribution<> D; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np+1, p); assert(d.min() == 10); } diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp index a97708dcfd6b1..a7074a8a23ef9 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_assign.pass.cpp @@ -26,7 +26,7 @@ int main(int, char**) typedef D::param_type P; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); P p0(b, b+Np+1, p); P p1; p1 = p0; diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp 
b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp index 80b442a458a91..1d444163e0432 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/param_copy.pass.cpp @@ -26,7 +26,7 @@ int main(int, char**) typedef D::param_type P; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); P p0(b, b+Np+1, p); P p1 = p0; assert(p1 == p0); diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp index ea8924effe395..e551c8c2bb38d 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval.pass.cpp @@ -47,11 +47,11 @@ test1() G g; double b[] = {10, 14, 16, 17}; double p[] = {0, 1, 1, 0}; - const size_t Np = sizeof(p) / sizeof(p[0]) - 1; + const std::size_t Np = sizeof(p) / sizeof(p[0]) - 1; D d(b, b+Np+1, p); const int N = 1000000; std::vector u; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { D::result_type v = d(g); assert(d.min() <= v && v < d.max()); @@ -65,16 +65,16 @@ test1() double c = std::numeric_limits::quiet_NaN(); std::vector areas(Np); double S = 0; - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) { areas[i] = (p[i]+p[i+1])*(b[i+1]-b[i])/2; S += areas[i]; } - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) areas[i] /= S; - for (size_t i = 0; i < Np+1; ++i) + for (std::size_t i = 0; i < Np+1; ++i) p[i] /= S; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { int k = std::lower_bound(b, b+Np+1, u[i]) - b - 1; if (k != kp) @@ -99,11 +99,11 @@ test2() G g; double b[] = {10, 14, 16, 17}; double p[] = {0, 0, 1, 0}; - const size_t Np = sizeof(p) / sizeof(p[0]) - 1; + const std::size_t Np = sizeof(p) / sizeof(p[0]) - 1; D d(b, b+Np+1, p); const int N = 1000000; std::vector u; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { D::result_type v = d(g); assert(d.min() <= v && v < d.max()); @@ -117,16 +117,16 @@ test2() double c = std::numeric_limits::quiet_NaN(); std::vector areas(Np); double S = 0; - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) { areas[i] = (p[i]+p[i+1])*(b[i+1]-b[i])/2; S += areas[i]; } - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) areas[i] /= S; - for (size_t i = 0; i < Np+1; ++i) + for (std::size_t i = 0; i < Np+1; ++i) p[i] /= S; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { int k = std::lower_bound(b, b+Np+1, u[i]) - b - 1; if (k != kp) @@ -151,11 +151,11 @@ test3() G g; double b[] = {10, 14, 16, 17}; double p[] = {1, 0, 0, 0}; - const size_t Np = sizeof(p) / sizeof(p[0]) - 1; + const std::size_t Np = sizeof(p) / sizeof(p[0]) - 1; D d(b, b+Np+1, p); - const size_t N = 1000000; + const std::size_t N = 1000000; std::vector u; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { D::result_type v = d(g); assert(d.min() <= v && v < d.max()); @@ -169,16 +169,16 @@ test3() double c = std::numeric_limits::quiet_NaN(); std::vector areas(Np); 
double S = 0; - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) { areas[i] = (p[i]+p[i+1])*(b[i+1]-b[i])/2; S += areas[i]; } - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) areas[i] /= S; - for (size_t i = 0; i < Np+1; ++i) + for (std::size_t i = 0; i < Np+1; ++i) p[i] /= S; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { int k = std::lower_bound(b, b+Np+1, u[i]) - b - 1; if (k != kp) @@ -203,11 +203,11 @@ test4() G g; double b[] = {10, 14, 16}; double p[] = {0, 1, 0}; - const size_t Np = sizeof(p) / sizeof(p[0]) - 1; + const std::size_t Np = sizeof(p) / sizeof(p[0]) - 1; D d(b, b+Np+1, p); const int N = 1000000; std::vector u; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { D::result_type v = d(g); assert(d.min() <= v && v < d.max()); @@ -221,16 +221,16 @@ test4() double c = std::numeric_limits::quiet_NaN(); std::vector areas(Np); double S = 0; - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) { areas[i] = (p[i]+p[i+1])*(b[i+1]-b[i])/2; S += areas[i]; } - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) areas[i] /= S; - for (size_t i = 0; i < Np+1; ++i) + for (std::size_t i = 0; i < Np+1; ++i) p[i] /= S; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { int k = std::lower_bound(b, b+Np+1, u[i]) - b - 1; if (k != kp) @@ -256,11 +256,11 @@ test5() G g; double b[] = {10, 14}; double p[] = {1, 1}; - const size_t Np = sizeof(p) / sizeof(p[0]) - 1; + const std::size_t Np = sizeof(p) / sizeof(p[0]) - 1; D d(b, b+Np+1, p); const int N = 1000000; std::vector u; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { D::result_type v = d(g); assert(d.min() <= v && v < d.max()); @@ -274,17 +274,17 @@ test5() double c = std::numeric_limits::quiet_NaN(); std::vector areas(Np); double S = 0; - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) { assert(i < Np); areas[i] = (p[i]+p[i+1])*(b[i+1]-b[i])/2; S += areas[i]; } - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) areas[i] /= S; - for (size_t i = 0; i < Np+1; ++i) + for (std::size_t i = 0; i < Np+1; ++i) p[i] /= S; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { int k = std::lower_bound(b, b+Np+1, u[i]) - b - 1; if (k != kp) @@ -310,11 +310,11 @@ test6() G g; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5, 0}; - const size_t Np = sizeof(p) / sizeof(p[0]) - 1; + const std::size_t Np = sizeof(p) / sizeof(p[0]) - 1; D d(b, b+Np+1, p); const int N = 1000000; std::vector u; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { D::result_type v = d(g); assert(d.min() <= v && v < d.max()); @@ -328,16 +328,16 @@ test6() double c = std::numeric_limits::quiet_NaN(); std::vector areas(Np); double S = 0; - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) { areas[i] = (p[i]+p[i+1])*(b[i+1]-b[i])/2; S += areas[i]; } - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) areas[i] /= S; - for (size_t i = 0; i < Np+1; ++i) + for (std::size_t i = 0; i < Np+1; ++i) p[i] /= S; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { int k = std::lower_bound(b, b+Np+1, u[i]) - b - 1; if (k != kp) diff --git 
a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp index 9c9365fa772ab..605b11942a0ed 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/eval_param.pass.cpp @@ -48,12 +48,12 @@ int main(int, char**) G g; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5, 0}; - const size_t Np = sizeof(p) / sizeof(p[0]) - 1; + const std::size_t Np = sizeof(p) / sizeof(p[0]) - 1; D d; P pa(b, b+Np+1, p); - const size_t N = 1000000; + const std::size_t N = 1000000; std::vector u; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { D::result_type v = d(g, pa); assert(10 <= v && v < 17); @@ -67,16 +67,16 @@ int main(int, char**) double c = std::numeric_limits::quiet_NaN(); std::vector areas(Np); double S = 0; - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) { areas[i] = (p[i]+p[i+1])*(b[i+1]-b[i])/2; S += areas[i]; } - for (size_t i = 0; i < areas.size(); ++i) + for (std::size_t i = 0; i < areas.size(); ++i) areas[i] /= S; - for (size_t i = 0; i < Np+1; ++i) + for (std::size_t i = 0; i < Np+1; ++i) p[i] /= S; - for (size_t i = 0; i < N; ++i) + for (std::size_t i = 0; i < N; ++i) { int k = std::lower_bound(b, b+Np+1, u[i]) - b - 1; if (k != kp) diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp index b9450c0952e83..5c3600885cb70 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/get_param.pass.cpp @@ -25,7 +25,7 @@ int main(int, char**) typedef D::param_type P; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5, 10}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); P pa(b, b+Np, p); D d(pa); assert(d.param() == pa); diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp index e3983b9c2d0c6..8a0a8ff5a9ec0 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/io.pass.cpp @@ -35,7 +35,7 @@ int main(int, char**) typedef std::piecewise_linear_distribution<> D; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5, 25}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d1(b, b+Np, p); std::ostringstream os; os << d1; diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp index e0c68578bc475..b37ab293b4d40 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/max.pass.cpp @@ -24,7 +24,7 @@ int main(int, char**) typedef std::piecewise_linear_distribution<> D; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 
12.5, 5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np, p); assert(d.max() == 17); } diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp index a0ec3221a3342..51b555e21a833 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/min.pass.cpp @@ -24,7 +24,7 @@ int main(int, char**) typedef std::piecewise_linear_distribution<> D; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5, 0}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); D d(b, b+Np, p); assert(d.min() == 10); } diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp index 47b5c28b7e733..ff9e317218803 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_assign.pass.cpp @@ -26,7 +26,7 @@ int main(int, char**) typedef D::param_type P; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5, 2}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); P p0(b, b+Np, p); P p1; p1 = p0; diff --git a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp index b88d9c252f7ea..15a8466fdf8ca 100644 --- a/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp +++ b/libcxx/test/std/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/param_copy.pass.cpp @@ -26,7 +26,7 @@ int main(int, char**) typedef D::param_type P; double b[] = {10, 14, 16, 17}; double p[] = {25, 62.5, 12.5, 5}; - const size_t Np = sizeof(p) / sizeof(p[0]); + const std::size_t Np = sizeof(p) / sizeof(p[0]); P p0(b, b+Np, p); P p1 = p0; assert(p1 == p0); diff --git a/libcxx/test/std/ranges/range.access/empty.pass.cpp b/libcxx/test/std/ranges/range.access/empty.pass.cpp index 4ff1f3b941f44..990b75bf6452d 100644 --- a/libcxx/test/std/ranges/range.access/empty.pass.cpp +++ b/libcxx/test/std/ranges/range.access/empty.pass.cpp @@ -87,13 +87,13 @@ constexpr bool testEmptyMember() { } struct SizeMember { - size_t size_; - constexpr size_t size() const { return size_; } + std::size_t size_; + constexpr std::size_t size() const { return size_; } }; struct SizeFunction { - size_t size_; - friend constexpr size_t size(SizeFunction sf) { return sf.size_; } + std::size_t size_; + friend constexpr std::size_t size(SizeFunction sf) { return sf.size_; } }; struct BeginEndSizedSentinel { @@ -131,7 +131,7 @@ static_assert(!std::ranges::sized_range); struct DisabledSizeRangeWithBeginEnd { constexpr int *begin() const { return nullptr; } constexpr auto end() const { return sentinel_wrapper(nullptr); } - size_t size() const; + std::size_t size() const; }; template<> inline constexpr bool std::ranges::disable_sized_range = true; diff --git a/libcxx/test/std/ranges/range.access/size.pass.cpp b/libcxx/test/std/ranges/range.access/size.pass.cpp index 
05681733f37d9..fd7d0a8b99752 100644 --- a/libcxx/test/std/ranges/range.access/size.pass.cpp +++ b/libcxx/test/std/ranges/range.access/size.pass.cpp @@ -35,23 +35,23 @@ static_assert(std::ranges::size(std::as_const(array_of_incomplete)) == 42); static_assert(std::ranges::size(static_cast(array_of_incomplete)) == 42); struct SizeMember { - constexpr size_t size() { return 42; } + constexpr std::size_t size() { return 42; } }; struct StaticSizeMember { - constexpr static size_t size() { return 42; } + constexpr static std::size_t size() { return 42; } }; static_assert(!std::is_invocable_v); struct SizeFunction { - friend constexpr size_t size(SizeFunction) { return 42; } + friend constexpr std::size_t size(SizeFunction) { return 42; } }; // Make sure the size member is preferred. struct SizeMemberAndFunction { - constexpr size_t size() { return 42; } - friend constexpr size_t size(SizeMemberAndFunction) { return 0; } + constexpr std::size_t size() { return 42; } + friend constexpr std::size_t size(SizeMemberAndFunction) { return 0; } }; bool constexpr testArrayType() { @@ -61,19 +61,19 @@ bool constexpr testArrayType() { SizeFunction d[4]; assert(std::ranges::size(a) == 4); - ASSERT_SAME_TYPE(decltype(std::ranges::size(a)), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::size(a)), std::size_t); assert(std::ranges::size(b) == 1); - ASSERT_SAME_TYPE(decltype(std::ranges::size(b)), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::size(b)), std::size_t); assert(std::ranges::size(c) == 4); - ASSERT_SAME_TYPE(decltype(std::ranges::size(c)), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::size(c)), std::size_t); assert(std::ranges::size(d) == 4); - ASSERT_SAME_TYPE(decltype(std::ranges::size(d)), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::size(d)), std::size_t); return true; } struct SizeMemberConst { - constexpr size_t size() const { return 42; } + constexpr std::size_t size() const { return 42; } }; struct SizeMemberSigned { @@ -82,7 +82,7 @@ struct SizeMemberSigned { bool constexpr testHasSizeMember() { assert(std::ranges::size(SizeMember()) == 42); - ASSERT_SAME_TYPE(decltype(std::ranges::size(SizeMember())), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::size(SizeMember())), std::size_t); const SizeMemberConst sizeMemberConst; assert(std::ranges::size(sizeMemberConst) == 42); @@ -93,7 +93,7 @@ bool constexpr testHasSizeMember() { ASSERT_SAME_TYPE(decltype(std::ranges::size(SizeMemberSigned())), long); assert(std::ranges::size(StaticSizeMember()) == 42); - ASSERT_SAME_TYPE(decltype(std::ranges::size(StaticSizeMember())), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::size(StaticSizeMember())), std::size_t); return true; } @@ -103,25 +103,25 @@ struct MoveOnlySizeFunction { MoveOnlySizeFunction(MoveOnlySizeFunction &&) = default; MoveOnlySizeFunction(MoveOnlySizeFunction const&) = delete; - friend constexpr size_t size(MoveOnlySizeFunction) { return 42; } + friend constexpr std::size_t size(MoveOnlySizeFunction) { return 42; } }; enum EnumSizeFunction { a, b }; -constexpr size_t size(EnumSizeFunction) { return 42; } +constexpr std::size_t size(EnumSizeFunction) { return 42; } struct SizeFunctionConst { - friend constexpr size_t size(const SizeFunctionConst) { return 42; } + friend constexpr std::size_t size(const SizeFunctionConst) { return 42; } }; struct SizeFunctionRef { - friend constexpr size_t size(SizeFunctionRef&) { return 42; } + friend constexpr std::size_t size(SizeFunctionRef&) { return 42; } }; struct SizeFunctionConstRef { - friend constexpr size_t 
size(SizeFunctionConstRef const&) { return 42; } + friend constexpr std::size_t size(SizeFunctionConstRef const&) { return 42; } }; struct SizeFunctionSigned { @@ -130,7 +130,7 @@ struct SizeFunctionSigned { bool constexpr testHasSizeFunction() { assert(std::ranges::size(SizeFunction()) == 42); - ASSERT_SAME_TYPE(decltype(std::ranges::size(SizeFunction())), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::size(SizeFunction())), std::size_t); static_assert(!std::is_invocable_v); assert(std::ranges::size(EnumSizeFunction()) == 42); assert(std::ranges::size(SizeFunctionConst()) == 42); @@ -159,7 +159,7 @@ struct InvalidReturnTypeFunction { }; struct Convertible { - operator size_t(); + operator std::size_t(); }; struct ConvertibleReturnTypeMember { @@ -188,14 +188,14 @@ static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); struct SizeMemberDisabled { - size_t size() { return 42; } + std::size_t size() { return 42; } }; template <> inline constexpr bool std::ranges::disable_sized_range = true; struct ImproperlyDisabledMember { - size_t size() const { return 42; } + std::size_t size() const { return 42; } }; // Intentionally disabling "const ConstSizeMemberDisabled". This doesn't disable anything @@ -204,14 +204,14 @@ template <> inline constexpr bool std::ranges::disable_sized_range = true; struct SizeFunctionDisabled { - friend size_t size(SizeFunctionDisabled) { return 42; } + friend std::size_t size(SizeFunctionDisabled) { return 42; } }; template <> inline constexpr bool std::ranges::disable_sized_range = true; struct ImproperlyDisabledFunction { - friend size_t size(ImproperlyDisabledFunction const&) { return 42; } + friend std::size_t size(ImproperlyDisabledFunction const&) { return 42; } }; template <> @@ -224,7 +224,7 @@ static_assert( std::is_invocable_v); @@ -277,7 +277,7 @@ struct DisabledSizeRangeWithBeginEnd { int buff[8]; constexpr int* begin() { return buff; } constexpr int* end() { return buff + 8; } - constexpr size_t size() { return 1; } + constexpr std::size_t size() { return 1; } }; template <> @@ -287,14 +287,14 @@ struct SizeBeginAndEndMembers { int buff[8]; constexpr int* begin() { return buff; } constexpr int* end() { return buff + 8; } - constexpr size_t size() { return 1; } + constexpr std::size_t size() { return 1; } }; constexpr bool testRanges() { HasMinusBeginEnd a; assert(std::ranges::size(a) == 2); // Ensure that this is converted to an *unsigned* type. 
- ASSERT_SAME_TYPE(decltype(std::ranges::size(a)), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::size(a)), std::size_t); IntPtrBeginAndEnd b; assert(std::ranges::size(b) == 8); @@ -304,7 +304,7 @@ constexpr bool testRanges() { RandomAccessRange d; assert(std::ranges::size(d) == 2); - ASSERT_SAME_TYPE(decltype(std::ranges::size(d)), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::size(d)), std::size_t); SizeBeginAndEndMembers e; assert(std::ranges::size(e) == 1); diff --git a/libcxx/test/std/ranges/range.access/ssize.pass.cpp b/libcxx/test/std/ranges/range.access/ssize.pass.cpp index be35fe276e9b5..a15dc344512a1 100644 --- a/libcxx/test/std/ranges/range.access/ssize.pass.cpp +++ b/libcxx/test/std/ranges/range.access/ssize.pass.cpp @@ -24,12 +24,12 @@ static_assert( std::is_invocable_v); static_assert( std::is_invocable_v); struct SizeMember { - constexpr size_t size() { return 42; } + constexpr std::size_t size() { return 42; } }; static_assert(!std::is_invocable_v); struct SizeFunction { - friend constexpr size_t size(SizeFunction) { return 42; } + friend constexpr std::size_t size(SizeFunction) { return 42; } }; struct SizeFunctionSigned { @@ -47,7 +47,7 @@ struct ShortUnsignedReturnType { }; // size_t changes depending on the platform. -using SignedSizeT = std::make_signed_t; +using SignedSizeT = std::make_signed_t; constexpr bool test() { int a[4]; diff --git a/libcxx/test/std/ranges/range.adaptors/range.as.rvalue/size.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.as.rvalue/size.pass.cpp index f330b41898b70..3de1a93bc2e61 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.as.rvalue/size.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.as.rvalue/size.pass.cpp @@ -20,7 +20,7 @@ struct ConstSizedView : std::ranges::view_base { int* begin() const; int* end() const; - constexpr size_t size() const { + constexpr std::size_t size() const { *size_called = true; return 3; } @@ -56,7 +56,7 @@ constexpr bool test() { { bool size_called = false; std::ranges::as_rvalue_view view(ConstSizedView{{}, &size_called}); - std::same_as auto size = view.size(); + std::same_as auto size = view.size(); assert(size == 3); assert(size_called); } diff --git a/libcxx/test/std/ranges/range.adaptors/range.counted/counted.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.counted/counted.pass.cpp index 71014f1f10bf9..e55121f95e7c2 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.counted/counted.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.counted/counted.pass.cpp @@ -46,7 +46,7 @@ constexpr bool test() { { static_assert(std::addressof(std::views::counted) == std::addressof(std::ranges::views::counted)); - static_assert( CountedInvocable); + static_assert( CountedInvocable); static_assert(!CountedInvocable); static_assert( CountedInvocable); static_assert( CountedInvocable); @@ -54,7 +54,7 @@ constexpr bool test() { static_assert(!CountedInvocable); static_assert(!CountedInvocable); static_assert(!CountedInvocable); - static_assert(!CountedInvocable); + static_assert(!CountedInvocable); static_assert(!CountedInvocable<>); } diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop/adaptor.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.drop/adaptor.pass.cpp index 6dd90c530b7b5..542f2cda3ee84 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.drop/adaptor.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.drop/adaptor.pass.cpp @@ -47,7 +47,7 @@ struct SizedViewWithUnsizedSentinel : std::ranges::view_base { constexpr auto begin() const { 
return iterator(begin_); } constexpr auto end() const { return sentinel(iterator(end_)); } - constexpr size_t size() const { return end_ - begin_; } + constexpr std::size_t size() const { return end_ - begin_; } }; static_assert(std::ranges::random_access_range); static_assert(std::ranges::sized_range); diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop/begin.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.drop/begin.pass.cpp index 5d08c7027ea85..cff088453d226 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.drop/begin.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.drop/begin.pass.cpp @@ -37,7 +37,7 @@ struct MaybeSimpleView : std::ranges::view_base { return nullptr; } constexpr int* end() const { return nullptr; } - constexpr size_t size() const { return 0; } + constexpr std::size_t size() const { return 0; } }; using SimpleView = MaybeSimpleView; diff --git a/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.default.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.default.pass.cpp index 62aa9b2a1a1f7..11da4843270bf 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.default.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.default.pass.cpp @@ -29,7 +29,7 @@ struct IterNoDefaultCtrView : std::ranges::view_base { sentinel_wrapper*>> end() const; }; -template +template using ElementsIter = std::ranges::iterator_t>; static_assert(!std::default_initializable>); diff --git a/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/member_types.compile.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/member_types.compile.pass.cpp index 67ec179b50b6b..9a76c2fcb70c2 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/member_types.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/member_types.compile.pass.cpp @@ -20,7 +20,7 @@ template using Range = std::ranges::subrange>; -template +template using ElementsIter = std::ranges::iterator_t>; // using iterator_concept = see below; diff --git a/libcxx/test/std/ranges/range.adaptors/range.elements/range.concept.compile.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.elements/range.concept.compile.pass.cpp index 021f0283b4336..7de72ecc0a90c 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.elements/range.concept.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.elements/range.concept.compile.pass.cpp @@ -35,7 +35,7 @@ template using Range = std::ranges::subrange>; -template +template concept HasElementsView = requires { typename std::ranges::elements_view; }; static_assert(HasElementsView*>, 0>); diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp index bf9dd997d184c..122abe6315c11 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp @@ -169,7 +169,7 @@ namespace test8 { struct AlmostTinyRange : std::ranges::view_base { int* begin() const; int* end() const; - static size_t size() { return 1; } + static std::size_t size() { return 1; } }; using View = InputView; @@ -192,7 +192,7 @@ namespace test9 { struct AlmostTinyRange : std::ranges::view_base { int* begin() const; int* end() const; - constexpr static size_t size() { return 2; } + 
constexpr static std::size_t size() { return 2; } }; using View = InputView; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp index 421453245ef8d..f4e87bb47399e 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/general.pass.cpp @@ -75,13 +75,13 @@ constexpr bool is_equal(View& view, const Expected& expected) { return actual_it == view.end() && expected_it == expected.end(); } -template +template constexpr bool test_function_call(T&& input, Separator&& separator, std::array expected) { std::ranges::lazy_split_view v(input, separator); return is_equal(v, expected); } -template +template constexpr bool test_with_piping(T&& input, Separator&& separator, std::array expected) { auto expected_it = expected.begin(); for (auto e : input | std::ranges::views::lazy_split(separator)) { @@ -166,7 +166,7 @@ constexpr std::string_view sv(T&& str) { return std::string_view(str); }; -template +template constexpr void test_one(T&& input, Separator&& separator, std::array expected) { assert(test_function_call(input, separator, expected)); assert(test_with_piping(input, separator, expected)); diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/increment.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/increment.pass.cpp index e4d9ebab26b14..45257f228008e 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/increment.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/increment.pass.cpp @@ -22,7 +22,7 @@ struct EmptyView : std::ranges::view_base { constexpr int* begin() const { return nullptr; } constexpr int* end() const { return nullptr; } - constexpr static size_t size() { return 0; } + constexpr static std::size_t size() { return 0; } }; static_assert(std::ranges::forward_range); static_assert(std::ranges::view); diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/types.h b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/types.h index 2eedc4c91d414..f2878e823202e 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/types.h @@ -10,6 +10,7 @@ #define TEST_STD_RANGES_RANGE_ADAPTORS_RANGE_LAZY_SPLIT_TYPES_H #include +#include #include #include #include @@ -174,7 +175,7 @@ struct ForwardTinyView : std::ranges::view_base { constexpr ForwardTinyView(char c) { *c_ = c; } constexpr forward_iterator begin() const { return forward_iterator(c_); } constexpr forward_iterator end() const { return forward_iterator(c_ + 1); } - constexpr static size_t size() { return 1; } + constexpr static std::size_t size() { return 1; } }; static_assert(std::ranges::forward_range); static_assert(std::ranges::view); diff --git a/libcxx/test/std/ranges/range.adaptors/range.reverse/size.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.reverse/size.pass.cpp index d74b048dfb283..3d74bd2f074fa 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.reverse/size.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.reverse/size.pass.cpp @@ -23,9 +23,9 @@ template struct BidirSizedRange : std::ranges::view_base { int *ptr_; - size_t size_; + std::size_t size_; - constexpr BidirSizedRange(int *ptr, size_t size) : ptr_(ptr), size_(size) {} + constexpr BidirSizedRange(int 
*ptr, std::size_t size) : ptr_(ptr), size_(size) {} constexpr BidirSizedRange(const BidirSizedRange &) requires (CC == Copyable) = default; constexpr BidirSizedRange(BidirSizedRange &&) requires (CC == MoveOnly) = default; constexpr BidirSizedRange& operator=(const BidirSizedRange &) requires (CC == Copyable) = default; @@ -36,7 +36,7 @@ struct BidirSizedRange : std::ranges::view_base { constexpr bidirectional_iterator end() { return bidirectional_iterator{ptr_ + 8}; } constexpr bidirectional_iterator end() const { return bidirectional_iterator{ptr_ + 8}; } - constexpr size_t size() const { return size_; } + constexpr std::size_t size() const { return size_; } }; constexpr bool test() { @@ -49,8 +49,8 @@ constexpr bool test() { assert(rev.size() == 4); assert(std::move(rev).size() == 4); - ASSERT_SAME_TYPE(decltype(rev.size()), size_t); - ASSERT_SAME_TYPE(decltype(std::move(rev).size()), size_t); + ASSERT_SAME_TYPE(decltype(rev.size()), std::size_t); + ASSERT_SAME_TYPE(decltype(std::move(rev).size()), std::size_t); } // Non-common, const bidirectional range. { @@ -59,15 +59,15 @@ constexpr bool test() { assert(rev.size() == 4); assert(std::move(rev).size() == 4); - ASSERT_SAME_TYPE(decltype(rev.size()), size_t); - ASSERT_SAME_TYPE(decltype(std::move(rev).size()), size_t); + ASSERT_SAME_TYPE(decltype(rev.size()), std::size_t); + ASSERT_SAME_TYPE(decltype(std::move(rev).size()), std::size_t); } // Non-common, non-const (move only) bidirectional range. { auto rev = std::ranges::reverse_view(BidirSizedRange{buffer, 4}); assert(std::move(rev).size() == 4); - ASSERT_SAME_TYPE(decltype(std::move(rev).size()), size_t); + ASSERT_SAME_TYPE(decltype(std::move(rev).size()), std::size_t); } return true; diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp index 8684e3b6cd9e7..5389d931f840e 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.split/general.pass.cpp @@ -33,13 +33,13 @@ constexpr bool is_equal(View& view, const Expected& expected) { return std::ranges::equal(view, expected, std::ranges::equal); } -template +template constexpr bool test_function_call(T&& input, Separator&& separator, std::array expected) { std::ranges::split_view v(input, separator); return is_equal(v, expected); } -template +template constexpr bool test_with_piping(T&& input, Separator&& separator, std::array expected) { auto expected_it = expected.begin(); for (auto e : input | std::ranges::views::split(separator)) { @@ -124,7 +124,7 @@ constexpr std::string_view sv(T&& str) { return std::string_view(str); }; -template +template constexpr void test_one(T&& input, Separator&& separator, std::array expected) { assert(test_function_call(input, separator, expected)); assert(test_with_piping(input, separator, expected)); diff --git a/libcxx/test/std/ranges/range.adaptors/range.take/begin.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take/begin.pass.cpp index ad5442fee8021..f2ac62e764d59 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take/begin.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take/begin.pass.cpp @@ -22,7 +22,7 @@ struct NonCommonSimpleView : std::ranges::view_base { int* begin() const; sentinel_wrapper end() const; - size_t size() { return 0; } // deliberately non-const + std::size_t size() { return 0; } // deliberately non-const }; static_assert(std::ranges::sized_range); 
static_assert(!std::ranges::sized_range); diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/general.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/general.pass.cpp index f51906675ec70..f48aae6d2887d 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.transform/general.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.transform/general.pass.cpp @@ -39,7 +39,7 @@ auto toUpper(R range) { return std::ranges::transform_view(range, [](char c) { return std::toupper(c); }); } -template> +template> auto joinArrays(E1 (&a)[N], E2 (&b)[N], Join join = Join()) { return std::ranges::transform_view(a, [&a, &b, join](auto& x) { auto idx = (&x) - a; diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/types.h b/libcxx/test/std/ranges/range.adaptors/range.transform/types.h index d31db8d5a8146..14f85722a8c19 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.transform/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.transform/types.h @@ -9,6 +9,8 @@ #ifndef TEST_STD_RANGES_RANGE_ADAPTORS_RANGE_TRANSFORM_TYPES_H #define TEST_STD_RANGES_RANGE_ADAPTORS_RANGE_TRANSFORM_TYPES_H +#include + #include "test_macros.h" #include "test_iterators.h" #include "test_range.h" @@ -106,7 +108,7 @@ constexpr bool operator==(int* lhs, const RandomAccessIter &rhs) { return base(r struct SizedSentinelNotConstView : std::ranges::view_base { ForwardIter begin() const; int *end() const; - size_t size(); + std::size_t size(); }; // TODO: remove these bogus operators bool operator==(const ForwardIter &lhs, int* rhs); diff --git a/libcxx/test/std/ranges/range.factories/range.single.view/size.pass.cpp b/libcxx/test/std/ranges/range.factories/range.single.view/size.pass.cpp index 7d6a72db53b82..f57bc40acd156 100644 --- a/libcxx/test/std/ranges/range.factories/range.single.view/size.pass.cpp +++ b/libcxx/test/std/ranges/range.factories/range.single.view/size.pass.cpp @@ -20,28 +20,28 @@ constexpr bool test() { auto sv = std::ranges::single_view(42); assert(sv.size() == 1); - ASSERT_SAME_TYPE(decltype(sv.size()), size_t); + ASSERT_SAME_TYPE(decltype(sv.size()), std::size_t); static_assert(noexcept(sv.size())); } { const auto sv = std::ranges::single_view(42); assert(sv.size() == 1); - ASSERT_SAME_TYPE(decltype(sv.size()), size_t); + ASSERT_SAME_TYPE(decltype(sv.size()), std::size_t); static_assert(noexcept(sv.size())); } { auto sv = std::ranges::single_view(42); assert(std::ranges::size(sv) == 1); - ASSERT_SAME_TYPE(decltype(std::ranges::size(sv)), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::size(sv)), std::size_t); static_assert(noexcept(std::ranges::size(sv))); } { const auto sv = std::ranges::single_view(42); assert(std::ranges::size(sv) == 1); - ASSERT_SAME_TYPE(decltype(std::ranges::size(sv)), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::size(sv)), std::size_t); static_assert(noexcept(std::ranges::size(sv))); } @@ -49,7 +49,7 @@ constexpr bool test() { { assert(std::ranges::single_view::size() == 1); - ASSERT_SAME_TYPE(decltype(std::ranges::single_view::size()), size_t); + ASSERT_SAME_TYPE(decltype(std::ranges::single_view::size()), std::size_t); static_assert(noexcept(std::ranges::single_view::size())); } diff --git a/libcxx/test/std/ranges/range.utility/range.subrange/ctor.begin_end_size.pass.cpp b/libcxx/test/std/ranges/range.utility/range.subrange/ctor.begin_end_size.pass.cpp index c83e83b62eb80..52a66f6b10b33 100644 --- a/libcxx/test/std/ranges/range.utility/range.subrange/ctor.begin_end_size.pass.cpp +++ 
b/libcxx/test/std/ranges/range.utility/range.subrange/ctor.begin_end_size.pass.cpp @@ -28,8 +28,8 @@ static_assert( std::is_constructible_v); // 1. static_assert( std::is_constructible_v); // 2. static_assert( std::is_constructible_v); // 3. (Same as default case.) -static_assert(!std::is_constructible_v); // 4. -static_assert( std::is_constructible_v); // 5. +static_assert(!std::is_constructible_v); // 4. +static_assert( std::is_constructible_v); // 5. constexpr bool test() { SizedSentinelForwardSubrange a(ConditionallyConvertibleIter(globalBuff), ConditionallyConvertibleIter(globalBuff + 8), 8); diff --git a/libcxx/test/std/ranges/range.utility/range.subrange/get.pass.cpp b/libcxx/test/std/ranges/range.utility/range.subrange/get.pass.cpp index 5e8969b8d408f..e1daf887a015e 100644 --- a/libcxx/test/std/ranges/range.utility/range.subrange/get.pass.cpp +++ b/libcxx/test/std/ranges/range.utility/range.subrange/get.pass.cpp @@ -16,7 +16,7 @@ #include "test_macros.h" #include "test_iterators.h" -template +template concept HasGet = requires { std::get(std::declval()); }; diff --git a/libcxx/test/std/ranges/range.utility/range.subrange/types.h b/libcxx/test/std/ranges/range.utility/range.subrange/types.h index e730d6ae05116..865935019e0fd 100644 --- a/libcxx/test/std/ranges/range.utility/range.subrange/types.h +++ b/libcxx/test/std/ranges/range.utility/range.subrange/types.h @@ -211,7 +211,7 @@ struct DifferentSentinelWithSizeMember { constexpr ForwardIter begin() const { return ForwardIter(globalBuff); } constexpr sentinel end() const { return sentinel{globalBuff + 8}; } - constexpr size_t size() const { return 8; } + constexpr std::size_t size() const { return 8; } }; template<> diff --git a/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp b/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp index b1ed28f80e914..2fc2fa8579996 100644 --- a/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp +++ b/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp @@ -95,7 +95,7 @@ struct SizeIsTen : std::ranges::view_interface { int buff[8] = {0, 1, 2, 3, 4, 5, 6, 7}; constexpr ForwardIter begin() const { return ForwardIter(const_cast(buff)); } constexpr ForwardIter end() const { return ForwardIter(const_cast(buff) + 8); } - constexpr size_t size() const { return 10; } + constexpr std::size_t size() const { return 10; } }; static_assert(std::ranges::view); @@ -262,7 +262,7 @@ constexpr bool testSize() { } template -concept SubscriptInvocable = requires (T const& obj, size_t n) { obj[n]; }; +concept SubscriptInvocable = requires (T const& obj, std::size_t n) { obj[n]; }; constexpr bool testSubscript() { static_assert(!SubscriptInvocable); diff --git a/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp index 7d53cd9ee3698..6470b95b4edc1 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/awk.locale.pass.cpp @@ -44,7 +44,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -60,7 +60,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); 
- assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -87,7 +87,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -103,7 +103,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } diff --git a/libcxx/test/std/re/re.alg/re.alg.match/awk.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/awk.pass.cpp index f3385f4891aa6..988beec15e644 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/awk.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/awk.pass.cpp @@ -260,7 +260,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -275,7 +275,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -290,7 +290,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -323,7 +323,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -338,7 +338,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -353,7 +353,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -375,7 +375,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -391,7 +391,7 @@ int main(int, char**) assert(!m.suffix().matched); 
assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -407,7 +407,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 4); @@ -431,7 +431,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -516,7 +516,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -539,7 +539,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -569,7 +569,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -585,7 +585,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -601,7 +601,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -643,7 +643,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == 4); + assert((std::size_t)m.length(0) == 4); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -659,7 +659,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == s + std::char_traits::length(s)); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -677,7 +677,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == s+1); - assert((size_t)m.length(0) == 1); + assert((std::size_t)m.length(0) == 1); assert(m.position(0) == 0); assert(m.str(0) == 
L"a"); } @@ -692,7 +692,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == s+2); - assert((size_t)m.length(0) == 2); + assert((std::size_t)m.length(0) == 2); assert(m.position(0) == 0); assert(m.str(0) == L"ab"); } @@ -902,7 +902,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -917,7 +917,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -932,7 +932,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -965,7 +965,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -980,7 +980,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -995,7 +995,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1017,7 +1017,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1033,7 +1033,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1049,7 +1049,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 4); @@ -1073,7 +1073,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == 
std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1158,7 +1158,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1181,7 +1181,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1211,7 +1211,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1227,7 +1227,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1243,7 +1243,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1301,7 +1301,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == s + std::char_traits::length(s)); - assert((size_t)m.length(0) == std::char_traits::length(s)); + assert((std::size_t)m.length(0) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } diff --git a/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp index 488385f266c90..430d35fe739e5 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/basic.locale.pass.cpp @@ -46,7 +46,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -62,7 +62,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -89,7 +89,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && 
static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -105,7 +105,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } diff --git a/libcxx/test/std/re/re.alg/re.alg.match/basic.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/basic.pass.cpp index d77349d59ec0f..9765a07f2e4fe 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/basic.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/basic.pass.cpp @@ -380,7 +380,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 2); @@ -398,7 +398,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 3); @@ -423,7 +423,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 3); @@ -518,7 +518,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -541,7 +541,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -571,7 +571,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -587,7 +587,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -603,7 +603,7 @@ int main(int, 
char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -894,7 +894,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -909,7 +909,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -924,7 +924,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -957,7 +957,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -972,7 +972,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -987,7 +987,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1008,7 +1008,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 2); @@ -1026,7 +1026,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 3); @@ -1051,7 +1051,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == 
m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 3); @@ -1146,7 +1146,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1169,7 +1169,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1199,7 +1199,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1215,7 +1215,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1231,7 +1231,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } diff --git a/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp index 0f49b9f463fb4..b512fa9b5fcf8 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/ecma.locale.pass.cpp @@ -45,7 +45,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -61,7 +61,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -86,7 +86,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 
&& static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -102,7 +102,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } diff --git a/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp index 58502aa6e1970..4face8ba02bbe 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/ecma.pass.cpp @@ -259,7 +259,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -274,7 +274,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -289,7 +289,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -322,7 +322,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -337,7 +337,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -352,7 +352,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -374,7 +374,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -392,7 +392,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); 
assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -407,7 +407,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 4); @@ -431,7 +431,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -509,7 +509,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -530,7 +530,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -557,7 +557,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -572,7 +572,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -587,7 +587,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -665,7 +665,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == s + std::char_traits::length(s)); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -907,7 +907,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && 
static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -922,7 +922,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -937,7 +937,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -970,7 +970,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -985,7 +985,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1000,7 +1000,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1022,7 +1022,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1040,7 +1040,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1055,7 +1055,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 4); @@ -1079,7 +1079,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && 
static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1157,7 +1157,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1178,7 +1178,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1205,7 +1205,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1220,7 +1220,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1235,7 +1235,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1301,7 +1301,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == s + std::char_traits::length(s)); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } diff --git a/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp index 1e7ded603b96f..472dc19680263 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/extended.locale.pass.cpp @@ -46,7 +46,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -62,7 +62,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -89,7 +89,7 @@ int main(int, char**) 
assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -105,7 +105,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } diff --git a/libcxx/test/std/re/re.alg/re.alg.match/extended.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.match/extended.pass.cpp index b3be567d2ab24..73c1d8352ab2b 100644 --- a/libcxx/test/std/re/re.alg/re.alg.match/extended.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.match/extended.pass.cpp @@ -259,7 +259,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -274,7 +274,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -289,7 +289,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -322,7 +322,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -337,7 +337,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -352,7 +352,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -374,7 +374,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + 
assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -390,7 +390,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -406,7 +406,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 4); @@ -430,7 +430,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 2); @@ -448,7 +448,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -533,7 +533,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -556,7 +556,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -586,7 +586,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -602,7 +602,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -618,7 +618,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == 
std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -903,7 +903,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -918,7 +918,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -933,7 +933,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -966,7 +966,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -981,7 +981,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -996,7 +996,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1018,7 +1018,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1034,7 +1034,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1050,7 +1050,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 4); @@ -1074,7 
+1074,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); assert(m.length(1) == 2); @@ -1092,7 +1092,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1177,7 +1177,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1200,7 +1200,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1230,7 +1230,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1246,7 +1246,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -1262,7 +1262,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } diff --git a/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp index f2a89d6736827..9125df404b1de 100644 --- a/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp +++ b/libcxx/test/std/re/re.alg/re.alg.search/awk.locale.pass.cpp @@ -46,7 +46,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); + assert(m.length(0) >= 0 && static_cast(m.length(0)) == std::char_traits::length(s)); assert(m.position(0) == 0); assert(m.str(0) == s); } @@ -62,7 +62,7 @@ int main(int, char**) assert(!m.suffix().matched); assert(m.suffix().first == m[0].second); assert(m.suffix().second == m[0].second); - assert(m.length(0) 
>= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -89,7 +89,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -105,7 +105,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/awk.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/awk.pass.cpp
index 741a74f319043..2c23cf0f70079 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/awk.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/awk.pass.cpp
@@ -323,7 +323,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -338,7 +338,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -353,7 +353,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -386,7 +386,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -401,7 +401,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -416,7 +416,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -438,7 +438,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -454,7 +454,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -470,7 +470,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 4);
@@ -494,7 +494,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -588,7 +588,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -611,7 +611,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -641,7 +641,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -657,7 +657,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -673,7 +673,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -749,7 +749,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == s + std::char_traits<char>::length(s));
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1055,7 +1055,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1070,7 +1070,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1085,7 +1085,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1118,7 +1118,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1133,7 +1133,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1148,7 +1148,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1170,7 +1170,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1186,7 +1186,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1202,7 +1202,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 4);
@@ -1226,7 +1226,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1320,7 +1320,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1343,7 +1343,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1373,7 +1373,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1389,7 +1389,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1405,7 +1405,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1481,7 +1481,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
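For orientation, the assertion pattern repeated throughout these hunks can be reproduced standalone. This is an illustrative sketch, not part of the patch (the pattern and subject string are borrowed from the awk tests):

    #include <cassert>
    #include <cstring>
    #include <regex>

    int main() {
        const char* s = "tournament";
        std::cmatch m;
        // awk grammar is leftmost-longest, so the whole string matches.
        assert(std::regex_search(s, m, std::regex("tour|to|tournament", std::regex::awk)));
        // m.length(0) returns a signed difference type, hence the tests first
        // check it is non-negative and only then cast to std::size_t.
        assert(m.length(0) >= 0 &&
               static_cast<std::size_t>(m.length(0)) == std::strlen(s));
        return 0;
    }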
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp
index fff693edd738e..f85b6a40ce129 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/basic.locale.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -62,7 +62,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -89,7 +89,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -105,7 +105,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/basic.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/basic.pass.cpp
index 287ca9999b48c..99f94f26b32ae 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/basic.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/basic.pass.cpp
@@ -443,7 +443,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 2);
@@ -461,7 +461,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 3);
@@ -486,7 +486,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 3);
@@ -590,7 +590,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -613,7 +613,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -643,7 +643,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -659,7 +659,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -675,7 +675,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1047,7 +1047,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1062,7 +1062,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1077,7 +1077,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1110,7 +1110,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1125,7 +1125,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1140,7 +1140,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1161,7 +1161,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 2);
@@ -1179,7 +1179,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 3);
@@ -1204,7 +1204,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 3);
@@ -1308,7 +1308,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1331,7 +1331,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1361,7 +1361,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1377,7 +1377,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1393,7 +1393,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
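The mechanical renaming above rests on a portability detail worth spelling out. A minimal illustration (not taken from the patch):

    #include <cstddef>  // required to declare std::size_t

    std::size_t portable = sizeof(int);  // always well-formed after <cstddef>
    // size_t unqualified = sizeof(int); // not guaranteed: whether <cstddef>
    //                                   // also puts size_t in the global
    //                                   // namespace is unspecified; only
    //                                   // <stddef.h> guarantees ::size_t

Qualifying every use as std::size_t makes the tests independent of which headers happen to leak the global-namespace name.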
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp
index bb80b16fc60ae..aa9441cb3e58f 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/ecma.locale.pass.cpp
@@ -45,7 +45,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -61,7 +61,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -86,7 +86,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -102,7 +102,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
index 00977d398dee3..518c27e424484 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/ecma.pass.cpp
@@ -322,7 +322,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -337,7 +337,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -352,7 +352,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -385,7 +385,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -400,7 +400,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -415,7 +415,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -467,7 +467,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 4);
@@ -491,7 +491,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -578,7 +578,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -599,7 +599,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -626,7 +626,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -641,7 +641,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -656,7 +656,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -758,7 +758,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == s + std::char_traits<char>::length(s));
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1063,7 +1063,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1078,7 +1078,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1093,7 +1093,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1126,7 +1126,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1141,7 +1141,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1156,7 +1156,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1208,7 +1208,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 4);
@@ -1232,7 +1232,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1319,7 +1319,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1340,7 +1340,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1367,7 +1367,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1382,7 +1382,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1397,7 +1397,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1499,7 +1499,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == s + std::char_traits<wchar_t>::length(s));
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp
index bbe39e7657b7b..9746e45f29da5 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/extended.locale.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -62,7 +62,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -89,7 +89,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -105,7 +105,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/extended.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/extended.pass.cpp
index 8ffedfcb263eb..0ddb49d619a18 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/extended.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/extended.pass.cpp
@@ -323,7 +323,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -338,7 +338,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -353,7 +353,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -386,7 +386,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -401,7 +401,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -416,7 +416,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -438,7 +438,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -454,7 +454,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -470,7 +470,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 4);
@@ -494,7 +494,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 2);
@@ -512,7 +512,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -606,7 +606,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -629,7 +629,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -659,7 +659,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -675,7 +675,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -691,7 +691,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<char>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<char>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1057,7 +1057,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1072,7 +1072,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1087,7 +1087,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1120,7 +1120,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1135,7 +1135,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1150,7 +1150,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1172,7 +1172,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1188,7 +1188,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1204,7 +1204,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 4);
@@ -1228,7 +1228,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 assert(m.length(1) == 2);
@@ -1246,7 +1246,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1340,7 +1340,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1363,7 +1363,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1393,7 +1393,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1409,7 +1409,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
@@ -1425,7 +1425,7 @@ int main(int, char**)
 assert(!m.suffix().matched);
 assert(m.suffix().first == m[0].second);
 assert(m.suffix().second == m[0].second);
-assert(m.length(0) >= 0 && static_cast<size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
+assert(m.length(0) >= 0 && static_cast<std::size_t>(m.length(0)) == std::char_traits<wchar_t>::length(s));
 assert(m.position(0) == 0);
 assert(m.str(0) == s);
 }
diff --git a/libcxx/test/std/re/re.alg/re.alg.search/grep.pass.cpp b/libcxx/test/std/re/re.alg/re.alg.search/grep.pass.cpp
index 29300a818119b..3569ad53ee60b 100644
--- a/libcxx/test/std/re/re.alg/re.alg.search/grep.pass.cpp
+++ b/libcxx/test/std/re/re.alg/re.alg.search/grep.pass.cpp
@@ -24,7 +24,7 @@ extern "C" void LLVMFuzzerTestOneInput(const char *data)
 {
 #ifndef TEST_HAS_NO_EXCEPTIONS
-    size_t size = strlen(data);
+    std::size_t size = strlen(data);
     if (size > 0)
     {
         try
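The string tests below declare their own character-traits classes, which is why the renaming touches so many static member signatures. As a rough sketch of the shape of such a class with the qualified spellings (MyChar and MyTraits are placeholder names, not from the patch):

    #include <cstddef>

    struct MyChar { char c; };

    struct MyTraits {
        using char_type = MyChar;
        static bool eq(char_type a, char_type b) { return a.c == b.c; }
        static bool lt(char_type a, char_type b) { return a.c < b.c; }
        static std::size_t length(const char_type* s) {
            std::size_t n = 0;            // count up to the terminator
            while (s[n].c != '\0') ++n;
            return n;
        }
        // compare/find/move/copy/assign round out the full traits interface;
        // the patch only changes their size_t parameters to std::size_t.
    };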
diff --git a/libcxx/test/std/strings/basic.string.hash/char_type_hash.fail.cpp b/libcxx/test/std/strings/basic.string.hash/char_type_hash.fail.cpp
index 721d7e65b39cc..339b200806e2b 100644
--- a/libcxx/test/std/strings/basic.string.hash/char_type_hash.fail.cpp
+++ b/libcxx/test/std/strings/basic.string.hash/char_type_hash.fail.cpp
@@ -33,14 +33,14 @@ struct trait // copied from <__string>
   static inline bool eq(char_type __c1, char_type __c2) { return __c1 == __c2; }
   static inline bool lt(char_type __c1, char_type __c2) { return __c1 < __c2; }
-  static int compare(const char_type* __s1, const char_type* __s2, size_t __n);
-  static size_t length(const char_type* __s);
-  static const char_type* find(const char_type* __s, size_t __n,
+  static int compare(const char_type* __s1, const char_type* __s2, std::size_t __n);
+  static std::size_t length(const char_type* __s);
+  static const char_type* find(const char_type* __s, std::size_t __n,
                                const char_type& __a);
-  static char_type* move(char_type* __s1, const char_type* __s2, size_t __n);
-  static char_type* copy(char_type* __s1, const char_type* __s2, size_t __n);
-  static char_type* assign(char_type* __s, size_t __n, char_type __a);
+  static char_type* move(char_type* __s1, const char_type* __s2, std::size_t __n);
+  static char_type* copy(char_type* __s1, const char_type* __s2, std::size_t __n);
+  static char_type* assign(char_type* __s, std::size_t __n, char_type __a);
   static inline int_type not_eof(int_type __c)
   {
     return eq_int_type(__c, eof()) ? ~eof() : __c;
diff --git a/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp b/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp
index f79b0d9162eba..31e351a698652 100644
--- a/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp
+++ b/libcxx/test/std/strings/basic.string.hash/enabled_hashes.pass.cpp
@@ -36,12 +36,12 @@ struct std::char_traits {
   static bool eq(char_type, char_type);
   static bool lt(char_type, char_type);
-  static int compare(const char_type*, const char_type*, size_t);
-  static size_t length(const char_type*);
-  static const char_type* find(const char_type*, size_t, const char_type&);
-  static char_type* move(char_type*, const char_type*, size_t);
-  static char_type* copy(char_type*, const char_type*, size_t);
-  static char_type* assign(char_type*, size_t, char_type);
+  static int compare(const char_type*, const char_type*, std::size_t);
+  static std::size_t length(const char_type*);
+  static const char_type* find(const char_type*, std::size_t, const char_type&);
+  static char_type* move(char_type*, const char_type*, std::size_t);
+  static char_type* copy(char_type*, const char_type*, std::size_t);
+  static char_type* assign(char_type*, std::size_t, char_type);
   static int_type not_eof(int_type);
   static char_type to_char_type(int_type);
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/max_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/max_size.pass.cpp
index b8e4198f54d46..ca4b05b8b055a 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/max_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/max_size.pass.cpp
@@ -30,7 +30,7 @@ TEST_CONSTEXPR_CXX20 void
 test1(const S& s)
 {
     S s2(s);
-    const size_t sz = s2.max_size() - 1;
+    const std::size_t sz = s2.max_size() - 1;
     try { s2.resize(sz, 'x'); }
     catch ( const std::bad_alloc & ) { return ; }
    assert ( s2.size() ==  sz );
@@ -41,7 +41,7 @@ TEST_CONSTEXPR_CXX20 void
 test2(const S& s)
 {
     S s2(s);
-    const size_t sz = s2.max_size();
+    const std::size_t sz = s2.max_size();
     try { s2.resize(sz, 'x'); }
     catch ( const std::bad_alloc & ) { return ; }
     assert ( s.size() ==  sz );
@@ -76,7 +76,7 @@ TEST_CONSTEXPR_CXX20 bool test() {
 constexpr bool test_constexpr() {
   std::string str;
-  size_t size = str.max_size();
+  std::size_t size = str.max_size();
   assert(size > 0);
   return true;
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp
index 65907e03b0e2f..7fcf0c6a6cea2 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/over_max_size.pass.cpp
@@ -31,7 +31,7 @@ test(const S& s)
 {
     assert(s.max_size() >= s.size());
     S s2(s);
-    const size_t sz = s2.max_size() + 1;
+    const std::size_t sz = s2.max_size() + 1;
     try { s2.resize(sz, 'x'); }
     catch ( const std::length_error & ) { return ; }
     assert ( false );
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp
index b22a9036e70f2..3fafde81e8abf 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp
@@ -21,7 +21,7 @@
 #include "test_macros.h"
 
 template <class S>
-constexpr void test_appending(size_t k, size_t N, size_t new_capacity) {
+constexpr void test_appending(std::size_t k, size_t N, size_t new_capacity) {
   assert(N > k);
   assert(new_capacity >= N);
   auto s = S(k, 'a');
@@ -40,7 +40,7 @@ constexpr void test_appending(size_t k, size_t N, size_t new_capacity) {
 }
 
 template <class S>
-constexpr void test_truncating(size_t o, size_t N) {
+constexpr void test_truncating(std::size_t o, size_t N) {
   assert(N < o);
   auto s = S(o, 'a');
   s.resize_and_overwrite(N, [&](auto* p, auto n) {
@@ -76,10 +76,10 @@ constexpr bool test() {
 
 void test_value_categories() {
   std::string s;
-  s.resize_and_overwrite(10, [](char*&&, size_t&&) { return 0; });
-  s.resize_and_overwrite(10, [](char* const&, const size_t&) { return 0; });
+  s.resize_and_overwrite(10, [](char*&&, std::size_t&&) { return 0; });
+  s.resize_and_overwrite(10, [](char* const&, const std::size_t&) { return 0; });
   struct RefQualified {
-    int operator()(char*, size_t) && { return 0; }
+    int operator()(char*, std::size_t) && { return 0; }
   };
   s.resize_and_overwrite(10, RefQualified{});
 }
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.explicit_instantiation.sh.cpp b/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.explicit_instantiation.sh.cpp
index 195491f45045e..9f0f2ed714bcf 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.explicit_instantiation.sh.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/shrink_to_fit.explicit_instantiation.sh.cpp
@@ -34,12 +34,12 @@ struct string16_char_traits {
   static void assign(char_type&, const char_type&) { }
   static bool eq(const char_type&, const char_type&)  { return false; }
   static bool lt(const char_type&, const char_type&)  { return false; }
-  static int compare(const char_type*, const char_type*, size_t) { return 0; }
-  static size_t length(const char_type*) { return 0; }
-  static const char_type* find(const char_type*, size_t, const char_type&) { return nullptr; }
-  static char_type* move(char_type*, const char_type*, size_t) { return nullptr; }
-  static char_type* copy(char_type*, const char_type*, size_t) { return nullptr; }
-  static char_type* assign(char_type*, size_t, char_type) { return nullptr; }
+  static int compare(const char_type*, const char_type*, std::size_t) { return 0; }
+  static std::size_t length(const char_type*) { return 0; }
+  static const char_type* find(const char_type*, std::size_t, const char_type&) { return nullptr; }
+  static char_type* move(char_type*, const char_type*, std::size_t) { return nullptr; }
+  static char_type* copy(char_type*, const char_type*, std::size_t) { return nullptr; }
+  static char_type* assign(char_type*, std::size_t, char_type) { return nullptr; }
   static int_type not_eof(const int_type&) { return 0; }
   static char_type to_char_type(const int_type&) { return char_type(); }
   static int_type to_int_type(const char_type&) { return int_type(); }
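Because resize_and_overwrite (touched just above) is one of the less familiar APIs here, a hedged usage sketch may help; this is illustrative C++23 code with invented values, not taken from the tests:

    #include <cassert>
    #include <cstring>
    #include <string>

    int main() {
        std::string s = "12345";
        // The callback receives (char* buf, std::size_t n) -- the parameter
        // type the patch qualifies -- and returns the string's new size (<= n).
        s.resize_and_overwrite(8, [](char* buf, std::size_t n) {
            std::memset(buf + 5, 'x', n - 5);  // the first 5 chars are preserved
            return n;
        });
        assert(s == "12345xxx");
        return 0;
    }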
diff --git a/libcxx/test/std/strings/basic.string/string.cons/dtor.pass.cpp b/libcxx/test/std/strings/basic.string/string.cons/dtor.pass.cpp
index 07e9a60397d42..92d6531224cc1 100644
--- a/libcxx/test/std/strings/basic.string/string.cons/dtor.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.cons/dtor.pass.cpp
@@ -23,7 +23,7 @@ struct throwing_alloc
 {
   typedef T value_type;
   throwing_alloc(const throwing_alloc&);
-  T *allocate(size_t);
+  T *allocate(std::size_t);
   ~throwing_alloc() noexcept(false);
 };
diff --git a/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp b/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp
index f9cdeac8b203e..5f3064bc0452b 100644
--- a/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.cons/implicit_deduction_guides.pass.cpp
@@ -81,7 +81,7 @@ TEST_CONSTEXPR_CXX20 bool test() {
   }
   { // Testing (4) w/o allocator
     const std::string sin("abc");
-    std::basic_string s(sin, (size_t)1);
+    std::basic_string s(sin, (std::size_t)1);
     ASSERT_SAME_TYPE(decltype(s), std::string);
     assert(s == "bc");
@@ -97,7 +97,7 @@ TEST_CONSTEXPR_CXX20 bool test() {
   }
   { // Testing (4) w/ allocator
     const std::string sin("abc");
-    std::basic_string s(sin, (size_t)1, std::allocator<char>{});
+    std::basic_string s(sin, (std::size_t)1, std::allocator<char>{});
     ASSERT_SAME_TYPE(decltype(s), std::string);
     assert(s == "bc");
@@ -113,7 +113,7 @@ TEST_CONSTEXPR_CXX20 bool test() {
   }
   { // Testing (5) w/o allocator
     const std::string sin("abc");
-    std::basic_string s(sin, (size_t)1, (size_t)3);
+    std::basic_string s(sin, (std::size_t)1, (size_t)3);
     ASSERT_SAME_TYPE(decltype(s), std::string);
     assert(s == "bc");
@@ -129,7 +129,7 @@ TEST_CONSTEXPR_CXX20 bool test() {
   }
   { // Testing (5) w/ allocator
     const std::string sin("abc");
-    std::basic_string s(sin, (size_t)1, (size_t)3, std::allocator<char>{});
+    std::basic_string s(sin, (std::size_t)1, (size_t)3, std::allocator<char>{});
     ASSERT_SAME_TYPE(decltype(s), std::string);
     assert(s == "bc");
@@ -144,18 +144,18 @@ TEST_CONSTEXPR_CXX20 bool test() {
 #endif
   }
   { // Testing (6) w/o allocator
-    std::basic_string s("abc", (size_t)2);
+    std::basic_string s("abc", (std::size_t)2);
     ASSERT_SAME_TYPE(decltype(s), std::string);
     assert(s == "ab");
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
-    std::basic_string w(L"abcdef", (size_t)3);
+    std::basic_string w(L"abcdef", (std::size_t)3);
     ASSERT_SAME_TYPE(decltype(w), std::wstring);
     assert(w == L"abc");
 #endif
   }
   { // Testing (6) w/ allocator
-    std::basic_string s("abc", (size_t)2, std::allocator<char>{});
+    std::basic_string s("abc", (std::size_t)2, std::allocator<char>{});
     ASSERT_SAME_TYPE(decltype(s), std::string);
     assert(s == "ab");
diff --git a/libcxx/test/std/strings/basic.string/string.cons/move_assign_noexcept.pass.cpp b/libcxx/test/std/strings/basic.string/string.cons/move_assign_noexcept.pass.cpp
index c2b97ecb3ec00..1e24c20b4b101 100644
--- a/libcxx/test/std/strings/basic.string/string.cons/move_assign_noexcept.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.cons/move_assign_noexcept.pass.cpp
@@ -31,7 +31,7 @@ struct some_alloc
 {
     typedef T value_type;
     some_alloc(const some_alloc&);
-    T *allocate(size_t);
+    T *allocate(std::size_t);
 };
 
 template <class T>
@@ -41,7 +41,7 @@ struct some_alloc2
     some_alloc2() {}
     some_alloc2(const some_alloc2&);
-    T *allocate(size_t);
+    T *allocate(std::size_t);
     void deallocate(void*, unsigned) {}
 
     typedef std::false_type propagate_on_container_move_assignment;
@@ -55,7 +55,7 @@ struct some_alloc3
     some_alloc3() {}
    some_alloc3(const some_alloc3&);
-    T *allocate(size_t);
+    T *allocate(std::size_t);
     void deallocate(void*, unsigned) {}
 
     typedef std::false_type propagate_on_container_move_assignment;
diff --git a/libcxx/test/std/strings/basic.string/string.modifiers/string_append/push_back.pass.cpp b/libcxx/test/std/strings/basic.string/string.modifiers/string_append/push_back.pass.cpp
index 7d9cf9aecd900..809ed97decd79 100644
--- a/libcxx/test/std/strings/basic.string/string.modifiers/string_append/push_back.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.modifiers/string_append/push_back.pass.cpp
@@ -35,15 +35,15 @@ namespace std {
     static bool eq(char_type c1, char_type c2);
     static bool lt(char_type c1, char_type c2);
 
-    static int compare(const char_type* s1, const char_type* s2, size_t n);
-    static size_t length(const char_type* s);
-    static const char_type* find(const char_type* s, size_t n, const char_type& a);
-    static char_type* move(char_type* s1, const char_type* s2, size_t n);
-    static TEST_CONSTEXPR_CXX20 char_type* copy(char_type* s1, const char_type* s2, size_t n) {
+    static int compare(const char_type* s1, const char_type* s2, std::size_t n);
+    static std::size_t length(const char_type* s);
+    static const char_type* find(const char_type* s, std::size_t n, const char_type& a);
+    static char_type* move(char_type* s1, const char_type* s2, std::size_t n);
+    static TEST_CONSTEXPR_CXX20 char_type* copy(char_type* s1, const char_type* s2, std::size_t n) {
       std::copy_n(s2, n, s1);
      return s1;
     }
-    static TEST_CONSTEXPR_CXX20 char_type* assign(char_type* s, size_t n, char_type a) {
+    static TEST_CONSTEXPR_CXX20 char_type* assign(char_type* s, std::size_t n, char_type a) {
       std::fill_n(s, n, a);
       return s;
     }
diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string.cmp/comparison.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string.cmp/comparison.pass.cpp
index d9f71bd31e62a..03b3b98df8d94 100644
--- a/libcxx/test/std/strings/basic.string/string.nonmembers/string.cmp/comparison.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string.cmp/comparison.pass.cpp
@@ -59,8 +59,8 @@ constexpr void test() {
   };
   static_assert(v.size() == vn.size());
 
-  for (size_t i = 0; i < v.size(); ++i) {
-    for (size_t j = 0; j < v.size(); ++j) {
+  for (std::size_t i = 0; i < v.size(); ++i) {
+    for (std::size_t j = 0; j < v.size(); ++j) {
       assert(testOrder(v[i], v[j], i == j ? Ordering::equivalent : i < j ? Ordering::less : Ordering::greater));
       assert(testOrder(
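For the std::stod/std::stof hunks further below, the idx out-parameter that keeps being requalified reports how many characters were consumed; a small sketch mirroring the tests:

    #include <cassert>
    #include <string>

    int main() {
        std::size_t idx = 0;               // receives the count of parsed characters
        double d = std::stod("10g", &idx);
        assert(d == 10.0);
        assert(idx == 2);                  // parsing stopped before 'g'
        return 0;
    }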
Ordering::less : Ordering::greater)); assert(testOrder( diff --git a/libcxx/test/std/strings/basic.string/string.nonmembers/string.special/swap_noexcept.pass.cpp b/libcxx/test/std/strings/basic.string/string.nonmembers/string.special/swap_noexcept.pass.cpp index de820028e2602..9a5513b90c111 100644 --- a/libcxx/test/std/strings/basic.string/string.nonmembers/string.special/swap_noexcept.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.nonmembers/string.special/swap_noexcept.pass.cpp @@ -34,7 +34,7 @@ struct some_alloc some_alloc() {} some_alloc(const some_alloc&); - T *allocate(size_t); + T *allocate(std::size_t); void deallocate(void*, unsigned) {} typedef std::true_type propagate_on_container_swap; }; @@ -46,7 +46,7 @@ struct some_alloc2 some_alloc2() {} some_alloc2(const some_alloc2&); - T *allocate(size_t); + T *allocate(std::size_t); void deallocate(void*, unsigned) {} typedef std::false_type propagate_on_container_swap; diff --git a/libcxx/test/std/strings/basic.string/string.require/contiguous.pass.cpp b/libcxx/test/std/strings/basic.string/string.require/contiguous.pass.cpp index f7087fd3c5d95..822f1a6171a1b 100644 --- a/libcxx/test/std/strings/basic.string/string.require/contiguous.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.require/contiguous.pass.cpp @@ -21,7 +21,7 @@ template TEST_CONSTEXPR_CXX20 void test_contiguous ( const C &c ) { - for ( size_t i = 0; i < c.size(); ++i ) + for ( std::size_t i = 0; i < c.size(); ++i ) assert ( *(c.begin() + static_cast(i)) == *(std::addressof(*c.begin()) + i)); } diff --git a/libcxx/test/std/strings/c.strings/cuchar.compile.pass.cpp b/libcxx/test/std/strings/c.strings/cuchar.compile.pass.cpp index 909f2c025bdf8..775f4cd2272fa 100644 --- a/libcxx/test/std/strings/c.strings/cuchar.compile.pass.cpp +++ b/libcxx/test/std/strings/c.strings/cuchar.compile.pass.cpp @@ -24,12 +24,12 @@ // __STDC_UTF_32__ may or may not be defined by the C standard library #if !defined(TEST_HAS_NO_C8RTOMB_MBRTOC8) -ASSERT_SAME_TYPE(size_t, decltype(std::mbrtoc8((char8_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); -ASSERT_SAME_TYPE(size_t, decltype(std::c8rtomb((char*)0, (char8_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(std::size_t, decltype(std::mbrtoc8((char8_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(std::size_t, decltype(std::c8rtomb((char*)0, (char8_t)0, (mbstate_t*)0))); #endif -ASSERT_SAME_TYPE(size_t, decltype(std::mbrtoc16((char16_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); -ASSERT_SAME_TYPE(size_t, decltype(std::c16rtomb((char*)0, (char16_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(std::size_t, decltype(std::mbrtoc16((char16_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(std::size_t, decltype(std::c16rtomb((char*)0, (char16_t)0, (mbstate_t*)0))); -ASSERT_SAME_TYPE(size_t, decltype(std::mbrtoc32((char32_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); -ASSERT_SAME_TYPE(size_t, decltype(std::c16rtomb((char*)0, (char32_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(std::size_t, decltype(std::mbrtoc32((char32_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(std::size_t, decltype(std::c16rtomb((char*)0, (char32_t)0, (mbstate_t*)0))); diff --git a/libcxx/test/std/strings/string.conversions/stod.pass.cpp b/libcxx/test/std/strings/string.conversions/stod.pass.cpp index bc553adc4a7f0..dbf10a4948de4 100644 --- a/libcxx/test/std/strings/string.conversions/stod.pass.cpp +++ b/libcxx/test/std/strings/string.conversions/stod.pass.cpp @@ -27,29 +27,29 @@ int main(int, char**) 
assert(std::stod("-10") == -10); assert(std::stod(" 10") == 10); { - size_t idx = 0; + std::size_t idx = 0; assert(std::stod("10g", &idx) == 10); assert(idx == 2); } { - size_t idx = 0; + std::size_t idx = 0; assert(std::stod("1.e60", &idx) == 1.e60); assert(idx == 5); } { - size_t idx = 0; + std::size_t idx = 0; assert(std::stod("INF", &idx) == INFINITY); assert(idx == 3); } { - size_t idx = 0; + std::size_t idx = 0; assert(std::isnan(std::stod("NAN", &idx))); assert(idx == 3); } #ifndef TEST_HAS_NO_EXCEPTIONS { - size_t idx = 0; + std::size_t idx = 0; try { assert(std::stod("1.e360", &idx) == INFINITY); assert(false); @@ -58,7 +58,7 @@ int main(int, char**) } } { - size_t idx = 0; + std::size_t idx = 0; try { (void)std::stod("", &idx); assert(false); @@ -67,7 +67,7 @@ int main(int, char**) } } { - size_t idx = 0; + std::size_t idx = 0; try { (void)std::stod(" - 8", &idx); assert(false); @@ -76,7 +76,7 @@ int main(int, char**) } } { - size_t idx = 0; + std::size_t idx = 0; try { (void)std::stod("a1", &idx); assert(false); @@ -95,28 +95,28 @@ int main(int, char**) assert(std::stod(L"-10.5") == -10.5); assert(std::stod(L" 10") == 10); { - size_t idx = 0; + std::size_t idx = 0; assert(std::stod(L"10g", &idx) == 10); assert(idx == 2); } { - size_t idx = 0; + std::size_t idx = 0; assert(std::stod(L"1.e60", &idx) == 1.e60); assert(idx == 5); } { - size_t idx = 0; + std::size_t idx = 0; assert(std::stod(L"INF", &idx) == INFINITY); assert(idx == 3); } { - size_t idx = 0; + std::size_t idx = 0; assert(std::isnan(std::stod(L"NAN", &idx))); assert(idx == 3); } #ifndef TEST_HAS_NO_EXCEPTIONS { - size_t idx = 0; + std::size_t idx = 0; try { assert(std::stod(L"1.e360", &idx) == INFINITY); assert(false); @@ -125,7 +125,7 @@ int main(int, char**) } } { - size_t idx = 0; + std::size_t idx = 0; try { (void)std::stod(L"", &idx); assert(false); @@ -134,7 +134,7 @@ int main(int, char**) } } { - size_t idx = 0; + std::size_t idx = 0; try { (void)std::stod(L" - 8", &idx); assert(false); @@ -143,7 +143,7 @@ int main(int, char**) } } { - size_t idx = 0; + std::size_t idx = 0; try { (void)std::stod(L"a1", &idx); assert(false); diff --git a/libcxx/test/std/strings/string.conversions/stof.pass.cpp b/libcxx/test/std/strings/string.conversions/stof.pass.cpp index da4bddd575015..d95e5d1b10a86 100644 --- a/libcxx/test/std/strings/string.conversions/stof.pass.cpp +++ b/libcxx/test/std/strings/string.conversions/stof.pass.cpp @@ -25,23 +25,23 @@ int main(int, char**) assert(std::stof("-10") == -10); assert(std::stof(" 10") == 10); { - size_t idx = 0; + std::size_t idx = 0; assert(std::stof("10g", &idx) == 10); assert(idx == 2); } { - size_t idx = 0; + std::size_t idx = 0; assert(std::stof("INF", &idx) == INFINITY); assert(idx == 3); } { - size_t idx = 0; + std::size_t idx = 0; assert(std::isnan(std::stof("NAN", &idx))); assert(idx == 3); } #ifndef TEST_HAS_NO_EXCEPTIONS { - size_t idx = 0; + std::size_t idx = 0; try { assert(std::stof("1.e60", &idx) == INFINITY); assert(false); @@ -50,7 +50,7 @@ int main(int, char**) } } { - size_t idx = 0; + std::size_t idx = 0; try { assert(std::stof("1.e360", &idx) == INFINITY); assert(false); @@ -59,7 +59,7 @@ int main(int, char**) } } { - size_t idx = 0; + std::size_t idx = 0; try { (void)std::stof("", &idx); assert(false); @@ -68,7 +68,7 @@ int main(int, char**) } } { - size_t idx = 0; + std::size_t idx = 0; try { (void)std::stof(" - 8", &idx); assert(false); @@ -77,7 +77,7 @@ int main(int, char**) } } { - size_t idx = 0; + std::size_t idx = 0; try { (void)std::stof("a1", &idx); 
             assert(false);
@@ -93,23 +93,23 @@ int main(int, char**)
     assert(std::stof(L"-10.5") == -10.5);
     assert(std::stof(L" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stof(L"10g", &idx) == 10);
         assert(idx == 2);
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stof(L"INF", &idx) == INFINITY);
         assert(idx == 3);
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::isnan(std::stof(L"NAN", &idx)));
         assert(idx == 3);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             assert(std::stof(L"1.e60", &idx) == INFINITY);
             assert(false);
@@ -118,7 +118,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             assert(std::stof(L"1.e360", &idx) == INFINITY);
             assert(false);
@@ -127,7 +127,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stof(L"", &idx);
             assert(false);
@@ -136,7 +136,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stof(L" - 8", &idx);
             assert(false);
@@ -145,7 +145,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stof(L"a1", &idx);
             assert(false);
diff --git a/libcxx/test/std/strings/string.conversions/stoi.pass.cpp b/libcxx/test/std/strings/string.conversions/stoi.pass.cpp
index 8a8ea645fc268..f7650e3d245f9 100644
--- a/libcxx/test/std/strings/string.conversions/stoi.pass.cpp
+++ b/libcxx/test/std/strings/string.conversions/stoi.pass.cpp
@@ -24,13 +24,13 @@ int main(int, char**)
     assert(std::stoi("-10") == -10);
     assert(std::stoi(" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stoi("10g", &idx, 16) == 16);
         assert(idx == 2);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     if (std::numeric_limits<long>::max() > std::numeric_limits<int>::max())
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoi("0x100000000", &idx, 16);
             assert(false);
@@ -39,7 +39,7 @@ int main(int, char**)
         }
    }
    {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoi("", &idx);
             assert(false);
@@ -48,7 +48,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoi(" - 8", &idx);
             assert(false);
@@ -57,7 +57,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoi("a1", &idx);
             assert(false);
@@ -73,13 +73,13 @@ int main(int, char**)
     assert(std::stoi(L"-10") == -10);
     assert(std::stoi(L" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stoi(L"10g", &idx, 16) == 16);
         assert(idx == 2);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     if (std::numeric_limits<long>::max() > std::numeric_limits<int>::max())
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoi(L"0x100000000", &idx, 16);
             assert(false);
@@ -88,7 +88,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoi(L"", &idx);
             assert(false);
@@ -97,7 +97,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoi(L" - 8", &idx);
             assert(false);
@@ -106,7 +106,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoi(L"a1", &idx);
             assert(false);
diff --git a/libcxx/test/std/strings/string.conversions/stol.pass.cpp b/libcxx/test/std/strings/string.conversions/stol.pass.cpp
index 3f376933e4857..2356ae87a4e7f 100644
--- a/libcxx/test/std/strings/string.conversions/stol.pass.cpp
+++ b/libcxx/test/std/strings/string.conversions/stol.pass.cpp
@@ -24,13 +24,13 @@ int main(int, char**)
     assert(std::stol("-10") == -10);
     assert(std::stol(" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stol("10g", &idx, 16) == 16);
         assert(idx == 2);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stol("", &idx);
             assert(false);
@@ -39,7 +39,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stol(" - 8", &idx);
             assert(false);
@@ -48,7 +48,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stol("a1", &idx);
             assert(false);
@@ -57,7 +57,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             // LWG#2009 and PR14919
             (void)std::stol("9999999999999999999999999999999999999999999999999", &idx);
@@ -74,13 +74,13 @@ int main(int, char**)
     assert(std::stol(L"-10") == -10);
     assert(std::stol(L" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stol(L"10g", &idx, 16) == 16);
         assert(idx == 2);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stol(L"", &idx);
             assert(false);
@@ -89,7 +89,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stol(L" - 8", &idx);
             assert(false);
@@ -98,7 +98,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stol(L"a1", &idx);
             assert(false);
@@ -107,7 +107,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             // LWG#2009 and PR14919
             (void)std::stol(L"9999999999999999999999999999999999999999999999999", &idx);
diff --git a/libcxx/test/std/strings/string.conversions/stold.pass.cpp b/libcxx/test/std/strings/string.conversions/stold.pass.cpp
index abe8842fc6e09..dc9d3c607ae37 100644
--- a/libcxx/test/std/strings/string.conversions/stold.pass.cpp
+++ b/libcxx/test/std/strings/string.conversions/stold.pass.cpp
@@ -25,29 +25,29 @@ int main(int, char**)
     assert(std::stold("-10") == -10);
     assert(std::stold(" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stold("10g", &idx) == 10);
         assert(idx == 2);
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stold("1.e60", &idx) == 1.e60L);
         assert(idx == 5);
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stold("INF", &idx) == INFINITY);
         assert(idx == 3);
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::isnan(std::stold("NAN", &idx)));
         assert(idx == 3);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stold("", &idx);
             assert(false);
@@ -56,7 +56,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stold(" - 8", &idx);
             assert(false);
@@ -65,7 +65,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stold("a1", &idx);
             assert(false);
@@ -74,7 +74,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             assert(std::stold("1.e6000", &idx) == INFINITY);
             assert(false);
@@ -90,28 +90,28 @@ int main(int, char**)
     assert(std::stold(L"-10.5") == -10.5);
     assert(std::stold(L" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stold(L"10g", &idx) == 10);
         assert(idx == 2);
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stold(L"1.e60", &idx) == 1.e60L);
         assert(idx == 5);
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stold(L"INF", &idx) == INFINITY);
         assert(idx == 3);
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::isnan(std::stold(L"NAN", &idx)));
         assert(idx == 3);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stold(L"", &idx);
             assert(false);
@@ -120,7 +120,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stold(L" - 8", &idx);
             assert(false);
@@ -129,7 +129,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stold(L"a1", &idx);
             assert(false);
@@ -138,7 +138,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             assert(std::stold(L"1.e6000", &idx) == INFINITY);
             assert(false);
diff --git a/libcxx/test/std/strings/string.conversions/stoll.pass.cpp b/libcxx/test/std/strings/string.conversions/stoll.pass.cpp
index b8eb1b5c05fc2..fc6bd136cf4f3 100644
--- a/libcxx/test/std/strings/string.conversions/stoll.pass.cpp
+++ b/libcxx/test/std/strings/string.conversions/stoll.pass.cpp
@@ -24,13 +24,13 @@ int main(int, char**)
     assert(std::stoll("-10") == -10);
     assert(std::stoll(" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stoll("10g", &idx, 16) == 16);
         assert(idx == 2);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoll("", &idx);
             assert(false);
@@ -39,7 +39,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoll(" - 8", &idx);
             assert(false);
@@ -48,7 +48,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoll("a1", &idx);
             assert(false);
@@ -57,7 +57,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             // LWG#2009 and PR14919
             (void)std::stoll("99999999999999999999999999", &idx);
@@ -74,13 +74,13 @@ int main(int, char**)
     assert(std::stoll(L"-10") == -10);
     assert(std::stoll(L" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stoll(L"10g", &idx, 16) == 16);
         assert(idx == 2);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoll(L"", &idx);
             assert(false);
@@ -89,7 +89,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoll(L" - 8", &idx);
             assert(false);
@@ -98,7 +98,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoll(L"a1", &idx);
             assert(false);
@@ -107,7 +107,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             // LWG#2009 and PR14919
             (void)std::stoll(L"99999999999999999999999999", &idx);
diff --git a/libcxx/test/std/strings/string.conversions/stoul.pass.cpp b/libcxx/test/std/strings/string.conversions/stoul.pass.cpp
index b84891ecd2dcb..a8d23f92959aa 100644
--- a/libcxx/test/std/strings/string.conversions/stoul.pass.cpp
+++ b/libcxx/test/std/strings/string.conversions/stoul.pass.cpp
@@ -23,13 +23,13 @@ int main(int, char**)
     assert(std::stoul("-0") == 0);
     assert(std::stoul(" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stoul("10g", &idx, 16) == 16);
         assert(idx == 2);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoul("", &idx);
             assert(false);
@@ -38,7 +38,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoul(" - 8", &idx);
             assert(false);
@@ -47,7 +47,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoul("a1", &idx);
             assert(false);
@@ -56,7 +56,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             // LWG#2009 and PR14919
             (void)std::stoul("9999999999999999999999999999999999999999999999999", &idx);
@@ -72,13 +72,13 @@ int main(int, char**)
     assert(std::stoul(L"-0") == 0);
     assert(std::stoul(L" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stoul(L"10g", &idx, 16) == 16);
         assert(idx == 2);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoul(L"", &idx);
             assert(false);
@@ -87,7 +87,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoul(L" - 8", &idx);
             assert(false);
@@ -96,7 +96,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoul(L"a1", &idx);
             assert(false);
@@ -105,7 +105,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             // LWG#2009 and PR14919
             (void)std::stoul(L"9999999999999999999999999999999999999999999999999", &idx);
diff --git a/libcxx/test/std/strings/string.conversions/stoull.pass.cpp b/libcxx/test/std/strings/string.conversions/stoull.pass.cpp
index 744dcab28c4d5..2757cea130872 100644
--- a/libcxx/test/std/strings/string.conversions/stoull.pass.cpp
+++ b/libcxx/test/std/strings/string.conversions/stoull.pass.cpp
@@ -23,13 +23,13 @@ int main(int, char**)
     assert(std::stoull("-0") == 0);
     assert(std::stoull(" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stoull("10g", &idx, 16) == 16);
         assert(idx == 2);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoull("", &idx);
             assert(false);
@@ -38,7 +38,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoull(" - 8", &idx);
             assert(false);
@@ -47,7 +47,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoull("a1", &idx);
             assert(false);
@@ -56,7 +56,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             // LWG#2009 and PR14919
             (void)std::stoull("9999999999999999999999999999999999999999999999999", &idx);
@@ -72,13 +72,13 @@ int main(int, char**)
     assert(std::stoull(L"-0") == 0);
     assert(std::stoull(L" 10") == 10);
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         assert(std::stoull(L"10g", &idx, 16) == 16);
         assert(idx == 2);
     }
 #ifndef TEST_HAS_NO_EXCEPTIONS
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoull(L"", &idx);
             assert(false);
@@ -87,7 +87,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoull(L" - 8", &idx);
             assert(false);
@@ -96,7 +96,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             (void)std::stoull(L"a1", &idx);
             assert(false);
@@ -105,7 +105,7 @@ int main(int, char**)
         }
     }
     {
-        size_t idx = 0;
+        std::size_t idx = 0;
         try {
             // LWG#2009 and PR14919
             (void)std::stoull(L"9999999999999999999999999999999999999999999999999", &idx);
diff --git a/libcxx/test/std/strings/string.view/string.view.access/at.pass.cpp b/libcxx/test/std/strings/string.view/string.view.access/at.pass.cpp
index 99a81acdf32d3..c01017493f3e6 100644
--- a/libcxx/test/std/strings/string.view/string.view.access/at.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.access/at.pass.cpp
@@ -17,10 +17,10 @@
 #include "test_macros.h"
 
 template <class CharT>
-void test ( const CharT *s, size_t len ) {
+void test ( const CharT *s, std::size_t len ) {
     std::basic_string_view<CharT> sv ( s, len );
     assert ( sv.length() == len );
-    for ( size_t i = 0; i < len; ++i ) {
+    for ( std::size_t i = 0; i < len; ++i ) {
         assert (  sv.at(i) == s[i] );
         assert ( &sv.at(i) == s + i );
     }
diff --git a/libcxx/test/std/strings/string.view/string.view.access/back.pass.cpp b/libcxx/test/std/strings/string.view/string.view.access/back.pass.cpp
index 49b84ab1a6818..e63b80daeb032 100644
--- a/libcxx/test/std/strings/string.view/string.view.access/back.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.access/back.pass.cpp
@@ -18,7 +18,7 @@
 #include "test_macros.h"
 
 template <class CharT>
-bool test ( const CharT *s, size_t len ) {
+bool test ( const CharT *s, std::size_t len ) {
     typedef std::basic_string_view<CharT> SV;
     SV sv ( s, len );
     ASSERT_SAME_TYPE(decltype(sv.back()), typename SV::const_reference);
diff --git a/libcxx/test/std/strings/string.view/string.view.access/data.pass.cpp b/libcxx/test/std/strings/string.view/string.view.access/data.pass.cpp
index 50d2a14348a3e..bf5485b96826c 100644
--- a/libcxx/test/std/strings/string.view/string.view.access/data.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.access/data.pass.cpp
@@ -19,7 +19,7 @@
 #include "test_macros.h"
 
 template <class CharT>
-void test ( const CharT *s, size_t len ) {
+void test ( const CharT *s, std::size_t len ) {
     std::basic_string_view<CharT> sv ( s, len );
     assert ( sv.length() == len );
     assert ( sv.data() == s );
diff --git a/libcxx/test/std/strings/string.view/string.view.access/front.pass.cpp b/libcxx/test/std/strings/string.view/string.view.access/front.pass.cpp
index d9e1607643bca..72aa0258bbc04 100644
--- a/libcxx/test/std/strings/string.view/string.view.access/front.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.access/front.pass.cpp
@@ -18,7 +18,7 @@
 #include "test_macros.h"
 
 template <class CharT>
-bool test ( const CharT *s, size_t len ) {
+bool test ( const CharT *s, std::size_t len ) {
     typedef std::basic_string_view<CharT> SV;
     SV sv ( s, len );
     ASSERT_SAME_TYPE(decltype(sv.front()), typename SV::const_reference);
diff --git a/libcxx/test/std/strings/string.view/string.view.access/index.pass.cpp b/libcxx/test/std/strings/string.view/string.view.access/index.pass.cpp
index b27bc65251c57..51c60ec0057b8 100644
--- a/libcxx/test/std/strings/string.view/string.view.access/index.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.access/index.pass.cpp
@@ -18,13 +18,13 @@
 #include "test_macros.h"
 
 template <class CharT>
-void test ( const CharT *s, size_t len ) {
+void test ( const CharT *s, std::size_t len ) {
     typedef std::basic_string_view<CharT> SV;
     SV sv ( s, len );
     ASSERT_SAME_TYPE(decltype(sv[0]), typename SV::const_reference);
     LIBCPP_ASSERT_NOEXCEPT(   sv[0]);
     assert ( sv.length() == len );
-    for ( size_t i = 0; i < len; ++i ) {
+    for ( std::size_t i = 0; i < len; ++i ) {
         assert (  sv[i] == s[i] );
         assert ( &sv[i] == s + i );
     }
diff --git a/libcxx/test/std/strings/string.view/string.view.capacity/capacity.pass.cpp b/libcxx/test/std/strings/string.view/string.view.capacity/capacity.pass.cpp
index 2e11aa120d3d5..a9fd862d65548 100644
--- a/libcxx/test/std/strings/string.view/string.view.capacity/capacity.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.capacity/capacity.pass.cpp
@@ -59,7 +59,7 @@ void test1 () {
 }
 
 template <class CharT>
-void test2 ( const CharT *s, size_t len ) {
+void test2 ( const CharT *s, std::size_t len ) {
     {
     std::basic_string_view<CharT> sv1 ( s );
     assert ( sv1.size() == len );
diff --git a/libcxx/test/std/strings/string.view/string.view.comparison/common_type_specialization.pass.cpp b/libcxx/test/std/strings/string.view/string.view.comparison/common_type_specialization.pass.cpp
index 248ef567bfa7f..ef120cbcff178 100644
--- a/libcxx/test/std/strings/string.view/string.view.comparison/common_type_specialization.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.comparison/common_type_specialization.pass.cpp
@@ -32,7 +32,7 @@ struct std::char_traits {
   static bool eq(char_wrapper lhs, char_wrapper rhs) { return lhs.c == rhs.c; }
 
-  static size_t length(const char_wrapper* a) {
+  static std::size_t length(const char_wrapper* a) {
     static_assert(sizeof(char_wrapper) == 1, "strlen requires this");
     return std::strlen(reinterpret_cast<const char*>(a));
   }
diff --git a/libcxx/test/std/strings/string.view/string.view.comparison/comparison.pass.cpp b/libcxx/test/std/strings/string.view/string.view.comparison/comparison.pass.cpp
index 120b9a0b3f273..bf4ebd9258b53 100644
--- a/libcxx/test/std/strings/string.view/string.view.comparison/comparison.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.comparison/comparison.pass.cpp
@@ -47,7 +47,7 @@ struct char_traits {
   static constexpr void assign(char_type& __c1, const char_type& __c2) noexcept { __c1 = __c2; }
   static constexpr bool eq(char_type __c1, char_type __c2) noexcept { return __c1 == __c2; }
   static constexpr bool lt(char_type __c1, char_type __c2) noexcept { return __c1 < __c2; }
-  static constexpr int compare(const char_type* __s1, const char_type* __s2, size_t __n) {
+  static constexpr int compare(const char_type* __s1, const char_type* __s2, std::size_t __n) {
     for (; __n; --__n, ++__s1, ++__s2) {
       if (lt(*__s1, *__s2))
         return -1;
@@ -57,11 +57,11 @@ struct char_traits {
     return 0;
   }
 
-  static constexpr size_t length(const char_type* __s);
-  static constexpr const char_type* find(const char_type* __s, size_t __n, const char_type& __a);
-  static constexpr char_type* move(char_type* __s1, const char_type* __s2, size_t __n);
-  static constexpr char_type* copy(char_type* __s1, const char_type* __s2, size_t __n);
-  static constexpr char_type* assign(char_type* __s, size_t __n, char_type __a);
+  static constexpr std::size_t length(const char_type* __s);
+  static constexpr const char_type* find(const char_type* __s, std::size_t __n, const char_type& __a);
+  static constexpr char_type* move(char_type* __s1, const char_type* __s2, std::size_t __n);
+  static constexpr char_type* copy(char_type* __s1, const char_type* __s2, std::size_t __n);
+  static constexpr char_type* assign(char_type* __s, std::size_t __n, char_type __a);
   static constexpr int_type not_eof(int_type __c) noexcept { return eq_int_type(__c, eof()) ? ~eof() : __c; }
   static constexpr char_type to_char_type(int_type __c) noexcept { return char_type(__c); }
   static constexpr int_type to_int_type(char_type __c) noexcept { return int_type(__c); }
@@ -91,8 +91,8 @@ constexpr void test() {
   };
   static_assert(v.size() == vn.size());
 
-  for (size_t i = 0; i < v.size(); ++i) {
-    for (size_t j = 0; j < v.size(); ++j) {
+  for (std::size_t i = 0; i < v.size(); ++i) {
+    for (std::size_t j = 0; j < v.size(); ++j) {
      assert(testOrder(v[i], v[j], i == j ? Ordering::equivalent : i < j ? Ordering::less : Ordering::greater));
       assert(testOrder(
           v[i],
diff --git a/libcxx/test/std/strings/string.view/string.view.comparison/comparison.verify.cpp b/libcxx/test/std/strings/string.view/string.view.comparison/comparison.verify.cpp
index 09bd1bd2cf77d..a2c58c3bbc159 100644
--- a/libcxx/test/std/strings/string.view/string.view.comparison/comparison.verify.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.comparison/comparison.verify.cpp
@@ -34,12 +34,12 @@ struct traits {
   static constexpr bool eq(char_type&, const char_type&) noexcept;
   static constexpr bool lt(char_type&, const char_type&) noexcept;
 
-  static constexpr int compare(const char_type*, const char_type*, size_t) { return 0; }
-  static constexpr size_t length(const char_type*);
-  static constexpr const char_type* find(const char_type*, size_t, const char_type&);
-  static constexpr char_type* move(char_type*, const char_type*, size_t);
-  static constexpr char_type* copy(char_type*, const char_type*, size_t);
-  static constexpr char_type* assign(char_type*, size_t, char_type);
+  static constexpr int compare(const char_type*, const char_type*, std::size_t) { return 0; }
+  static constexpr std::size_t length(const char_type*);
+  static constexpr const char_type* find(const char_type*, std::size_t, const char_type&);
+  static constexpr char_type* move(char_type*, const char_type*, std::size_t);
+  static constexpr char_type* copy(char_type*, const char_type*, std::size_t);
+  static constexpr char_type* assign(char_type*, std::size_t, char_type);
 
   static constexpr int_type not_eof(int_type) noexcept;
diff --git a/libcxx/test/std/strings/string.view/string.view.cons/from_literal.pass.cpp b/libcxx/test/std/strings/string.view/string.view.cons/from_literal.pass.cpp
index 124aaeaab54a7..2f1e478d088cb 100644
--- a/libcxx/test/std/strings/string.view/string.view.cons/from_literal.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.cons/from_literal.pass.cpp
@@ -22,7 +22,7 @@
 template <typename CharT>
 size_t StrLen ( const CharT *s ) {
-    size_t retVal = 0;
+    std::size_t retVal = 0;
     while ( *s != 0 ) { ++retVal; ++s; }
     return retVal;
     }
diff --git a/libcxx/test/std/strings/string.view/string.view.cons/from_ptr_len.pass.cpp b/libcxx/test/std/strings/string.view/string.view.cons/from_ptr_len.pass.cpp
index 8d48f90848119..8ff6061b48ecc 100644
--- a/libcxx/test/std/strings/string.view/string.view.cons/from_ptr_len.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.cons/from_ptr_len.pass.cpp
@@ -20,7 +20,7 @@
 #include "test_macros.h"
 
 template <class CharT>
-void test ( const CharT *s, size_t sz ) {
+void test ( const CharT *s, std::size_t sz ) {
     {
     typedef std::basic_string_view<CharT> SV;
     LIBCPP_ASSERT_NOEXCEPT(SV(s, sz));
diff --git a/libcxx/test/std/strings/string.view/string.view.cons/from_range.pass.cpp b/libcxx/test/std/strings/string.view/string.view.cons/from_range.pass.cpp
index 4ea963d925495..bf24d53aba890 100644
--- a/libcxx/test/std/strings/string.view/string.view.cons/from_range.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.cons/from_range.pass.cpp
@@ -163,7 +163,7 @@ void test_throwing() {
   struct ThrowingSize {
     char* begin() const { return nullptr; }
     char* end() const { return nullptr; }
-    size_t size() const { throw 42; return 0; }
+    std::size_t size() const { throw 42; return 0; }
   };
   try {
     ThrowingSize x;
diff --git a/libcxx/test/std/strings/string.view/string.view.hash/char_type.hash.fail.cpp b/libcxx/test/std/strings/string.view/string.view.hash/char_type.hash.fail.cpp
index a1d6c10b64c84..c81b61b527e69 100644
--- a/libcxx/test/std/strings/string.view/string.view.hash/char_type.hash.fail.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.hash/char_type.hash.fail.cpp
@@ -33,14 +33,14 @@ struct trait // copied from <__string>
     static inline bool eq(char_type __c1, char_type __c2) { return __c1 == __c2; }
     static inline bool lt(char_type __c1, char_type __c2) { return __c1 < __c2; }
 
-    static int compare(const char_type* __s1, const char_type* __s2, size_t __n);
-    static size_t length(const char_type* __s);
-    static const char_type* find(const char_type* __s, size_t __n,
+    static int compare(const char_type* __s1, const char_type* __s2, std::size_t __n);
+    static std::size_t length(const char_type* __s);
+    static const char_type* find(const char_type* __s, std::size_t __n,
                                  const char_type& __a);
-    static char_type* move(char_type* __s1, const char_type* __s2, size_t __n);
-    static char_type* copy(char_type* __s1, const char_type* __s2, size_t __n);
-    static char_type* assign(char_type* __s, size_t __n, char_type __a);
+    static char_type* move(char_type* __s1, const char_type* __s2, std::size_t __n);
+    static char_type* copy(char_type* __s1, const char_type* __s2, std::size_t __n);
+    static char_type* assign(char_type* __s, std::size_t __n, char_type __a);
 
     static inline int_type not_eof(int_type __c) {
         return eq_int_type(__c, eof()) ? ~eof() : __c;
diff --git a/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp b/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp
index 407823ba43258..f0fffbd32e7ae 100644
--- a/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.hash/enabled_hashes.pass.cpp
@@ -36,12 +36,12 @@ struct std::char_traits {
  static bool eq(char_type, char_type);
  static bool lt(char_type, char_type);
 
-  static int compare(const char_type*, const char_type*, size_t);
-  static size_t length(const char_type*);
-  static const char_type* find(const char_type*, size_t, const char_type&);
-  static char_type* move(char_type*, const char_type*, size_t);
-  static char_type* copy(char_type*, const char_type*, size_t);
-  static char_type* assign(char_type*, size_t, char_type);
+  static int compare(const char_type*, const char_type*, std::size_t);
+  static std::size_t length(const char_type*);
+  static const char_type* find(const char_type*, std::size_t, const char_type&);
+  static char_type* move(char_type*, const char_type*, std::size_t);
+  static char_type* copy(char_type*, const char_type*, std::size_t);
+  static char_type* assign(char_type*, std::size_t, char_type);
 
   static int_type not_eof(int_type);
   static char_type to_char_type(int_type);
diff --git a/libcxx/test/std/strings/string.view/string.view.iterators/rbegin.pass.cpp b/libcxx/test/std/strings/string.view/string.view.iterators/rbegin.pass.cpp
index 9b96c0552519b..e5f0db5ffbf24 100644
--- a/libcxx/test/std/strings/string.view/string.view.iterators/rbegin.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.iterators/rbegin.pass.cpp
@@ -27,7 +27,7 @@ test(S s)
     typename S::const_reverse_iterator cb2 = s.crbegin();
     if (!s.empty())
     {
-        const size_t last = s.size() - 1;
+        const std::size_t last = s.size() - 1;
         assert(  *b ==  s[last]);
         assert( &*b == &s[last]);
         assert( *cb1 == s[last]);
diff --git a/libcxx/test/std/strings/string.view/string.view.modifiers/remove_prefix.pass.cpp b/libcxx/test/std/strings/string.view/string.view.modifiers/remove_prefix.pass.cpp
index bda42f0523075..4e41130a54b63 100644
--- a/libcxx/test/std/strings/string.view/string.view.modifiers/remove_prefix.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.modifiers/remove_prefix.pass.cpp
@@ -18,7 +18,7 @@
 #include "test_macros.h"
 
 template <class CharT>
-void test ( const CharT *s, size_t len ) {
+void test ( const CharT *s, std::size_t len ) {
     typedef std::basic_string_view<CharT> SV;
     {
     SV sv1 ( s );
@@ -39,7 +39,7 @@ void test ( const CharT *s, size_t len ) {
 }
 
 #if TEST_STD_VER > 11
-constexpr size_t test_ce ( size_t n, size_t k ) {
+constexpr std::size_t test_ce ( size_t n, size_t k ) {
     typedef std::basic_string_view<char> SV;
     SV sv1{ "ABCDEFGHIJKL", n };
     sv1.remove_prefix ( k );
diff --git a/libcxx/test/std/strings/string.view/string.view.modifiers/remove_suffix.pass.cpp b/libcxx/test/std/strings/string.view/string.view.modifiers/remove_suffix.pass.cpp
index 95a2331b61054..2ebe665c78f9e 100644
--- a/libcxx/test/std/strings/string.view/string.view.modifiers/remove_suffix.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.modifiers/remove_suffix.pass.cpp
@@ -18,7 +18,7 @@
 #include "test_macros.h"
 
 template <class CharT>
-void test ( const CharT *s, size_t len ) {
+void test ( const CharT *s, std::size_t len ) {
     typedef std::basic_string_view<CharT> SV;
     {
     SV sv1 ( s );
@@ -40,7 +40,7 @@ void test ( const CharT *s, size_t len ) {
 }
 
 #if TEST_STD_VER > 11
-constexpr size_t test_ce ( size_t n, size_t k ) {
+constexpr std::size_t test_ce ( size_t n, size_t k ) {
     typedef std::basic_string_view<char> SV;
     SV sv1{ "ABCDEFGHIJKL", n };
     sv1.remove_suffix ( k );
diff --git a/libcxx/test/std/strings/string.view/string.view.modifiers/swap.pass.cpp b/libcxx/test/std/strings/string.view/string.view.modifiers/swap.pass.cpp
index db229987b515b..7f46cbf15e7c1 100644
--- a/libcxx/test/std/strings/string.view/string.view.modifiers/swap.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.modifiers/swap.pass.cpp
@@ -18,7 +18,7 @@
 #include "test_macros.h"
 
 template <class CharT>
-void test ( const CharT *s, size_t len ) {
+void test ( const CharT *s, std::size_t len ) {
     typedef std::basic_string_view<CharT> SV;
     {
     SV sv1(s);
@@ -37,7 +37,7 @@ void test ( const CharT *s, size_t len ) {
 }
 
 #if TEST_STD_VER > 11
-constexpr size_t test_ce ( size_t n, size_t k ) {
+constexpr std::size_t test_ce ( size_t n, size_t k ) {
     typedef std::basic_string_view<char> SV;
     SV sv1{ "ABCDEFGHIJKL", n };
     SV sv2 { sv1.data(), k };
diff --git a/libcxx/test/std/strings/string.view/string.view.ops/compare.pointer_size.pass.cpp b/libcxx/test/std/strings/string.view/string.view.ops/compare.pointer_size.pass.cpp
index a7a5c30241814..b40ff6483ea6b 100644
--- a/libcxx/test/std/strings/string.view/string.view.ops/compare.pointer_size.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.ops/compare.pointer_size.pass.cpp
@@ -23,7 +23,7 @@ int sign ( int x ) { return x > 0 ? 1 : ( x < 0 ? -1 : 0 ); }
 
 template <class CharT>
 void test1 ( std::basic_string_view<CharT> sv1,
-             size_t pos1, size_t n1, const CharT *s, int expected ) {
+             std::size_t pos1, size_t n1, const CharT *s, int expected ) {
 #ifdef TEST_HAS_NO_EXCEPTIONS
     if (pos1 <= sv1.size())
         assert(sign(sv1.compare(pos1, n1, s)) == sign(expected));
@@ -40,7 +40,7 @@ void test1 ( std::basic_string_view<CharT> sv1,
 
 template <class CharT>
 void
-test( const CharT *s1, size_t pos1, size_t n1, const CharT *s2, int expected)
+test( const CharT *s1, std::size_t pos1, size_t n1, const CharT *s2, int expected)
 {
     typedef std::basic_string_view<CharT> string_view_t;
     string_view_t sv1 ( s1 );
@@ -378,7 +378,7 @@ int main(int, char**)
     test("abcdefghijklmnopqrst", 5, 5, "", 20);
     test("abcdefghijklmnopqrst", 0, 8, "abcde", 15);
     test("abcdefghijklmnopqrst", 0, 12, "abcdefghij", 10);
-    test("abcdefghijklmnopqrst", 0, static_cast<size_t>(-1), "abcdefghijklmnopqrst", 0);
+    test("abcdefghijklmnopqrst", 0, static_cast<std::size_t>(-1), "abcdefghijklmnopqrst", 0);
     }
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
@@ -398,7 +398,7 @@ int main(int, char**)
     test(L"abcdefghijklmnopqrst", 5, 5, L"", 20);
     test(L"abcdefghijklmnopqrst", 0, 8, L"abcde", 15);
     test(L"abcdefghijklmnopqrst", 0, 12, L"abcdefghij", 10);
-    test(L"abcdefghijklmnopqrst", 0, static_cast<size_t>(-1), L"abcdefghijklmnopqrst", 0);
+    test(L"abcdefghijklmnopqrst", 0, static_cast<std::size_t>(-1), L"abcdefghijklmnopqrst", 0);
     }
 #endif
 
@@ -419,7 +419,7 @@ int main(int, char**)
     test(U"abcdefghijklmnopqrst", 5, 5, U"", 20);
     test(U"abcdefghijklmnopqrst", 0, 8, U"abcde", 15);
     test(U"abcdefghijklmnopqrst", 0, 12, U"abcdefghij", 10);
-    test(U"abcdefghijklmnopqrst", 0, static_cast<size_t>(-1), U"abcdefghijklmnopqrst", 0);
+    test(U"abcdefghijklmnopqrst", 0, static_cast<std::size_t>(-1), U"abcdefghijklmnopqrst", 0);
     }
 
     {
@@ -438,7 +438,7 @@ int main(int, char**)
     test(u"abcdefghijklmnopqrst", 5, 5, u"", 20);
     test(u"abcdefghijklmnopqrst", 0, 8, u"abcde", 15);
     test(u"abcdefghijklmnopqrst", 0, 12, u"abcdefghij", 10);
-    test(u"abcdefghijklmnopqrst", 0, static_cast<size_t>(-1), u"abcdefghijklmnopqrst", 0);
+    test(u"abcdefghijklmnopqrst", 0, static_cast<std::size_t>(-1), u"abcdefghijklmnopqrst", 0);
     }
 #endif
diff --git a/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv.pass.cpp b/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv.pass.cpp
index 477bfdde8c790..c1999d292882b 100644
--- a/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv.pass.cpp
@@ -22,7 +22,7 @@ int sign ( int x ) { return x > 0 ? 1 : ( x < 0 ? -1 : 0 ); }
 
 template <class CharT>
-void test1 ( std::basic_string_view<CharT> sv1, size_t pos1, size_t n1,
+void test1 ( std::basic_string_view<CharT> sv1, std::size_t pos1, size_t n1,
              std::basic_string_view<CharT> sv2, int expected ) {
 #ifdef TEST_HAS_NO_EXCEPTIONS
     if (pos1 <= sv1.size())
@@ -40,7 +40,7 @@ void test1 ( std::basic_string_view<CharT> sv1, size_t pos1, size_t n1,
 
 template <class CharT>
-void test ( const CharT *s1, size_t pos1, size_t n1, const CharT *s2, int expected ) {
+void test ( const CharT *s1, std::size_t pos1, size_t n1, const CharT *s2, int expected ) {
     typedef std::basic_string_view<CharT> string_view_t;
     string_view_t sv1 ( s1 );
     string_view_t sv2 ( s2 );
diff --git a/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv_pointer_size.pass.cpp b/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv_pointer_size.pass.cpp
index f0c4a79e60837..4e5af15b9352f 100644
--- a/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv_pointer_size.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv_pointer_size.pass.cpp
@@ -23,8 +23,8 @@ int sign ( int x ) { return x > 0 ? 1 : ( x < 0 ? -1 : 0 ); }
 
 template <class CharT>
-void test1 ( std::basic_string_view<CharT> sv1, size_t pos1, size_t n1,
-             const CharT *s2, size_t n2, int expected ) {
+void test1 ( std::basic_string_view<CharT> sv1, std::size_t pos1, size_t n1,
+             const CharT *s2, std::size_t n2, int expected ) {
 #ifdef TEST_HAS_NO_EXCEPTIONS
     if (pos1 <= sv1.size())
         assert(sign(sv1.compare(pos1, n1, s2, n2)) == sign(expected));
@@ -41,8 +41,8 @@ void test1 ( std::basic_string_view<CharT> sv1, size_t pos1, size_t n1,
 
 template <class CharT>
-void test ( const CharT *s1, size_t pos1, size_t n1,
-            const CharT *s2, size_t n2,
+void test ( const CharT *s1, std::size_t pos1, size_t n1,
+            const CharT *s2, std::size_t n2,
             int expected ) {
     typedef std::basic_string_view<CharT> string_view_t;
     string_view_t sv1 ( s1 );
diff --git a/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv_size_size.pass.cpp b/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv_size_size.pass.cpp
index a0f661c3520a6..a882372537503 100644
--- a/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv_size_size.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.ops/compare.size_size_sv_size_size.pass.cpp
@@ -23,8 +23,8 @@ int sign ( int x ) { return x > 0 ? 1 : ( x < 0 ? -1 : 0 ); }
 
 template <class CharT>
-void test1 ( std::basic_string_view<CharT> sv1, size_t pos1, size_t n1,
-             std::basic_string_view<CharT> sv2, size_t pos2, size_t n2,
+void test1 ( std::basic_string_view<CharT> sv1, std::size_t pos1, size_t n1,
+             std::basic_string_view<CharT> sv2, std::size_t pos2, size_t n2,
              int expected ) {
 #ifdef TEST_HAS_NO_EXCEPTIONS
     if (pos1 <= sv1.size() && pos2 <= sv2.size())
@@ -42,8 +42,8 @@ void test1 ( std::basic_string_view<CharT> sv1, size_t pos1, size_t n1,
 
 template <class CharT>
-void test ( const CharT *s1, size_t pos1, size_t n1,
-            const CharT *s2, size_t pos2, size_t n2,
+void test ( const CharT *s1, std::size_t pos1, size_t n1,
+            const CharT *s2, std::size_t pos2, size_t n2,
             int expected ) {
     typedef std::basic_string_view<CharT> string_view_t;
diff --git a/libcxx/test/std/strings/string.view/string.view.ops/copy.pass.cpp b/libcxx/test/std/strings/string.view/string.view.ops/copy.pass.cpp
index 54e185f793c8f..0128e98ee6aa2 100644
--- a/libcxx/test/std/strings/string.view/string.view.ops/copy.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.ops/copy.pass.cpp
@@ -26,8 +26,8 @@
 #include "test_macros.h"
 
 template <class CharT>
-void test1 ( std::basic_string_view<CharT> sv, size_t n, size_t pos ) {
-    const size_t rlen = std::min ( n, sv.size() - pos );
+void test1 ( std::basic_string_view<CharT> sv, std::size_t n, size_t pos ) {
+    const std::size_t rlen = std::min ( n, sv.size() - pos );
 
     CharT *dest1 = new CharT [rlen + 1]; dest1[rlen] = 0;
     CharT *dest2 = new CharT [rlen + 1]; dest2[rlen] = 0;
@@ -45,7 +45,7 @@ void test1 ( std::basic_string_view<CharT> sv, size_t n, size_t pos ) {
     } else {
         sv.copy(dest1, n, pos);
         std::copy_n(sv.begin() + pos, rlen, dest2);
-        for ( size_t i = 0; i <= rlen; ++i )
+        for ( std::size_t i = 0; i <= rlen; ++i )
             assert ( dest1[i] == dest2[i] );
     }
     delete [] dest1;
diff --git a/libcxx/test/std/strings/string.view/string.view.ops/substr.pass.cpp b/libcxx/test/std/strings/string.view/string.view.ops/substr.pass.cpp
index 92f9d8cc8592c..0de64f0e59d49 100644
--- a/libcxx/test/std/strings/string.view/string.view.ops/substr.pass.cpp
+++ b/libcxx/test/std/strings/string.view/string.view.ops/substr.pass.cpp
@@ -24,7 +24,7 @@
 #include "test_macros.h"
 
 template <class CharT>
-void test1(std::basic_string_view<CharT> sv, size_t n, size_t pos) {
+void test1(std::basic_string_view<CharT> sv, std::size_t n, size_t pos) {
     std::basic_string_view<CharT> sv1;
 #ifdef TEST_HAS_NO_EXCEPTIONS
     if (pos > sv.size())
@@ -40,9 +40,9 @@ void test1(std::basic_string_view<CharT> sv, size_t n, size_t pos) {
         return ;
     }
 #endif
-    const size_t rlen = std::min(n, sv.size() - pos);
+    const std::size_t rlen = std::min(n, sv.size() - pos);
     assert (sv1.size() == rlen);
-    for (size_t i = 0; i < rlen; ++i)
+    for (std::size_t i = 0; i < rlen; ++i)
         assert(sv[pos+i] == sv1[i]);
 }
diff --git a/libcxx/test/std/strings/strings.erasure/erase.pass.cpp b/libcxx/test/std/strings/strings.erasure/erase.pass.cpp
index aa397ca68536b..ed7dc3cc5fbea 100644
--- a/libcxx/test/std/strings/strings.erasure/erase.pass.cpp
+++ b/libcxx/test/std/strings/strings.erasure/erase.pass.cpp
@@ -22,7 +22,7 @@
 #include "min_allocator.h"
 
 template <class S, class U>
-void test0(S s, U val, S expected, size_t expected_erased_count) {
+void test0(S s, U val, S expected, std::size_t expected_erased_count) {
  ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase(s, val)));
  assert(expected_erased_count == std::erase(s, val));
  LIBCPP_ASSERT(s.__invariants());
diff --git a/libcxx/test/std/strings/strings.erasure/erase_if.pass.cpp b/libcxx/test/std/strings/strings.erasure/erase_if.pass.cpp
index 1957451bc64aa..b098f88208de8 100644
--- a/libcxx/test/std/strings/strings.erasure/erase_if.pass.cpp
+++ b/libcxx/test/std/strings/strings.erasure/erase_if.pass.cpp
@@ -21,7 +21,7 @@
 #include "min_allocator.h"
 
 template <class S, class Pred>
-void test0(S s, Pred p, S expected, size_t expected_erased_count) {
+void test0(S s, Pred p, S expected, std::size_t expected_erased_count) {
  ASSERT_SAME_TYPE(typename S::size_type, decltype(std::erase_if(s, p)));
  assert(expected_erased_count == std::erase_if(s, p));
  LIBCPP_ASSERT(s.__invariants());
diff --git a/libcxx/test/std/utilities/charconv/charconv.msvc/test.cpp b/libcxx/test/std/utilities/charconv/charconv.msvc/test.cpp
index 38ff3d8f29a62..30ee9adcd74bf 100644
--- a/libcxx/test/std/utilities/charconv/charconv.msvc/test.cpp
+++ b/libcxx/test/std/utilities/charconv/charconv.msvc/test.cpp
@@ -49,10 +49,10 @@ using namespace std;
 
 void initialize_randomness(mt19937_64& mt64, const int argc, char** const /*argv*/) {
-    constexpr size_t n = mt19937_64::state_size;
-    constexpr size_t w = mt19937_64::word_size;
+    constexpr std::size_t n = mt19937_64::state_size;
+    constexpr std::size_t w = mt19937_64::word_size;
     static_assert(w % 32 == 0);
-    constexpr size_t k = w / 32;
+    constexpr std::size_t k = w / 32;
 
     vector<uint32_t> vec(n * k);
 
@@ -70,7 +70,7 @@ void initialize_randomness(mt19937_64& mt64, const int argc, char** const /*argv*/) {
     puts("SEED DATA:");
     for (const auto& elem : vec) {
-        printf("%zu ", static_cast<size_t>(elem));
+        printf("%zu ", static_cast<std::size_t>(elem));
     }
     printf("\n");
 
@@ -103,14 +103,14 @@ void test_common_to_chars(
     // without attempting to write to extra chars even when they're available. Finally, we also verify that we aren't
     // underrunning the buffer. This is a concern because sometimes we walk backwards when rounding.
 
-    constexpr size_t BufferPrefix = 20; // detect buffer underruns (specific value isn't important)
+    constexpr std::size_t BufferPrefix = 20; // detect buffer underruns (specific value isn't important)
 
-    constexpr size_t Space = is_integral_v<T> ? 1 + 64 // worst case: -2^63 in binary
+    constexpr std::size_t Space = is_integral_v<T> ? 1 + 64 // worst case: -2^63 in binary
                              : is_same_v<T, float> ? 1 + 151 // worst case: negative min subnormal float, fixed notation
                                                    : 1 + 1076; // worst case: negative min subnormal double, fixed notation
 
-    constexpr size_t BufferSuffix = 30; // detect buffer overruns (specific value isn't important)
+    constexpr std::size_t BufferSuffix = 30; // detect buffer overruns (specific value isn't important)
 
     array<char, BufferPrefix + Space + BufferSuffix> buff;
 
@@ -118,12 +118,12 @@ void test_common_to_chars(
     char* const first = buff_begin + BufferPrefix;
     char* const buff_end = buff_begin + buff.size();
 
-    constexpr size_t ExtraChars = 3;
+    constexpr std::size_t ExtraChars = 3;
     static_assert(ExtraChars + 10 < BufferSuffix,
         "The specific values aren't important, but there should be plenty of room to detect buffer overruns.");
 
-    for (size_t n = 0; n <= correct.size() + ExtraChars; ++n) {
-        assert(n <= static_cast<size_t>(buff_end - first));
+    for (std::size_t n = 0; n <= correct.size() + ExtraChars; ++n) {
+        assert(n <= static_cast<std::size_t>(buff_end - first));
         char* const last = first + n;
 
         buff.fill('@');
@@ -361,7 +361,7 @@ void test_integer_to_chars() {
 
         for (const auto& p : output_positive) {
             if (p.first <= static_cast<uint64_t>(numeric_limits<T>::max())) {
-                test_integer_to_chars(static_cast<T>(p.first), base, p.second[static_cast<size_t>(base)]);
+                test_integer_to_chars(static_cast<T>(p.first), base, p.second[static_cast<std::size_t>(base)]);
             }
         }
 
@@ -370,7 +370,7 @@ void test_integer_to_chars() {
 
             for (const auto& p : output_negative) {
                 if (p.first >= static_cast<int64_t>(numeric_limits<T>::min())) {
-                    test_integer_to_chars(static_cast<T>(p.first), base, p.second[static_cast<size_t>(base)]);
+                    test_integer_to_chars(static_cast<T>(p.first), base, p.second[static_cast<std::size_t>(base)]);
                 }
             }
         }
@@ -382,7 +382,7 @@ void test_integer_to_chars() {
 
 enum class TestFromCharsMode { Normal, SignalingNaN };
 
 template <class T, class BaseOrFmt>
-void test_from_chars(const string_view input, const BaseOrFmt base_or_fmt, const size_t correct_idx,
+void test_from_chars(const string_view input, const BaseOrFmt base_or_fmt, const std::size_t correct_idx,
     const errc correct_ec, const optional<T> opt_correct = nullopt,
     const TestFromCharsMode mode = TestFromCharsMode::Normal) {
 
@@ -545,7 +545,7 @@ void all_integer_tests() {
 
 void assert_message_bits(const bool b, const char* const msg, const std::uint32_t bits) {
     if (!b) {
-        fprintf(stderr, "%s failed for 0x%08zX\n", msg, static_cast<size_t>(bits));
+        fprintf(stderr, "%s failed for 0x%08zX\n", msg, static_cast<std::size_t>(bits));
         fprintf(stderr, "This is a randomized test.\n");
         fprintf(stderr, "DO NOT IGNORE/RERUN THIS FAILURE.\n");
         fprintf(stderr, "You must report it to the STL maintainers.\n");
@@ -587,7 +587,7 @@ void test_floating_prefix(const conditional_t
 
     // "-1.2345678901234567e-100" or "-1.23456789e-10"
-    constexpr size_t buffer_size = IsDouble ? 24 : 15;
+    constexpr std::size_t buffer_size = IsDouble ? 24 : 15;
     char buffer[buffer_size];
 
     // TODO Enable once std::from_chars has floating point support.
 #if 0
@@ -600,11 +600,11 @@ void test_floating_prefix(const conditional_t
-        const string_view fixed_sv(fixed_buffer, static_cast<size_t>(fixed_result.ptr - fixed_buffer));
+        const string_view fixed_sv(fixed_buffer, static_cast<std::size_t>(fixed_result.ptr - fixed_buffer));
 
         if (find(fixed_sv.begin(), fixed_sv.end(), '.') == fixed_sv.end()) {
             const int stdio_ret = sprintf_s(stdio_buffer, size(stdio_buffer), "%.0f", input);
@@ -654,7 +654,7 @@ void test_floating_hex_prefix(const conditional_t
-    string_view charconv_sv(charconv_buffer, static_cast<size_t>(result.ptr - charconv_buffer));
+    string_view charconv_sv(charconv_buffer, static_cast<std::size_t>(result.ptr - charconv_buffer));
 
     int stdio_ret = sprintf_s(stdio_buffer, size(stdio_buffer), "%.*f", precision, input);
     assert_message_bits(stdio_ret != -1, "sprintf_s fixed precision", bits);
@@ -725,7 +725,7 @@ void test_floating_precision_prefix(const conditional_t
-    charconv_sv = string_view(charconv_buffer, static_cast<size_t>(result.ptr - charconv_buffer));
+    charconv_sv = string_view(charconv_buffer, static_cast<std::size_t>(result.ptr - charconv_buffer));
 
     stdio_ret = sprintf_s(stdio_buffer, size(stdio_buffer), "%.*e", precision, input);
     assert_message_bits(stdio_ret != -1, "sprintf_s scientific precision", bits);
@@ -736,7 +736,7 @@ void test_floating_precision_prefix(const conditional_t
-    charconv_sv = string_view(general_buffer, static_cast<size_t>(result.ptr - general_buffer));
+    charconv_sv = string_view(general_buffer, static_cast<std::size_t>(result.ptr - general_buffer));
 
     stdio_ret = sprintf_s(general_stdio_buffer, size(general_stdio_buffer), "%.5000g", input);
     assert_message_bits(stdio_ret != -1, "sprintf_s general precision", bits);
@@ -1072,7 +1072,7 @@ int main(int argc, char** argv) {
     const long long ms = chrono::duration_cast<chrono::milliseconds>(finish - start).count();
 
     puts("PASS");
-    printf("Randomized test cases: %zu\n", static_cast<size_t>(PrefixesToTest * Fractions));
+    printf("Randomized test cases: %zu\n", static_cast<std::size_t>(PrefixesToTest * Fractions));
     printf("Total time: %lld ms\n", ms);
 
     if (ms < 3'000) {
diff --git a/libcxx/test/std/utilities/charconv/charconv.msvc/test.hpp b/libcxx/test/std/utilities/charconv/charconv.msvc/test.hpp
index 3fdc0d31f8b19..eb6d6486333e9 100644
--- a/libcxx/test/std/utilities/charconv/charconv.msvc/test.hpp
+++ b/libcxx/test/std/utilities/charconv/charconv.msvc/test.hpp
@@ -21,7 +21,7 @@ inline constexpr double double_nan_payload = __builtin_nan("1729");
 struct FloatFromCharsTestCase {
     const char* input;
     chars_format fmt;
-    size_t correct_idx;
+    std::size_t correct_idx;
     errc correct_ec;
     float correct_value;
 };
@@ -42,7 +42,7 @@ struct FloatPrecisionToCharsTestCase {
 struct DoubleFromCharsTestCase {
     const char* input;
     chars_format fmt;
-    size_t correct_idx;
+    std::size_t correct_idx;
     errc correct_ec;
     double correct_value;
 };
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp
index 181c908aa5668..bb190c5ceee25 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp
@@ -32,7 +32,7 @@ void test() {
   const std::basic_format_context context = test_format_context_create(OutIt{output}, args);
   LIBCPP_ASSERT(args.__size() == 4);
   ASSERT_NOEXCEPT(context.arg(0));
-  for (size_t i = 0, e = args.__size(); i != e; ++i) {
+  for (std::size_t i = 0, e = args.__size(); i != e; ++i) {
     assert(context.arg(i));
   }
   assert(!context.arg(args.__size()));
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp
index 5c2b6dc32ac45..bb3299636e623 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.char_array.pass.cpp
@@ -34,14 +34,14 @@
 // This is based on the method found in
 // clang/test/CXX/temp/temp.arg/temp.arg.nontype/p1-cxx20.cpp
-template <size_t N>
+template <std::size_t N>
 struct Tester {
   // This is not part of the real test, but is used the deduce the size of the input.
   constexpr Tester(const char (&r)[N]) { __builtin_memcpy(text, r, N); }
   char text[N];
 
   // The size of the array shouldn't include the NUL character.
-  static const size_t size = N - 1;
+  static const std::size_t size = N - 1;
 
   template <class CharT>
   void test(const std::basic_string<CharT>& expected, const std::basic_string_view<CharT>& fmt) const {
@@ -82,7 +82,7 @@ struct Tester {
   }
 };
-template <size_t N>
+template <std::size_t N>
 Tester(const char (&)[N]) -> Tester<N>;
 
 template
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp
index d74e1104d6a05..dc40acecc8a19 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp
@@ -87,7 +87,7 @@ void test_hex_lower_case_precision(ArithmeticT value) {
   char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::hex, 20'000).ptr;
   test_termination_condition(STR(".20000a}"), value, std::basic_string<CharT>{buffer.begin(), end});
 
-  size_t size = buffer.end() - end;
+  std::size_t size = buffer.end() - end;
   std::fill_n(end, size, '#');
   test_termination_condition(STR("#<25000.20000a}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
   std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
@@ -127,7 +127,7 @@ void test_hex_upper_case_precision(ArithmeticT value) {
   std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
   test_termination_condition(STR(".20000A}"), value, std::basic_string<CharT>{buffer.begin(), end});
 
-  size_t size = buffer.end() - end;
+  std::size_t size = buffer.end() - end;
   std::fill_n(end, size, '#');
   test_termination_condition(STR("#<25000.20000A}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
   std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
@@ -167,7 +167,7 @@ void test_scientific_lower_case_precision(ArithmeticT value) {
   char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::scientific, 20'000).ptr;
   test_termination_condition(STR(".20000e}"), value, std::basic_string<CharT>{buffer.begin(), end});
 
-  size_t size = buffer.end() - end;
+  std::size_t size = buffer.end() - end;
   std::fill_n(end, size, '#');
   test_termination_condition(STR("#<25000.20000e}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
   std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
@@ -207,7 +207,7 @@ void test_scientific_upper_case_precision(ArithmeticT value) {
   std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
   test_termination_condition(STR(".20000E}"), value, std::basic_string<CharT>{buffer.begin(), end});
 
-  size_t size = buffer.end() - end;
+  std::size_t size = buffer.end() - end;
   std::fill_n(end, size, '#');
   test_termination_condition(STR("#<25000.20000E}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
   std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
@@ -247,7 +247,7 @@ void test_fixed_lower_case_precision(ArithmeticT value) {
   char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::fixed, 20'000).ptr;
   test_termination_condition(STR(".20000f}"), value, std::basic_string<CharT>{buffer.begin(), end});
 
-  size_t size = buffer.end() - end;
+  std::size_t size = buffer.end() - end;
   std::fill_n(end, size, '#');
   test_termination_condition(STR("#<25000.20000f}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
   std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
@@ -287,7 +287,7 @@ void test_fixed_upper_case_precision(ArithmeticT value) {
   std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
   test_termination_condition(STR(".20000F}"), value, std::basic_string<CharT>{buffer.begin(), end});
 
-  size_t size = buffer.end() - end;
+  std::size_t size = buffer.end() - end;
   std::fill_n(end, size, '#');
   test_termination_condition(STR("#<25000.20000F}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
   std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
@@ -327,7 +327,7 @@ void test_general_lower_case_precision(ArithmeticT value) {
   char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::general, 20'000).ptr;
   test_termination_condition(STR(".20000g}"), value, std::basic_string<CharT>{buffer.begin(), end});
 
-  size_t size = buffer.end() - end;
+  std::size_t size = buffer.end() - end;
   std::fill_n(end, size, '#');
   test_termination_condition(STR("#<25000.20000g}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
   std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
@@ -367,7 +367,7 @@ void test_general_upper_case_precision(ArithmeticT value) {
   std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
   test_termination_condition(STR(".20000G}"), value, std::basic_string<CharT>{buffer.begin(), end});
 
-  size_t size = buffer.end() - end;
+  std::size_t size = buffer.end() - end;
   std::fill_n(end, size, '#');
   test_termination_condition(STR("#<25000.20000G}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
   std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp
index 030ce42927226..bc6b418862525 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp
@@ -26,7 +26,7 @@ constexpr bool test() {
   std::format_parse_context context("", 10);
 
-  for (size_t i = 0; i < 10; ++i)
+  for (std::size_t i = 0; i < 10; ++i)
     context.check_arg_id(i);
 
   return true;
@@ -46,13 +46,13 @@ void test_exception() {
     assert(false);
  }();
 
-  auto test_arg = [](size_t num_args) {
+  auto test_arg = [](std::size_t num_args) {
     std::format_parse_context context("", num_args);
     // Out of bounds access is valid if !std::is_constant_evaluated()
-    for (size_t i = 0; i <= num_args; ++i)
+    for (std::size_t i = 0; i <= num_args; ++i)
       context.check_arg_id(i);
   };
-  for (size_t i = 0; i < 10; ++i)
+  for (std::size_t i = 0; i < 10; ++i)
     test_arg(i);
 }
a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp index 5ec3a943ba974..2d6de1f2f3354 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp @@ -26,7 +26,7 @@ constexpr bool test() { std::format_parse_context context("", 10); - for (size_t i = 0; i < 10; ++i) + for (std::size_t i = 0; i < 10; ++i) assert(i == context.next_arg_id()); return true; diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp index d08133b98d250..a3184e4cba4a0 100644 --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp @@ -91,12 +91,12 @@ auto test_formatted_size = []( std::basic_string_view expected, test_format_string fmt, Args&&... args) { { - size_t size = std::formatted_size(fmt, std::forward(args)...); + std::size_t size = std::formatted_size(fmt, std::forward(args)...); assert(size == expected.size()); } #ifndef TEST_HAS_NO_LOCALIZATION { - size_t size = std::formatted_size(std::locale(), fmt, std::forward(args)...); + std::size_t size = std::formatted_size(std::locale(), fmt, std::forward(args)...); assert(size == expected.size()); } #endif // TEST_HAS_NO_LOCALIZATION @@ -106,7 +106,7 @@ auto test_format_to_n = []( std::basic_string_view expected, test_format_string fmt, Args&&... args) { { - size_t n = expected.size(); + std::size_t n = expected.size(); std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); assert(result.size == static_cast(expected.size())); @@ -115,7 +115,7 @@ auto test_format_to_n = } #ifndef TEST_HAS_NO_LOCALIZATION { - size_t n = expected.size(); + std::size_t n = expected.size(); std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, std::locale(), fmt, std::forward(args)...); diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp index 9caa3a22d7503..0cb0257b8ea57 100644 --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp @@ -97,12 +97,12 @@ auto test_formatted_size = []( std::basic_string_view expected, test_format_string fmt, Args&&... args) { { - size_t size = std::formatted_size(fmt, std::forward(args)...); + std::size_t size = std::formatted_size(fmt, std::forward(args)...); assert(size == expected.size()); } #ifndef TEST_HAS_NO_LOCALIZATION { - size_t size = std::formatted_size(std::locale(), fmt, std::forward(args)...); + std::size_t size = std::formatted_size(std::locale(), fmt, std::forward(args)...); assert(size == expected.size()); } #endif // TEST_HAS_NO_LOCALIZATION @@ -112,7 +112,7 @@ auto test_format_to_n = []( std::basic_string_view expected, test_format_string fmt, Args&&... 
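For the format.functions changes in this stretch, the key detail is that std::formatted_size is specified to return std::size_t, so the qualified spelling matches the declared return type exactly. A small usage sketch (illustrative only, requires C++20 <format>):

#include <cstddef>
#include <format>

int main() {
  std::size_t size = std::formatted_size("{}-{}", 42, "x"); // formats as "42-x"
  return size == 4 ? 0 : 1;
}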
diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp
index 9caa3a22d7503..0cb0257b8ea57 100644
--- a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp
@@ -97,12 +97,12 @@ auto test_formatted_size = [](
     std::basic_string_view expected, test_format_string fmt, Args&&... args) {
   {
-    size_t size = std::formatted_size(fmt, std::forward<Args>(args)...);
+    std::size_t size = std::formatted_size(fmt, std::forward<Args>(args)...);
     assert(size == expected.size());
   }
 #ifndef TEST_HAS_NO_LOCALIZATION
   {
-    size_t size = std::formatted_size(std::locale(), fmt, std::forward<Args>(args)...);
+    std::size_t size = std::formatted_size(std::locale(), fmt, std::forward<Args>(args)...);
     assert(size == expected.size());
   }
 #endif // TEST_HAS_NO_LOCALIZATION
@@ -112,7 +112,7 @@ auto test_format_to_n = [](
     std::basic_string_view expected, test_format_string fmt, Args&&... args) {
   {
-    size_t n = expected.size();
+    std::size_t n = expected.size();
     std::basic_string<CharT> out(n, CharT(' '));
     std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward<Args>(args)...);
     assert(result.size == static_cast(expected.size()));
@@ -121,7 +121,7 @@
   }
 #ifndef TEST_HAS_NO_LOCALIZATION
   {
-    size_t n = expected.size();
+    std::size_t n = expected.size();
     std::basic_string<CharT> out(n, CharT(' '));
     std::format_to_n_result result =
         std::format_to_n(out.begin(), n, std::locale(), fmt, std::forward<Args>(args)...);
diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp
index 625b99f0a8de1..7d41ddb81a00f 100644
--- a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp
@@ -32,7 +32,7 @@ auto test = [](
     std::basic_string_view expected, test_format_string fmt, Args&&... args) constexpr {
-  size_t size = std::formatted_size(std::locale(), fmt, std::forward<Args>(args)...);
+  std::size_t size = std::formatted_size(std::locale(), fmt, std::forward<Args>(args)...);
   assert(size == expected.size());
 };
diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp
index 6a627ec3fecfe..cd31da125c849 100644
--- a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp
@@ -29,7 +29,7 @@ auto test = [](
     std::basic_string_view expected, test_format_string fmt, Args&&... args) constexpr {
-  size_t size = std::formatted_size(fmt, std::forward<Args>(args)...);
+  std::size_t size = std::formatted_size(fmt, std::forward<Args>(args)...);
   assert(size == expected.size());
 };
diff --git a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
index c3ddef8fe7378..d5939e255423e 100644
--- a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
@@ -165,7 +165,7 @@ void test(std::basic_string_view expected, test_format_string
-    size_t size = std::formatted_size(fmt, std::forward<Args>(args)...);
+    std::size_t size = std::formatted_size(fmt, std::forward<Args>(args)...);
     assert(size == expected.size());
   }
 }
@@ -215,7 +215,7 @@ void test(
   }
   // *** formatted_size ***
   {
-    size_t size = std::formatted_size(loc, fmt, std::forward<Args>(args)...);
+    std::size_t size = std::formatted_size(loc, fmt, std::forward<Args>(args)...);
     assert(size == expected.size());
   }
 }
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pass.cpp
index a7665f181ccd9..1704eb67562e1 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pass.cpp
@@ -39,7 +39,7 @@
 #include "test_iterators.h"
 template <class T> struct MyHash {
-    size_t operator () (T t) const { return static_cast<size_t>(t); }
+    std::size_t operator () (T t) const { return static_cast<std::size_t>(t); }
 };
 template
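The MyHash helper just rewritten recurs throughout the searcher tests. Distilled to a standalone sketch (not from the test suite), the point of the change is that the Hash requirements are phrased in terms of std::size_t, so the functor's return type should be spelled the same way:

#include <cstddef>

template <class T>
struct MyHash {
  std::size_t operator()(T t) const { return static_cast<std::size_t>(t); }
};

int main() {
  MyHash<char> h;
  return h('a') == static_cast<std::size_t>('a') ? 0 : 1;
}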
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pred.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pred.pass.cpp
index c70782c8c78aa..3648be9bb06ac 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pred.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bm/hash.pred.pass.cpp
@@ -39,7 +39,7 @@
 #include "test_iterators.h"
 template <class T> struct MyHash {
-    size_t operator () (T t) const { return static_cast<size_t>(t); }
+    std::size_t operator () (T t) const { return static_cast<std::size_t>(t); }
 };
 struct count_equal
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pass.cpp
index 8ada7ef1d9e80..d0e6d5f8bfb6b 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pass.cpp
@@ -38,7 +38,7 @@
 #include "test_iterators.h"
 template <class T> struct MyHash {
-    size_t operator () (T t) const { return static_cast<size_t>(t); }
+    std::size_t operator () (T t) const { return static_cast<std::size_t>(t); }
 };
 template
diff --git a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pred.pass.cpp b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pred.pass.cpp
index ec74f53850d1b..59b5b30d7f1b5 100644
--- a/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pred.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.search/func.search.bmh/hash.pred.pass.cpp
@@ -38,7 +38,7 @@
 #include "test_iterators.h"
 template <class T> struct MyHash {
-    size_t operator () (T t) const { return static_cast<size_t>(t); }
+    std::size_t operator () (T t) const { return static_cast<std::size_t>(t); }
 };
 struct count_equal
diff --git a/libcxx/test/std/utilities/function.objects/unord.hash/enum.compile.fail.cpp b/libcxx/test/std/utilities/function.objects/unord.hash/enum.compile.fail.cpp
index 2e36b4c55318a..4401bc1b6507f 100644
--- a/libcxx/test/std/utilities/function.objects/unord.hash/enum.compile.fail.cpp
+++ b/libcxx/test/std/utilities/function.objects/unord.hash/enum.compile.fail.cpp
@@ -19,7 +19,7 @@ struct X {};
 int main(int, char**)
 {
     X x;
-    size_t h = std::hash<X>{} ( x );
+    std::size_t h = std::hash<X>{} ( x );
   return 0;
 }
diff --git a/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp b/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp
index 622a99cf0b4e8..c645ad8f476f1 100644
--- a/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp
@@ -44,7 +44,7 @@ test()
     if (small)
     {
         const std::size_t result = h(t);
-        LIBCPP_ASSERT(result == static_cast<size_t>(t));
+        LIBCPP_ASSERT(result == static_cast<std::size_t>(t));
         ((void)result); // Prevent unused warning
     }
 }
@@ -72,7 +72,7 @@ int main(int, char**)
     // LWG #2119
     test();
-    test<size_t>();
+    test<std::size_t>();
     test();
     test();
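The intseq changes that follow hinge on make_index_sequence being specified as make_integer_sequence<std::size_t, N>. A compilable sketch (not from the patch) of the equivalence the test asserts:

#include <cstddef>
#include <type_traits>
#include <utility>

using size4 = std::make_index_sequence<4>;
static_assert(std::is_same<size4, std::integer_sequence<std::size_t, 0, 1, 2, 3>>::value,
              "index sequences carry std::size_t values");

int main() { return 0; }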
diff --git a/libcxx/test/std/utilities/intseq/intseq.general/integer_seq.pass.cpp b/libcxx/test/std/utilities/intseq/intseq.general/integer_seq.pass.cpp
index f607da7f927b6..10a30dc4030ae 100644
--- a/libcxx/test/std/utilities/intseq/intseq.general/integer_seq.pass.cpp
+++ b/libcxx/test/std/utilities/intseq/intseq.general/integer_seq.pass.cpp
@@ -27,9 +27,9 @@ int main(int, char**)
 {
 //  Make a couple of sequences
     using int3    = std::make_integer_sequence<int, 3>;           // generates int:    0,1,2
-    using size7   = std::make_integer_sequence<size_t, 7>;        // generates size_t: 0,1,2,3,4,5,6
+    using size7   = std::make_integer_sequence<std::size_t, 7>;   // generates size_t: 0,1,2,3,4,5,6
     using size4   = std::make_index_sequence<4>;                  // generates size_t: 0,1,2,3
-    using size2   = std::index_sequence_for<int, size_t>;         // generates size_t: 0,1
+    using size2   = std::index_sequence_for<int, std::size_t>;    // generates size_t: 0,1
     using intmix  = std::integer_sequence<int, 9, 8, 7, 2>;       // generates int:    9,8,7,2
     using sizemix = std::index_sequence<1, 1, 2, 3, 5>;           // generates size_t: 1,1,2,3,5
@@ -37,19 +37,19 @@ int main(int, char**)
     static_assert ( std::is_same<int3, std::integer_sequence<int, 0, 1, 2>>::value, "int3 type wrong" );
     static_assert ( int3::size () == 3, "int3 size wrong" );
-    static_assert ( std::is_same<size7, std::integer_sequence<size_t, 0, 1, 2, 3, 4, 5, 6>>::value, "size7 type wrong" );
+    static_assert ( std::is_same<size7, std::integer_sequence<std::size_t, 0, 1, 2, 3, 4, 5, 6>>::value, "size7 type wrong" );
     static_assert ( size7::size () == 7, "size7 size wrong" );
-    static_assert ( std::is_same<size4, std::integer_sequence<size_t, 0, 1, 2, 3>>::value, "size4 type wrong" );
+    static_assert ( std::is_same<size4, std::integer_sequence<std::size_t, 0, 1, 2, 3>>::value, "size4 type wrong" );
     static_assert ( size4::size () == 4, "size4 size wrong" );
-    static_assert ( std::is_same<size2, std::integer_sequence<size_t, 0, 1>>::value, "size2 type wrong" );
+    static_assert ( std::is_same<size2, std::integer_sequence<std::size_t, 0, 1>>::value, "size2 type wrong" );
     static_assert ( size2::size () == 2, "size2 size wrong" );
     static_assert ( std::is_same<intmix, std::integer_sequence<int, 9, 8, 7, 2>>::value, "intmix type wrong" );
     static_assert ( intmix::size () == 4, "intmix size wrong" );
-    static_assert ( std::is_same<sizemix, std::integer_sequence<size_t, 1, 1, 2, 3, 5>>::value, "sizemix type wrong" );
+    static_assert ( std::is_same<sizemix, std::integer_sequence<std::size_t, 1, 1, 2, 3, 5>>::value, "sizemix type wrong" );
     static_assert ( sizemix::size () == 5, "sizemix size wrong" );
     auto tup = std::make_tuple ( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 );
diff --git a/libcxx/test/std/utilities/memory/allocator.traits/allocate_at_least.pass.cpp b/libcxx/test/std/utilities/memory/allocator.traits/allocate_at_least.pass.cpp
index c8e23948bcfb8..ad9a2381cbfa1 100644
--- a/libcxx/test/std/utilities/memory/allocator.traits/allocate_at_least.pass.cpp
+++ b/libcxx/test/std/utilities/memory/allocator.traits/allocate_at_least.pass.cpp
@@ -27,8 +27,8 @@ struct no_allocate_at_least {
   using value_type = T;
   T t;
-  constexpr T* allocate(size_t) { return &t; }
-  constexpr void deallocate(T*, size_t) {}
+  constexpr T* allocate(std::size_t) { return &t; }
+  constexpr void deallocate(T*, std::size_t) {}
 };
 template
@@ -37,9 +37,9 @@ struct has_allocate_at_least {
   T t1;
   T t2;
-  constexpr T* allocate(size_t) { return &t1; }
-  constexpr void deallocate(T*, size_t) {}
-  constexpr std::allocation_result<T*> allocate_at_least(size_t) {
+  constexpr T* allocate(std::size_t) { return &t1; }
+  constexpr void deallocate(T*, std::size_t) {}
+  constexpr std::allocation_result<T*> allocate_at_least(std::size_t) {
     return {&t2, 2};
   }
 };
diff --git a/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/max_size.pass.cpp b/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/max_size.pass.cpp
index 301078e515498..3ede16a08c178 100644
--- a/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/max_size.pass.cpp
+++ b/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/max_size.pass.cpp
@@ -36,7 +36,7 @@ struct B
 {
     typedef T value_type;
-    TEST_CONSTEXPR_CXX20 size_t max_size() const
+    TEST_CONSTEXPR_CXX20 std::size_t max_size() const
     {
         return 100;
     }
diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.constexpr.size.verify.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.constexpr.size.verify.cpp
index 71db96807f55f..67ee2d8e33420 100644
--- a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.constexpr.size.verify.cpp
+++ b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.constexpr.size.verify.cpp
@@ -26,8 +26,8 @@ constexpr bool test()
   A a;
   TEST_IGNORE_NODISCARD a.allocate(AT::max_size(a) + 1);           // just barely too large
   TEST_IGNORE_NODISCARD a.allocate(AT::max_size(a) * 2);           // significantly too large
-  TEST_IGNORE_NODISCARD a.allocate(((size_t) -1) / sizeof(T) + 1); // multiply will overflow
-  TEST_IGNORE_NODISCARD a.allocate((size_t) -1);                   // way too large
+  TEST_IGNORE_NODISCARD a.allocate(((std::size_t) -1) / sizeof(T) + 1); // multiply will overflow
+  TEST_IGNORE_NODISCARD a.allocate((std::size_t) -1);                   // way too large
   return true;
 }
diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp
index 49c7a0b3aad8d..244e9800d0a19 100644
--- a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp
+++ b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.pass.cpp
@@ -26,15 +26,15 @@ static const bool UsingAlignedNew = true;
 #endif
 #ifdef __STDCPP_DEFAULT_NEW_ALIGNMENT__
-static const size_t MaxAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__;
+static const std::size_t MaxAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__;
 #else
-static const size_t MaxAligned = std::alignment_of<std::max_align_t>::value;
+static const std::size_t MaxAligned = std::alignment_of<std::max_align_t>::value;
 #endif
-static const size_t OverAligned = MaxAligned * 2;
+static const std::size_t OverAligned = MaxAligned * 2;
-template <size_t Align>
+template <std::size_t Align>
 struct TEST_ALIGNAS(Align) AlignedType {
   char data;
   static int constructed;
   AlignedType() { ++constructed; }
   AlignedType(AlignedType const&) { ++constructed; }
   ~AlignedType() { --constructed; }
 };
-template <size_t Align>
+template <std::size_t Align>
 int AlignedType<Align>::constructed = 0;
-template <size_t Align>
+template <std::size_t Align>
 void test_aligned() {
   typedef AlignedType<Align> T;
   T::constructed = 0;
@@ -78,7 +78,7 @@ void test_aligned() {
 }
 #if TEST_STD_VER > 17
-template <size_t Align>
+template <std::size_t Align>
 constexpr bool test_aligned_constexpr() {
   typedef AlignedType<Align> T;
   std::allocator<T> a;
diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.size.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.size.pass.cpp
index 3622b7c84adac..1913a0e0dcc8d 100644
--- a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.size.pass.cpp
+++ b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.size.pass.cpp
@@ -19,7 +19,7 @@
 #include "test_macros.h"
 template <class T>
-void test_max(size_t count)
+void test_max(std::size_t count)
 {
     std::allocator<T> a;
     try {
@@ -38,8 +38,8 @@ void test()
     A a;
     test_max<T> (AT::max_size(a) + 1);            // just barely too large
     test_max<T> (AT::max_size(a) * 2);            // significantly too large
-    test_max<T> (((size_t) -1) / sizeof(T) + 1);  // multiply will overflow
-    test_max<T> ((size_t) -1);                    // way too large
+    test_max<T> (((std::size_t) -1) / sizeof(T) + 1); // multiply will overflow
+    test_max<T> ((std::size_t) -1);                   // way too large
 }
 int main(int, char**)
diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp
index 6ac63a49d5015..05c66b065ef15 100644
--- a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp
+++ b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate_at_least.pass.cpp
@@ -25,14 +25,14 @@ static const bool UsingAlignedNew = true;
 #endif
 #ifdef __STDCPP_DEFAULT_NEW_ALIGNMENT__
-static const size_t MaxAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__;
+static const std::size_t MaxAligned = __STDCPP_DEFAULT_NEW_ALIGNMENT__;
 #else
-static const size_t MaxAligned = std::alignment_of<std::max_align_t>::value;
+static const std::size_t MaxAligned = std::alignment_of<std::max_align_t>::value;
 #endif
-static const size_t OverAligned = MaxAligned * 2;
+static const std::size_t OverAligned = MaxAligned * 2;
-template <size_t Align>
+template <std::size_t Align>
 struct alignas(Align) AlignedType {
   char data;
   static int constructed;
   AlignedType(AlignedType const&) { ++constructed; }
   ~AlignedType() { --constructed; }
 };
-template <size_t Align>
+template <std::size_t Align>
 int AlignedType<Align>::constructed = 0;
-template <size_t Align>
+template <std::size_t Align>
 void test_aligned() {
   typedef AlignedType<Align> T;
   T::constructed = 0;
@@ -76,7 +76,7 @@ void test_aligned() {
   }
 }
-template <size_t Align>
+template <std::size_t Align>
 constexpr bool test_aligned_constexpr() {
   typedef AlignedType<Align> T;
   std::allocator<T> a;
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp
index 013db78c876a9..097f88dec0022 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp
@@ -33,9 +33,9 @@
 // libc++-specific.
 LIBCPP_STATIC_ASSERT(std::is_class_v);
-static_assert(std::is_invocable_v);
+static_assert(std::is_invocable_v);
 struct NotConvertibleFromInt {};
-static_assert(!std::is_invocable_v);
 int main(int, char**) {
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp
index a905507eef89d..0e8846e8c7c8f 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.fill.n/ranges_uninitialized_fill_n.pass.cpp
@@ -33,7 +33,7 @@
 LIBCPP_STATIC_ASSERT(std::is_class_v);
 struct NotConvertibleFromInt {};
-static_assert(!std::is_invocable_v);
+static_assert(!std::is_invocable_v);
 int main(int, char**) {
   constexpr int value = 42;
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp
index 09bdcf4bf7b26..df79987920f4d 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp
@@ -34,9 +34,9 @@
 // libc++-specific.
 LIBCPP_STATIC_ASSERT(std::is_class_v);
-static_assert(std::is_invocable_v);
+static_assert(std::is_invocable_v);
 struct NotConvertibleFromInt {};
-static_assert(!std::is_invocable_v);
 int main(int, char**) {
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp
index fc1500ff7f63e..707038e53ed10 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.hash/hash_unique_ptr.pass.cpp
@@ -48,8 +48,8 @@ void test_disabled_with_deleter() {
 }
 template
-struct std::hash<min_pointer<T, std::integral_constant<size_t, 1>>> {
-  size_t operator()(min_pointer<T, std::integral_constant<size_t, 1>> p) const TEST_NOEXCEPT_FALSE {
+struct std::hash<min_pointer<T, std::integral_constant<std::size_t, 1>>> {
+  std::size_t operator()(min_pointer<T, std::integral_constant<std::size_t, 1>> p) const TEST_NOEXCEPT_FALSE {
     if (!p)
       return 0;
     return std::hash<T*>{}(std::addressof(*p));
   }
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp
index 3fa4ce7622c54..e354d4a2721d5 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp
@@ -122,7 +122,7 @@ struct Bar {
   std::max_align_t y;
 };
-void test_aligned(void* p, size_t align) {
+void test_aligned(void* p, std::size_t align) {
   assert(reinterpret_cast(p) % align == 0);
 }
diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared_for_overwrite.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared_for_overwrite.pass.cpp
index 0027b0aa7c289..21e1786f01588 100644
--- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared_for_overwrite.pass.cpp
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/make_shared_for_overwrite.pass.cpp
@@ -40,21 +40,21 @@ static_assert(!HasMakeSharedForOverwrite);
 // bounded array
 static_assert(HasMakeSharedForOverwrite);
-static_assert(!HasMakeSharedForOverwrite);
 static_assert(!HasMakeSharedForOverwrite);
 static_assert(!HasMakeSharedForOverwrite);
 static_assert(HasMakeSharedForOverwrite);
-static_assert(!HasMakeSharedForOverwrite);
 static_assert(!HasMakeSharedForOverwrite);
 static_assert(!HasMakeSharedForOverwrite);
 // unbounded array
-static_assert(HasMakeSharedForOverwrite);
-static_assert(HasMakeSharedForOverwrite);
+static_assert(HasMakeSharedForOverwrite);
+static_assert(HasMakeSharedForOverwrite);
 static_assert(!HasMakeSharedForOverwrite);
 static_assert(!HasMakeSharedForOverwrite);
-static_assert(!HasMakeSharedForOverwrite);
-static_assert(!HasMakeSharedForOverwrite);
+static_assert(!HasMakeSharedForOverwrite);
+static_assert(!HasMakeSharedForOverwrite);
 constexpr char pattern = 0xDE;
diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/aligned_storage.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/aligned_storage.pass.cpp
index 35d116065b075..51fe5dc332e91 100644
--- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/aligned_storage.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.other/aligned_storage.pass.cpp
@@ -274,7 +274,7 @@ int main(int, char**)
     static_assert(std::is_trivial<T1>::value, "");
     static_assert(std::is_standard_layout<T1>::value, "");
 #if TEST_STD_VER >= 11
-    const size_t alignment = TEST_ALIGNOF(std::max_align_t) > 16 ?
+    const std::size_t alignment = TEST_ALIGNOF(std::max_align_t) > 16 ?
         16 : TEST_ALIGNOF(std::max_align_t);
     static_assert(std::alignment_of<T1>::value == alignment, "");
 #else
@@ -292,7 +292,7 @@
     static_assert(std::is_trivial<T1>::value, "");
     static_assert(std::is_standard_layout<T1>::value, "");
 #if TEST_STD_VER >= 11
-    const size_t alignment = TEST_ALIGNOF(std::max_align_t) > 16 ?
+    const std::size_t alignment = TEST_ALIGNOF(std::max_align_t) > 16 ?
         16 : TEST_ALIGNOF(std::max_align_t);
     static_assert(std::alignment_of<T1>::value == alignment, "");
     static_assert(sizeof(T1) == 16 + alignment, "");
diff --git a/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp b/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp
index bb8a319316cdb..cb23e03e7ca70 100644
--- a/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp
+++ b/libcxx/test/std/utilities/optional/optional.hash/hash.pass.cpp
@@ -27,7 +27,7 @@ namespace std {
 template <>
 struct hash<B> {
-  size_t operator()(B const&) noexcept(false) { return 0; }
+  std::size_t operator()(B const&) noexcept(false) { return 0; }
 };
 }
diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique_for_overwrite.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique_for_overwrite.pass.cpp
index 3f86904798fb8..06367fe51479f 100644
--- a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique_for_overwrite.pass.cpp
+++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique_for_overwrite.pass.cpp
@@ -42,21 +42,21 @@ static_assert(!HasMakeUniqueForOverwrite);
 // template
 //   constexpr unique_ptr make_unique_for_overwrite(size_t n);
-static_assert(HasMakeUniqueForOverwrite);
-static_assert(HasMakeUniqueForOverwrite);
+static_assert(HasMakeUniqueForOverwrite);
+static_assert(HasMakeUniqueForOverwrite);
 static_assert(!HasMakeUniqueForOverwrite);
 static_assert(!HasMakeUniqueForOverwrite);
-static_assert(!HasMakeUniqueForOverwrite);
-static_assert(!HasMakeUniqueForOverwrite);
+static_assert(!HasMakeUniqueForOverwrite);
+static_assert(!HasMakeUniqueForOverwrite);
 // template
 //   unspecified make_unique_for_overwrite(Args&&...) = delete;
 static_assert(!HasMakeUniqueForOverwrite);
-static_assert(!HasMakeUniqueForOverwrite);
+static_assert(!HasMakeUniqueForOverwrite);
 static_assert(!HasMakeUniqueForOverwrite);
 static_assert(!HasMakeUniqueForOverwrite);
 static_assert(!HasMakeUniqueForOverwrite);
-static_assert(!HasMakeUniqueForOverwrite);
+static_assert(!HasMakeUniqueForOverwrite);
 static_assert(!HasMakeUniqueForOverwrite);
 static_assert(!HasMakeUniqueForOverwrite);
@@ -108,13 +108,13 @@ struct WithCustomNew {
   inline static bool customNewCalled    = false;
   inline static bool customNewArrCalled = false;
-  static void* operator new(size_t n) {
+  static void* operator new(std::size_t n) {
     customNewCalled = true;
     return ::operator new(n);
     ;
   }
-  static void* operator new[](size_t n) {
+  static void* operator new[](std::size_t n) {
     customNewArrCalled = true;
     return ::operator new[](n);
   }
diff --git a/libcxx/test/std/utilities/template.bitset/bitset.cons/string_ctor.pass.cpp b/libcxx/test/std/utilities/template.bitset/bitset.cons/string_ctor.pass.cpp
index 522b9678d52db..2a80e2d48b322 100644
--- a/libcxx/test/std/utilities/template.bitset/bitset.cons/string_ctor.pass.cpp
+++ b/libcxx/test/std/utilities/template.bitset/bitset.cons/string_ctor.pass.cpp
@@ -76,7 +76,7 @@ TEST_CONSTEXPR_CXX23 void test_for_non_eager_instantiation() {
   // since it may not be well formed and can cause an error in the
   // non-immediate context.
   static_assert(!std::is_constructible, Nonsense*>::value, "");
-  static_assert(!std::is_constructible, Nonsense*, size_t, Nonsense&, Nonsense&>::value, "");
+  static_assert(!std::is_constructible, Nonsense*, std::size_t, Nonsense&, Nonsense&>::value, "");
 }
 TEST_CONSTEXPR_CXX23 bool test() {
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp
index 5722044360140..e3a21149c21e4 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.apply/make_from_tuple.pass.cpp
@@ -43,7 +43,7 @@ struct ConstructibleFromTuple> {
   TypeID const* arg_types;
 };
-template <class Tp, size_t N>
+template <class Tp, std::size_t N>
 struct ConstructibleFromTuple<std::array<Tp, N>> {
   template
   explicit ConstructibleFromTuple(Args&&... xargs)
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/recursion_depth.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/recursion_depth.pass.cpp
index 339361acdf5b7..df10df84229f3 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/recursion_depth.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.cnstr/recursion_depth.pass.cpp
@@ -15,7 +15,7 @@
 #include
 #include
-template <size_t... I>
+template <std::size_t... I>
 constexpr void CreateTuple(std::index_sequence<I...>) {
   std::tuple tuple(I...);
   assert(std::get<0>(tuple) == 0);
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.array.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.array.pass.cpp
index 2236b6d1c29c6..c92ad4bf9834f 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.array.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.array.pass.cpp
@@ -24,7 +24,7 @@
 #include "test_macros.h"
-template <class T, size_t N>
+template <class T, std::size_t N>
 void test()
 {
     static_assert((std::is_base_of,
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.utility.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.utility.pass.cpp
index 458e4ffbc8c92..243f162fc70ec 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.utility.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.utility.pass.cpp
@@ -23,7 +23,7 @@
 #include "test_macros.h"
-template <class T, size_t N>
+template <class T, std::size_t N>
 void test()
 {
     static_assert((std::is_base_of,
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.fail.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.fail.cpp
index cde87e1208773..a2a65e0ebb8b5 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.fail.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.fail.cpp
@@ -27,7 +27,7 @@ struct Dummy3 {};
 template <> struct std::tuple_size<Dummy3> {
 public:
-  static size_t value;
+  static std::size_t value;
 };
 template <>
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.pass.cpp
index c0ec0cae5d57e..cb8d3e7bde512 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_incomplete.pass.cpp
@@ -22,7 +22,7 @@
 #include "test_macros.h"
-template <class T, size_t Size = sizeof(T)>
+template <class T, std::size_t Size = sizeof(T)>
 constexpr bool is_complete(int) { static_assert(Size > 0, ""); return true; }
 template constexpr bool is_complete(long) { return false; }
 template constexpr bool is_complete() { return is_complete(0); }
@@ -31,7 +31,7 @@
 struct Dummy1 {};
 struct Dummy2 {};
 namespace std {
-template <> struct tuple_size<Dummy1> : public integral_constant<size_t, 0> {};
+template <> struct tuple_size<Dummy1> : public integral_constant<std::size_t, 0> {};
 }
 template
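The tuple_size changes in this region all follow one rule: the tuple protocol, including structured bindings, expects a std::size_t `value` on tuple_size specializations. A self-contained sketch with a hypothetical Point type (not from the test suite):

#include <cstddef>
#include <tuple>

struct Point { int x; int y; };

template <> struct std::tuple_size<Point> : std::integral_constant<std::size_t, 2> {};
template <> struct std::tuple_element<0, Point> { using type = int; };
template <> struct std::tuple_element<1, Point> { using type = int; };

template <std::size_t N> int get(Point const& p) {
  static_assert(N < 2, "index out of range");
  return N == 0 ? p.x : p.y;
}

int main() {
  Point p{1, 2};
  auto [a, b] = p; // consults tuple_size<Point>::value, a std::size_t
  return a + b == 3 ? 0 : 1;
}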
diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp
index eb87e0b18c1d3..937d47a38792c 100644
--- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp
+++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple_size_structured_bindings.pass.cpp
@@ -113,7 +113,7 @@ struct Test {
   int x;
 };
-template <size_t N>
+template <std::size_t N>
 int get(Test const&) { static_assert(N == 0, ""); return -1; }
 template <>
@@ -130,7 +130,7 @@ void test_before_tuple_size_specialization() {
 template <>
 struct std::tuple_size<Test> {
 public:
-  static const size_t value = 1;
+  static const std::size_t value = 1;
 };
 void test_after_tuple_size_specialization() {
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate.pass.cpp
index ece7594e8ff2c..77e07dbefbc7b 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate.pass.cpp
@@ -29,7 +29,7 @@
 #include "test_macros.h"
 #include "test_std_memory_resource.h"
-template <size_t S, size_t Align>
+template <std::size_t S, size_t Align>
 void testForSizeAndAlign() {
   struct T {
     alignas(Align) std::byte buf[S];
   };
@@ -48,7 +48,7 @@ void testForSizeAndAlign() {
 }
 #ifndef TEST_HAS_NO_EXCEPTIONS
-template <size_t S>
+template <std::size_t S>
 void testAllocForSizeThrows() {
   struct T {
     std::byte buf[S];
   };
@@ -60,8 +60,8 @@ void testAllocForSizeThrows() {
   Alloc a(&R);
   // Test that allocating exactly the max size does not throw.
-  size_t maxSize = Traits::max_size(a);
-  size_t sizeTypeMax = std::numeric_limits<size_t>::max();
+  std::size_t maxSize = Traits::max_size(a);
+  std::size_t sizeTypeMax = std::numeric_limits<std::size_t>::max();
   if (maxSize != sizeTypeMax) {
     // Test that allocating size_t(~0) throws bad alloc.
     try {
@@ -71,7 +71,7 @@
   }
   // Test that allocating even one more than the max size does throw.
-  size_t overSize = maxSize + 1;
+  std::size_t overSize = maxSize + 1;
   try {
     (void)a.allocate(overSize);
     assert(false);
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp
index a4db295f9ac32..0f5e2f0ae29ad 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp
@@ -27,8 +27,8 @@
 template
 void test() {
-  size_t last_size      = 0;
-  size_t last_alignment = 0;
+  std::size_t last_size      = 0;
+  std::size_t last_alignment = 0;
   TrackingMemRes resource(&last_size, &last_alignment);
   std::pmr::polymorphic_allocator allocator(&resource);
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_object.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_object.pass.cpp
index b061afbbae0f1..5321554cb3a19 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_object.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_object.pass.cpp
@@ -27,8 +27,8 @@
 template
 void test() {
-  size_t last_size      = 0;
-  size_t last_alignment = 0;
+  std::size_t last_size      = 0;
+  std::size_t last_alignment = 0;
   TrackingMemRes resource(&last_size, &last_alignment);
   std::pmr::polymorphic_allocator allocator(&resource);
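The three allocator tests just shown drive std::pmr::polymorphic_allocator, whose allocate and deallocate take their counts as std::size_t. A minimal usage sketch (illustrative only, requires C++17 <memory_resource>):

#include <memory_resource>

int main() {
  std::pmr::monotonic_buffer_resource pool;
  std::pmr::polymorphic_allocator<int> alloc(&pool);
  int* p = alloc.allocate(4); // the count parameter is std::size_t
  alloc.deallocate(p, 4);
}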
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/deallocate.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/deallocate.pass.cpp
index e7884c9b079fe..5ead89bfa8b1d 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/deallocate.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/deallocate.pass.cpp
@@ -26,7 +26,7 @@
 #include "test_macros.h"
 #include "test_std_memory_resource.h"
-template <size_t S, size_t Align>
+template <std::size_t S, size_t Align>
 void testForSizeAndAlign() {
   struct T {
     alignas(Align) std::byte buf[S];
   };
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/new_delete_object.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/new_delete_object.pass.cpp
index 463a8157e7233..a149486542bd1 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/new_delete_object.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/new_delete_object.pass.cpp
@@ -27,8 +27,8 @@
 template
 void test() {
-  size_t last_size      = 0;
-  size_t last_alignment = 0;
+  std::size_t last_size      = 0;
+  std::size_t last_alignment = 0;
   TrackingMemRes resource(&last_size, &last_alignment);
   std::pmr::polymorphic_allocator allocator(&resource);
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/tracking_mem_res.h b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/tracking_mem_res.h
index d0b9eab578092..e78d9881eb457 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/tracking_mem_res.h
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/tracking_mem_res.h
@@ -9,23 +9,24 @@
 #ifndef TRACKING_MEM_RES_H
 #define TRACKING_MEM_RES_H
+#include <cstddef>
 #include
 class TrackingMemRes : public std::pmr::memory_resource {
 public:
-  TrackingMemRes(size_t* last_size, size_t* last_alginment) : last_size_(last_size), last_alginment_(last_alginment) {}
+  TrackingMemRes(std::size_t* last_size, size_t* last_alginment) : last_size_(last_size), last_alginment_(last_alginment) {}
 private:
-  size_t* last_size_;
-  size_t* last_alginment_;
-  void* do_allocate(size_t size, size_t alignment) override {
+  std::size_t* last_size_;
+  std::size_t* last_alginment_;
+  void* do_allocate(std::size_t size, size_t alignment) override {
     *last_size_      = size;
     *last_alginment_ = alignment;
     return std::pmr::new_delete_resource()->allocate(size, alignment);
   }
-  void do_deallocate(void* ptr, size_t size, size_t alignment) override {
+  void do_deallocate(void* ptr, std::size_t size, size_t alignment) override {
     *last_size_      = size;
     *last_alginment_ = alignment;
     std::pmr::new_delete_resource()->deallocate(ptr, size, alignment);
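TrackingMemRes above is one of several memory_resource fixtures touched by this patch; the signature being fixed everywhere is the virtual interface of std::pmr::memory_resource, which is declared with std::size_t. A minimal conforming override set as a sketch (counting_resource is a made-up name, not from the test suite):

#include <cstddef>
#include <memory_resource>

class counting_resource : public std::pmr::memory_resource {
  std::size_t allocations_ = 0;

  void* do_allocate(std::size_t bytes, std::size_t align) override {
    ++allocations_;
    return std::pmr::new_delete_resource()->allocate(bytes, align);
  }
  void do_deallocate(void* p, std::size_t bytes, std::size_t align) override {
    std::pmr::new_delete_resource()->deallocate(p, bytes, align);
  }
  bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override {
    return this == &other;
  }
};

int main() {
  counting_resource r;
  void* p = r.allocate(16);
  r.deallocate(p, 16);
}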
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.res.global/new_delete_resource.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.res.global/new_delete_resource.pass.cpp
index bd8b8e9175c8c..abfeeef7058cf 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.res.global/new_delete_resource.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.res.global/new_delete_resource.pass.cpp
@@ -22,12 +22,12 @@
 #include "test_macros.h"
 class assert_on_compare : public std::pmr::memory_resource {
-  void* do_allocate(size_t, size_t) override {
+  void* do_allocate(std::size_t, size_t) override {
     assert(false);
     return nullptr;
   }
-  void do_deallocate(void*, size_t, size_t) override { assert(false); }
+  void do_deallocate(void*, std::size_t, size_t) override { assert(false); }
   bool do_is_equal(const std::pmr::memory_resource&) const noexcept override {
     assert(false);
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.res.global/null_memory_resource.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.res.global/null_memory_resource.pass.cpp
index fdaa398cbf6ae..097794f00f9e6 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.res.global/null_memory_resource.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.res.global/null_memory_resource.pass.cpp
@@ -24,12 +24,12 @@
 #include "test_macros.h"
 struct assert_on_compare : public std::pmr::memory_resource {
-  void* do_allocate(size_t, size_t) override {
+  void* do_allocate(std::size_t, size_t) override {
     assert(false);
     return nullptr;
   }
-  void do_deallocate(void*, size_t, size_t) override { assert(false); }
+  void do_deallocate(void*, std::size_t, size_t) override { assert(false); }
   bool do_is_equal(const std::pmr::memory_resource&) const noexcept override {
     assert(false);
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_exception_safety.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_exception_safety.pass.cpp
index d8f8dac433c79..e5cc4afc61520 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_exception_safety.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_exception_safety.pass.cpp
@@ -27,9 +27,9 @@ struct repointable_resource : public std::pmr::memory_resource {
   explicit repointable_resource(std::pmr::memory_resource* res) : which(res) {}
 private:
-  void* do_allocate(size_t size, size_t align) override { return which->allocate(size, align); }
+  void* do_allocate(std::size_t size, size_t align) override { return which->allocate(size, align); }
-  void do_deallocate(void* p, size_t size, size_t align) override { return which->deallocate(p, size, align); }
+  void do_deallocate(void* p, std::size_t size, size_t align) override { return which->deallocate(p, size, align); }
   bool do_is_equal(std::pmr::memory_resource const& rhs) const noexcept override { return which->is_equal(rhs); }
 };
@@ -49,7 +49,7 @@ void test_exception_safety() {
   assert(res != buffer);
   assert(globalMemCounter.checkNewCalledEq(1));
   assert(globalMemCounter.checkDeleteCalledEq(0));
-  const size_t last_new_size = globalMemCounter.last_new_size;
+  const std::size_t last_new_size = globalMemCounter.last_new_size;
   upstream.which = std::pmr::null_memory_resource();
   try {
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp
index ffe17d13a8233..f28aabb7f6430 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_in_geometric_progression.pass.cpp
@@ -30,7 +30,7 @@ void test_geometric_progression() {
   std::pmr::memory_resource& r1 = mono1;
   assert(globalMemCounter.checkNewCalledEq(0));
-  size_t next_buffer_size = 100;
+  std::size_t next_buffer_size = 100;
   void* ret = r1.allocate(10, 1);
   assert(ret != nullptr);
   assert(globalMemCounter.checkNewCalledEq(1));
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_overaligned_request.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_overaligned_request.pass.cpp
index d2b8454ae30c2..c16789a4558ea 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_overaligned_request.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_overaligned_request.pass.cpp
@@ -25,7 +25,7 @@ int main(int, char**) {
   auto mono1 = std::pmr::monotonic_buffer_resource(1024, std::pmr::new_delete_resource());
   std::pmr::memory_resource& r1 = mono1;
-  constexpr size_t big_alignment = 8 * alignof(std::max_align_t);
+  constexpr std::size_t big_alignment = 8 * alignof(std::max_align_t);
   static_assert(big_alignment > 4);
   void* ret = r1.allocate(2048, big_alignment);
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_with_initial_size.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_with_initial_size.pass.cpp
index 15b3336c9b783..d764c90300d82 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_with_initial_size.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/allocate_with_initial_size.pass.cpp
@@ -20,7 +20,7 @@
 #include "count_new.h"
 #include "test_macros.h"
-void test(size_t initial_buffer_size) {
+void test(std::size_t initial_buffer_size) {
   globalMemCounter.reset();
   auto mono1 = std::pmr::monotonic_buffer_resource(initial_buffer_size, std::pmr::new_delete_resource());
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/equality.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/equality.pass.cpp
index 426331056af68..a218ac9bdcf10 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/equality.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.res.monotonic.buffer/mem.res.monotonic.buffer.mem/equality.pass.cpp
@@ -19,12 +19,12 @@
 #include <cstddef> // size_t
 struct assert_on_compare : public std::pmr::memory_resource {
-  void* do_allocate(size_t, size_t) override {
+  void* do_allocate(std::size_t, size_t) override {
     assert(false);
     return nullptr;
   }
-  void do_deallocate(void*, size_t, size_t) override { assert(false); }
+  void do_deallocate(void*, std::size_t, size_t) override { assert(false); }
   bool do_is_equal(const std::pmr::memory_resource&) const noexcept override {
     assert(false);
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/equality.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/equality.pass.cpp
index 39f0a11d6ac5e..d0099ca24b392 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/equality.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/equality.pass.cpp
@@ -23,12 +23,12 @@
 #include "count_new.h"
 class assert_on_compare : public std::pmr::memory_resource {
-  void* do_allocate(size_t, size_t) override {
+  void* do_allocate(std::size_t, size_t) override {
     assert(false);
     return nullptr;
   }
-  void do_deallocate(void*, size_t, size_t) override { assert(false); }
+  void do_deallocate(void*, std::size_t, size_t) override { assert(false); }
   bool do_is_equal(const std::pmr::memory_resource&) const noexcept override {
     assert(false);
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_overaligned_request.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_overaligned_request.pass.cpp
index d962d7f3d978b..c62723fd79735 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_overaligned_request.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_overaligned_request.pass.cpp
@@ -21,9 +21,9 @@
 #include "count_new.h"
 #include "test_macros.h"
-bool is_aligned_to(void* p, size_t alignment) {
+bool is_aligned_to(void* p, std::size_t alignment) {
   void* p2     = p;
-  size_t space = 1;
+  std::size_t space = 1;
   void* result = std::align(alignment, 1, p2, space);
   return (result == p);
 }
@@ -34,7 +34,7 @@ int main(int, char**) {
   std::pmr::synchronized_pool_resource sync1(opts, std::pmr::new_delete_resource());
   std::pmr::memory_resource& r1 = sync1;
-  constexpr size_t big_alignment = 8 * alignof(std::max_align_t);
+  constexpr std::size_t big_alignment = 8 * alignof(std::max_align_t);
   static_assert(big_alignment > 4);
   assert(globalMemCounter.checkNewCalledEq(0));
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_reuse_blocks.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_reuse_blocks.pass.cpp
index 1fbc2b0e3fc87..7e694df44a07a 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_reuse_blocks.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_allocate_reuse_blocks.pass.cpp
@@ -21,9 +21,9 @@
 #include "count_new.h"
 #include "test_macros.h"
-static bool is_aligned_to(void* p, size_t alignment) {
+static bool is_aligned_to(void* p, std::size_t alignment) {
   void* p2     = p;
-  size_t space = 1;
+  std::size_t space = 1;
   void* result = std::align(alignment, 1, p2, space);
   return (result == p);
 }
diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_deallocate_matches_allocate.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_deallocate_matches_allocate.pass.cpp
index 26101d8e71c28..13595a66b45a9 100644
--- a/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_deallocate_matches_allocate.pass.cpp
+++ b/libcxx/test/std/utilities/utility/mem.res/mem.res.pool/mem.res.pool.mem/sync_deallocate_matches_allocate.pass.cpp
@@ -22,9 +22,9 @@
 #include
 struct allocation_record {
-  size_t bytes;
-  size_t align;
-  explicit allocation_record(size_t b, size_t a) : bytes(b), align(a) {}
allocation_record(size_t b, size_t a) : bytes(b), align(a) {} + std::size_t bytes; + std::size_t align; + explicit allocation_record(std::size_t b, size_t a) : bytes(b), align(a) {} bool operator==(const allocation_record& rhs) const { return (bytes == rhs.bytes) && (align == rhs.align); } bool operator<(const allocation_record& rhs) const { if (bytes != rhs.bytes) @@ -34,12 +34,12 @@ struct allocation_record { }; class test_resource : public std::pmr::memory_resource { - void* do_allocate(size_t bytes, size_t align) override { + void* do_allocate(std::size_t bytes, size_t align) override { void* result = std::pmr::new_delete_resource()->allocate(bytes, align); successful_allocations.emplace_back(bytes, align); return result; } - void do_deallocate(void* p, size_t bytes, size_t align) override { + void do_deallocate(void* p, std::size_t bytes, size_t align) override { deallocations.emplace_back(bytes, align); return std::pmr::new_delete_resource()->deallocate(p, bytes, align); } @@ -70,7 +70,7 @@ void test_allocation_pattern(F do_pattern) { tr.deallocations.end())); } -template +template auto foo() { return [=](auto& mr) { void* p = mr.allocate(Bytes, Align); diff --git a/libcxx/test/std/utilities/utility/utility.inplace/inplace.pass.cpp b/libcxx/test/std/utilities/utility/utility.inplace/inplace.pass.cpp index 8a1afe2b41fa5..79a9f5ffb98d3 100644 --- a/libcxx/test/std/utilities/utility/utility.inplace/inplace.pass.cpp +++ b/libcxx/test/std/utilities/utility/utility.inplace/inplace.pass.cpp @@ -61,12 +61,12 @@ int main(int, char**) { { using T1 = std::in_place_index_t<0>; using T2 = std::in_place_index_t<1>; - using T3 = std::in_place_index_t(-1)>; + using T3 = std::in_place_index_t(-1)>; static_assert(!std::is_same::value && !std::is_same::value); static_assert(!std::is_same::value); static_assert(check_tag(std::in_place_index<0>)); static_assert(check_tag(std::in_place_index<1>)); - static_assert(check_tag(std::in_place_index(-1)>)); + static_assert(check_tag(std::in_place_index(-1)>)); } return 0; diff --git a/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp b/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp index b18d94fb0c0f3..4e6cc65a9adc9 100644 --- a/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.get/get_index.pass.cpp @@ -234,7 +234,7 @@ void test_const_rvalue_get() { #endif } -template using Idx = std::integral_constant; +template using Idx = std::integral_constant; void test_throws_for_all_value_categories() { #ifndef TEST_HAS_NO_EXCEPTIONS @@ -245,8 +245,8 @@ void test_throws_for_all_value_categories() { V v1(42l); const V &cv1 = v1; assert(v1.index() == 1); - std::integral_constant zero; - std::integral_constant one; + std::integral_constant zero; + std::integral_constant one; auto test = [](auto idx, auto &&v) { using Idx = decltype(idx); try { diff --git a/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp b/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp index 770bbecd73cba..f472144403d55 100644 --- a/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.hash/hash.pass.cpp @@ -24,7 +24,7 @@ #ifndef TEST_HAS_NO_EXCEPTIONS namespace std { template <> struct hash<::MakeEmptyT> { - size_t operator()(const ::MakeEmptyT &) const { + std::size_t operator()(const ::MakeEmptyT &) const { assert(false); return 0; } @@ -127,7 +127,7 @@ namespace std { template <> struct hash { - size_t operator()(B const&) const 
{ + std::size_t operator()(B const&) const { return 0; } }; diff --git a/libcxx/test/std/utilities/variant/variant.helpers/variant_alternative.pass.cpp b/libcxx/test/std/utilities/variant/variant.helpers/variant_alternative.pass.cpp index f5e1e3d2c9ef5..be1a0c960d1ce 100644 --- a/libcxx/test/std/utilities/variant/variant.helpers/variant_alternative.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.helpers/variant_alternative.pass.cpp @@ -27,7 +27,7 @@ #include "test_macros.h" #include "variant_test_helpers.h" -template void test() { +template void test() { static_assert( std::is_same_v::type, E>, ""); static_assert( diff --git a/libcxx/test/std/utilities/variant/variant.helpers/variant_size.pass.cpp b/libcxx/test/std/utilities/variant/variant.helpers/variant_size.pass.cpp index 8c5b70d6f88dc..1ee0c324d7b87 100644 --- a/libcxx/test/std/utilities/variant/variant.helpers/variant_size.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.helpers/variant_size.pass.cpp @@ -23,7 +23,7 @@ #include "test_macros.h" -template void test() { +template void test() { static_assert(std::variant_size::value == E, ""); static_assert(std::variant_size::value == E, ""); static_assert(std::variant_size::value == E, ""); diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp index 46ccd44096653..f92b2753c4c7d 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/copy.pass.cpp @@ -322,7 +322,7 @@ void test_copy_assignment_empty_non_empty() { #endif // TEST_HAS_NO_EXCEPTIONS } -template struct Result { size_t index; T value; }; +template struct Result { std::size_t index; T value; }; void test_copy_assignment_same_index() { { @@ -550,7 +550,7 @@ void test_copy_assignment_different_index() { } } -template +template constexpr bool test_constexpr_assign_imp( std::variant&& v, ValueType&& new_value) { diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp index 775330f8bf71a..700554454c190 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.assign/move.pass.cpp @@ -291,7 +291,7 @@ void test_move_assignment_empty_non_empty() { #endif // TEST_HAS_NO_EXCEPTIONS } -template struct Result { size_t index; T value; }; +template struct Result { std::size_t index; T value; }; void test_move_assignment_same_index() { { @@ -464,7 +464,7 @@ void test_move_assignment_different_index() { } } -template +template constexpr bool test_constexpr_assign_imp( std::variant&& v, ValueType&& new_value) { diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp index b328ba4cbe8ad..52b0d88eb7389 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/copy.pass.cpp @@ -223,7 +223,7 @@ void test_copy_ctor_valueless_by_exception() { #endif // TEST_HAS_NO_EXCEPTIONS } -template +template constexpr bool test_constexpr_copy_ctor_imp(std::variant const& v) { auto v2 = v; return v2.index() == v.index() && diff --git 
a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp index 1c67f020d1107..7d813a09b18d6 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.ctor/move.pass.cpp @@ -161,7 +161,7 @@ void test_move_ctor_sfinae() { } template -struct Result { size_t index; T value; }; +struct Result { std::size_t index; T value; }; void test_move_ctor_basic() { { @@ -292,7 +292,7 @@ void test_move_ctor_valueless_by_exception() { #endif // TEST_HAS_NO_EXCEPTIONS } -template +template constexpr bool test_constexpr_ctor_imp(std::variant const& v) { auto copy = v; auto v2 = std::move(copy); diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp index 37a18496be983..4dbe7a8ba43ea 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_args.pass.cpp @@ -28,18 +28,18 @@ #include "test_macros.h" #include "variant_test_helpers.h" -template +template constexpr auto test_emplace_exists_imp(int) -> decltype( std::declval().template emplace(std::declval()...), true) { return true; } -template +template constexpr auto test_emplace_exists_imp(long) -> bool { return false; } -template constexpr bool emplace_exists() { +template constexpr bool emplace_exists() { return test_emplace_exists_imp(0); } diff --git a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp index 308067897f52f..b598df0de9c7e 100644 --- a/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.variant/variant.mod/emplace_index_init_list_args.pass.cpp @@ -39,18 +39,18 @@ struct InitListArg { : size(il.size()), value(v) {} }; -template +template constexpr auto test_emplace_exists_imp(int) -> decltype( std::declval().template emplace(std::declval()...), true) { return true; } -template +template constexpr auto test_emplace_exists_imp(long) -> bool { return false; } -template constexpr bool emplace_exists() { +template constexpr bool emplace_exists() { return test_emplace_exists_imp(0); } diff --git a/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp b/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp index 1e29073bba2d9..cfd1290d154e9 100644 --- a/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp +++ b/libcxx/test/std/utilities/variant/variant.visit/visit.pass.cpp @@ -347,7 +347,7 @@ void test_caller_accepts_nonconst() { struct MyVariant : std::variant {}; namespace std { -template +template void get(const MyVariant&) { assert(false); } diff --git a/libcxx/test/support/MoveOnly.h b/libcxx/test/support/MoveOnly.h index 0795cac4ef5df..4a1c1fc09cd48 100644 --- a/libcxx/test/support/MoveOnly.h +++ b/libcxx/test/support/MoveOnly.h @@ -61,8 +61,8 @@ template <> struct std::hash { typedef MoveOnly argument_type; - typedef size_t result_type; - TEST_CONSTEXPR size_t operator()(const MoveOnly& x) const {return static_cast(x.get());} + typedef std::size_t result_type; + TEST_CONSTEXPR std::size_t operator()(const MoveOnly& x) 
const {return static_cast(x.get());} }; #endif // MOVEONLY_H diff --git a/libcxx/test/support/allocators.h b/libcxx/test/support/allocators.h index 0cdaacb7555c4..2b987ad872783 100644 --- a/libcxx/test/support/allocators.h +++ b/libcxx/test/support/allocators.h @@ -9,6 +9,7 @@ #ifndef ALLOCATORS_H #define ALLOCATORS_H +#include #include #include #include diff --git a/libcxx/test/support/charconv_test_helpers.h b/libcxx/test/support/charconv_test_helpers.h index 1e8010a3b97a7..455546a394db5 100644 --- a/libcxx/test/support/charconv_test_helpers.h +++ b/libcxx/test/support/charconv_test_helpers.h @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -79,12 +80,12 @@ fits_in(T v) template struct to_chars_test_base { - template + template TEST_CONSTEXPR_CXX23 void test(T v, char const (&expect)[N], Ts... args) { std::to_chars_result r; - constexpr size_t len = N - 1; + constexpr std::size_t len = N - 1; static_assert(len > 0, "expected output won't be empty"); if (!fits_in(v)) @@ -111,7 +112,7 @@ struct to_chars_test_base std::iota(buf, buf + sizeof(buf), static_cast(1)); r = std::to_chars(buf, buf + sizeof(buf), v, args...); assert(r.ec == std::errc{}); - for (size_t i = r.ptr - buf; i < sizeof(buf); ++i) + for (std::size_t i = r.ptr - buf; i < sizeof(buf); ++i) assert(static_cast(buf[i]) == i + 1); *r.ptr = '\0'; diff --git a/libcxx/test/support/constexpr_char_traits.h b/libcxx/test/support/constexpr_char_traits.h index 56c22fe72581f..75380d5a7ffbb 100644 --- a/libcxx/test/support/constexpr_char_traits.h +++ b/libcxx/test/support/constexpr_char_traits.h @@ -12,6 +12,7 @@ #include #include +#include #include "test_macros.h" @@ -35,12 +36,12 @@ struct constexpr_char_traits static TEST_CONSTEXPR bool lt(char_type c1, char_type c2) TEST_NOEXCEPT {return c1 < c2;} - static TEST_CONSTEXPR_CXX14 int compare(const char_type* s1, const char_type* s2, size_t n); - static TEST_CONSTEXPR_CXX14 size_t length(const char_type* s); - static TEST_CONSTEXPR_CXX14 const char_type* find(const char_type* s, size_t n, const char_type& a); - static TEST_CONSTEXPR_CXX14 char_type* move(char_type* s1, const char_type* s2, size_t n); - static TEST_CONSTEXPR_CXX14 char_type* copy(char_type* s1, const char_type* s2, size_t n); - static TEST_CONSTEXPR_CXX14 char_type* assign(char_type* s, size_t n, char_type a); + static TEST_CONSTEXPR_CXX14 int compare(const char_type* s1, const char_type* s2, std::size_t n); + static TEST_CONSTEXPR_CXX14 std::size_t length(const char_type* s); + static TEST_CONSTEXPR_CXX14 const char_type* find(const char_type* s, std::size_t n, const char_type& a); + static TEST_CONSTEXPR_CXX14 char_type* move(char_type* s1, const char_type* s2, std::size_t n); + static TEST_CONSTEXPR_CXX14 char_type* copy(char_type* s1, const char_type* s2, std::size_t n); + static TEST_CONSTEXPR_CXX14 char_type* assign(char_type* s, std::size_t n, char_type a); static TEST_CONSTEXPR int_type not_eof(int_type c) TEST_NOEXCEPT {return eq_int_type(c, eof()) ? 
~eof() : c;} @@ -61,7 +62,7 @@ struct constexpr_char_traits template TEST_CONSTEXPR_CXX14 int -constexpr_char_traits::compare(const char_type* s1, const char_type* s2, size_t n) +constexpr_char_traits::compare(const char_type* s1, const char_type* s2, std::size_t n) { for (; n; --n, ++s1, ++s2) { @@ -74,10 +75,10 @@ constexpr_char_traits::compare(const char_type* s1, const char_type* s2, } template -TEST_CONSTEXPR_CXX14 size_t +TEST_CONSTEXPR_CXX14 std::size_t constexpr_char_traits::length(const char_type* s) { - size_t len = 0; + std::size_t len = 0; for (; !eq(*s, char_type(0)); ++s) ++len; return len; @@ -85,7 +86,7 @@ constexpr_char_traits::length(const char_type* s) template TEST_CONSTEXPR_CXX14 const CharT* -constexpr_char_traits::find(const char_type* s, size_t n, const char_type& a) +constexpr_char_traits::find(const char_type* s, std::size_t n, const char_type& a) { for (; n; --n) { @@ -98,7 +99,7 @@ constexpr_char_traits::find(const char_type* s, size_t n, const char_type template TEST_CONSTEXPR_CXX14 CharT* -constexpr_char_traits::move(char_type* s1, const char_type* s2, size_t n) +constexpr_char_traits::move(char_type* s1, const char_type* s2, std::size_t n) { char_type* r = s1; if (s1 < s2) @@ -118,7 +119,7 @@ constexpr_char_traits::move(char_type* s1, const char_type* s2, size_t n) template TEST_CONSTEXPR_CXX14 CharT* -constexpr_char_traits::copy(char_type* s1, const char_type* s2, size_t n) +constexpr_char_traits::copy(char_type* s1, const char_type* s2, std::size_t n) { if (!TEST_IS_CONSTANT_EVALUATED) // fails in constexpr because we might be comparing unrelated pointers assert(s2 < s1 || s2 >= s1+n); @@ -130,7 +131,7 @@ constexpr_char_traits::copy(char_type* s1, const char_type* s2, size_t n) template TEST_CONSTEXPR_CXX14 CharT* -constexpr_char_traits::assign(char_type* s, size_t n, char_type a) +constexpr_char_traits::assign(char_type* s, std::size_t n, char_type a) { char_type* r = s; for (; n; --n, ++s) diff --git a/libcxx/test/support/container_test_types.h b/libcxx/test/support/container_test_types.h index 96e2e2d2ea8c8..ed26ba5d40af5 100644 --- a/libcxx/test/support/container_test_types.h +++ b/libcxx/test/support/container_test_types.h @@ -86,6 +86,7 @@ */ #include +#include #include #include #include @@ -428,9 +429,9 @@ namespace std { template struct hash< ::CopyInsertable > { typedef ::CopyInsertable argument_type; - typedef size_t result_type; + typedef std::size_t result_type; - size_t operator()(argument_type const& arg) const { + std::size_t operator()(argument_type const& arg) const { return arg.data; } }; diff --git a/libcxx/test/support/controlled_allocators.h b/libcxx/test/support/controlled_allocators.h index 1144ae4c6ecd5..be517d6075b76 100644 --- a/libcxx/test/support/controlled_allocators.h +++ b/libcxx/test/support/controlled_allocators.h @@ -29,7 +29,7 @@ struct AllocController; // 'AllocController' is a concrete type that instruments and controls the // behavior of test allocators. -template +template class CountingAllocator; // 'CountingAllocator' is an basic implementation of the 'Allocator' // requirements that use the 'AllocController' interface. 
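The motivation for this otherwise mechanical size_t -> std::size_t churn across the test suite: the standard only requires <cstddef> to declare std::size_t, and whether the unqualified name is also visible in the global namespace is left to the implementation, so strictly conforming test code must qualify it. A minimal sketch of the failure mode being guarded against (illustrative only, not part of the patch):

  #include <cstddef>

  std::size_t ok = sizeof(int); // guaranteed to be well-formed
  // size_t bad = sizeof(int);  // not guaranteed: ::size_t need not be declared by <cstddef>
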
@@ -97,7 +97,7 @@ struct AllocController { AllocController() = default; - void countAlloc(void* p, size_t s, size_t a) { + void countAlloc(void* p, std::size_t s, size_t a) { ++alive; ++alloc_count; alive_size += s; @@ -107,7 +107,7 @@ struct AllocController { last_align = last_alloc_align = a; } - void countDealloc(void* p, size_t s, size_t a) { + void countDealloc(void* p, std::size_t s, size_t a) { --alive; ++dealloc_count; alive_size -= s; @@ -144,35 +144,35 @@ struct AllocController { last_destroy_pointer = nullptr; } public: - bool checkAlloc(void* p, size_t s, size_t a) const { + bool checkAlloc(void* p, std::size_t s, size_t a) const { return p == last_alloc_pointer && s == last_alloc_size && a == last_alloc_align; } - bool checkAlloc(void* p, size_t s) const { + bool checkAlloc(void* p, std::size_t s) const { return p == last_alloc_pointer && s == last_alloc_size; } - bool checkAllocAtLeast(void* p, size_t s, size_t a) const { + bool checkAllocAtLeast(void* p, std::size_t s, size_t a) const { return p == last_alloc_pointer && s <= last_alloc_size && a <= last_alloc_align; } - bool checkAllocAtLeast(void* p, size_t s) const { + bool checkAllocAtLeast(void* p, std::size_t s) const { return p == last_alloc_pointer && s <= last_alloc_size; } - bool checkDealloc(void* p, size_t s, size_t a) const { + bool checkDealloc(void* p, std::size_t s, size_t a) const { return p == last_dealloc_pointer && s == last_dealloc_size && a == last_dealloc_align; } - bool checkDealloc(void* p, size_t s) const { + bool checkDealloc(void* p, std::size_t s) const { return p == last_dealloc_pointer && s == last_dealloc_size; } @@ -222,7 +222,7 @@ struct AllocController { DISALLOW_COPY(AllocController); }; -template +template class CountingAllocator { public: @@ -282,12 +282,12 @@ class CountingAllocator AllocController& getController() const { return *P; } private: - template friend class CountingAllocator; + template friend class CountingAllocator; AllocController *P; }; -template +template class CountingAllocator { public: @@ -325,17 +325,17 @@ class CountingAllocator AllocController& getController() const { return *P; } private: - template friend class CountingAllocator; + template friend class CountingAllocator; AllocController *P; }; -template +template inline bool operator==(CountingAllocator const& x, CountingAllocator const& y) { return &x.getController() == &y.getController(); } -template +template inline bool operator!=(CountingAllocator const& x, CountingAllocator const& y) { return !(x == y); diff --git a/libcxx/test/support/counting_predicates.h b/libcxx/test/support/counting_predicates.h index ef3c293109a9d..7bf58b251b1f7 100644 --- a/libcxx/test/support/counting_predicates.h +++ b/libcxx/test/support/counting_predicates.h @@ -23,12 +23,12 @@ struct unary_counting_predicate { ~unary_counting_predicate() {} bool operator () (const Arg &a) const { ++count_; return p_(a); } - size_t count() const { return count_; } + std::size_t count() const { return count_; } void reset() { count_ = 0; } private: Predicate p_; - mutable size_t count_; + mutable std::size_t count_; }; @@ -43,12 +43,12 @@ struct binary_counting_predicate { ~binary_counting_predicate() {} bool operator () (const Arg1 &a1, const Arg2 &a2) const { ++count_; return p_(a1, a2); } - size_t count() const { return count_; } + std::size_t count() const { return count_; } void reset() { count_ = 0; } private: Predicate p_; - mutable size_t count_; + mutable std::size_t count_; }; #if TEST_STD_VER > 14 diff --git 
a/libcxx/test/support/deduction_guides_sfinae_checks.h b/libcxx/test/support/deduction_guides_sfinae_checks.h index 767ff0242e1fe..5ada5ac28e8c8 100644 --- a/libcxx/test/support/deduction_guides_sfinae_checks.h +++ b/libcxx/test/support/deduction_guides_sfinae_checks.h @@ -9,6 +9,7 @@ #ifndef TEST_SUPPORT_DEDUCTION_GUIDES_SFINAE_CHECKS_H #define TEST_SUPPORT_DEDUCTION_GUIDES_SFINAE_CHECKS_H +#include #include #include #include @@ -178,30 +179,30 @@ constexpr void UnorderedContainerDeductionGuidesSfinaeAway() { // (iter, iter, buckets) // // Cannot deduce from (BAD_iter, BAD_iter, buckets) - static_assert(SFINAEs_away); - LIBCPP_STATIC_ASSERT(SFINAEs_away); + static_assert(SFINAEs_away); + LIBCPP_STATIC_ASSERT(SFINAEs_away); // (iter, iter, buckets, hash) // // Cannot deduce from (BAD_iter, BAD_iter, buckets, hash) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); LIBCPP_STATIC_ASSERT( - SFINAEs_away); + SFINAEs_away); // Cannot deduce from (iter, iter, buckets, BAD_hash) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); // Note: (iter, iter, buckets, ALLOC_as_hash) is allowed -- it just calls // (iter, iter, buckets, alloc) // (iter, iter, buckets, hash, pred) // // Cannot deduce from (BAD_iter, BAD_iter, buckets, hash, pred) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); LIBCPP_STATIC_ASSERT( - SFINAEs_away); + SFINAEs_away); // Cannot deduce from (iter, iter, buckets, BAD_hash, pred) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); // Cannot deduce from (iter, iter, buckets, ALLOC_as_hash, pred) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); // Note: (iter, iter, buckets, hash, ALLOC_as_pred) is allowed -- it just // calls (iter, iter, buckets, hash, alloc) @@ -209,28 +210,28 @@ constexpr void UnorderedContainerDeductionGuidesSfinaeAway() { // // Cannot deduce from (BAD_iter, BAD_iter, buckets, hash, pred, alloc) static_assert( - SFINAEs_away); + SFINAEs_away); LIBCPP_STATIC_ASSERT(SFINAEs_away); + std::size_t, Hash, Pred, Alloc>); // Cannot deduce from (iter, iter, buckets, BAD_hash, pred, alloc) static_assert( - SFINAEs_away); + SFINAEs_away); // Cannot deduce from (iter, iter, buckets, ALLOC_as_hash, pred, alloc) static_assert( - SFINAEs_away); + SFINAEs_away); // Cannot deduce from (iter, iter, buckets, hash, ALLOC_as_pred, alloc) static_assert( - SFINAEs_away); + SFINAEs_away); // Cannot deduce from (iter, iter, buckets, hash, pred, BAD_alloc) static_assert( - SFINAEs_away); + SFINAEs_away); // (iter, iter, buckets, alloc) // // Cannot deduce from (BAD_iter, BAD_iter, buckets, alloc) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); LIBCPP_STATIC_ASSERT( - SFINAEs_away); + SFINAEs_away); // Note: (iter, iter, buckets, BAD_alloc) is interpreted as (iter, iter, // buckets, hash), which is valid because the only requirement for the hash // parameter is that it's not integral. 
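The SFINAEs_away assertions above check that class template argument deduction is ill-formed for the listed argument combinations. A rough sketch of how such a helper can be built (the names and exact signature here are assumptions for illustration, not the definition the test suite actually uses):

  #include <type_traits>
  #include <utility>

  // The int overload wins while CTAD from Args... succeeds; when deduction
  // fails, it SFINAEs away and the variadic fallback reports true.
  template <template <class...> class C, class... Args,
            class = decltype(C(std::declval<Args>()...))>
  std::false_type ctad_fails(int);

  template <template <class...> class C, class... Args>
  std::true_type ctad_fails(...);

  template <template <class...> class C, class... Args>
  inline constexpr bool ctad_sfinaes_away =
      decltype(ctad_fails<C, Args...>(0))::value;
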
@@ -246,14 +247,14 @@ constexpr void UnorderedContainerDeductionGuidesSfinaeAway() { // (iter, iter, buckets, hash, alloc) // // Cannot deduce from (BAD_iter, BAD_iter, buckets, hash, alloc) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); LIBCPP_STATIC_ASSERT( - SFINAEs_away); + SFINAEs_away); // Cannot deduce from (iter, iter, buckets, BAD_hash, alloc) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); // Cannot deduce from (iter, iter, buckets, ALLOC_as_hash, alloc) static_assert( - SFINAEs_away); + SFINAEs_away); // Note: (iter, iter, buckets, hash, BAD_alloc) is interpreted as (iter, iter, // buckets, hash, pred), which is valid because there are no requirements for // the predicate. @@ -261,16 +262,16 @@ constexpr void UnorderedContainerDeductionGuidesSfinaeAway() { // (init_list, buckets, hash) // // Cannot deduce from (init_list, buckets, BAD_hash) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); // Note: (init_list, buckets, ALLOC_as_hash) is interpreted as (init_list, // buckets, alloc), which is valid. // (init_list, buckets, hash, pred) // // Cannot deduce from (init_list, buckets, BAD_hash, pred) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); // Cannot deduce from (init_list, buckets, ALLOC_as_hash, pred) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); // Note: (init_list, buckets, hash, ALLOC_as_pred) is interpreted as // (init_list, buckets, hash, alloc), which is valid. @@ -278,16 +279,16 @@ constexpr void UnorderedContainerDeductionGuidesSfinaeAway() { // // Cannot deduce from (init_list, buckets, BAD_hash, pred, alloc) static_assert( - SFINAEs_away); + SFINAEs_away); // Cannot deduce from (init_list, buckets, ALLOC_as_hash, pred, alloc) static_assert( - SFINAEs_away); + SFINAEs_away); // Cannot deduce from (init_list, buckets, hash, ALLOC_as_pred, alloc) static_assert( - SFINAEs_away); + SFINAEs_away); // Cannot deduce from (init_list, buckets, hash, pred, BAD_alloc) static_assert( - SFINAEs_away); + SFINAEs_away); // (init_list, buckets, alloc) // @@ -298,9 +299,9 @@ constexpr void UnorderedContainerDeductionGuidesSfinaeAway() { // (init_list, buckets, hash, alloc) // // Cannot deduce from (init_list, buckets, BAD_hash, alloc) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); // Cannot deduce from (init_list, buckets, ALLOC_as_hash, alloc) - static_assert(SFINAEs_away); + static_assert(SFINAEs_away); // (init_list, alloc) // diff --git a/libcxx/test/support/deleter_types.h b/libcxx/test/support/deleter_types.h index c5680020b0646..2afb9b3e6860b 100644 --- a/libcxx/test/support/deleter_types.h +++ b/libcxx/test/support/deleter_types.h @@ -15,6 +15,7 @@ #ifndef SUPPORT_DELETER_TYPES_H #define SUPPORT_DELETER_TYPES_H +#include #include #include #include @@ -385,13 +386,13 @@ void swap(test_deleter& x, test_deleter& y) { #if TEST_STD_VER >= 11 -template +template class PointerDeleter { PointerDeleter(const PointerDeleter&); PointerDeleter& operator=(const PointerDeleter&); public: - typedef min_pointer> pointer; + typedef min_pointer> pointer; TEST_CONSTEXPR_CXX23 PointerDeleter() = default; TEST_CONSTEXPR_CXX23 PointerDeleter(PointerDeleter&&) = default; @@ -413,13 +414,13 @@ class PointerDeleter { PointerDeleter(const PointerDeleter&, typename std::enable_if::value>::type* = 0); }; -template +template class PointerDeleter { PointerDeleter(const PointerDeleter&); PointerDeleter& operator=(const PointerDeleter&); public: - typedef min_pointer > pointer; + typedef min_pointer > pointer; 
TEST_CONSTEXPR_CXX23 PointerDeleter() = default; TEST_CONSTEXPR_CXX23 PointerDeleter(PointerDeleter&&) = default; diff --git a/libcxx/test/support/filesystem_test_helper.h b/libcxx/test/support/filesystem_test_helper.h index 766a973c23720..5d3738b91c041 100644 --- a/libcxx/test/support/filesystem_test_helper.h +++ b/libcxx/test/support/filesystem_test_helper.h @@ -336,7 +336,7 @@ struct scoped_test_env fs::path const cwd = utils::getcwd(); fs::path const tmp = fs::temp_directory_path(); std::string base = cwd.filename().string(); - size_t i = std::hash()(cwd.string()); + std::size_t i = std::hash()(cwd.string()); fs::path p = tmp / (base + "-static_env." + std::to_string(i)); while (utils::exists(p.string())) { p = tmp / (base + "-static_env." + std::to_string(++i)); diff --git a/libcxx/test/support/format.functions.common.h b/libcxx/test/support/format.functions.common.h index e70ae34200573..65da5fd252b7e 100644 --- a/libcxx/test/support/format.functions.common.h +++ b/libcxx/test/support/format.functions.common.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -143,7 +144,7 @@ struct std::formatter { // The return value is a collection of basic_strings, instead of // basic_string_views since the values are temporaries. namespace detail { -template +template std::basic_string get_colons() { static std::basic_string result(N, CharT(':')); return result; diff --git a/libcxx/test/support/make_string.h b/libcxx/test/support/make_string.h index 728b6540abe07..50dd609ce3651 100644 --- a/libcxx/test/support/make_string.h +++ b/libcxx/test/support/make_string.h @@ -47,18 +47,18 @@ #define MKSTR_LEN(CharT, Str) MKSTR(Str).length((const CharT*)0) struct MultiStringType { - MKSTR_WCHAR_ONLY(const wchar_t* w_; size_t wn_; ) - MKSTR_CHAR8_ONLY(const char8_t* u8_; size_t u8n_; ) - MKSTR_CXX11_ONLY(const char16_t* u16_; size_t u16n_; ) - MKSTR_CXX11_ONLY(const char32_t* u32_; size_t u32n_; ) - const char* s_; size_t sn_; + MKSTR_WCHAR_ONLY(const wchar_t* w_; std::size_t wn_; ) + MKSTR_CHAR8_ONLY(const char8_t* u8_; std::size_t u8n_; ) + MKSTR_CXX11_ONLY(const char16_t* u16_; std::size_t u16n_; ) + MKSTR_CXX11_ONLY(const char32_t* u32_; std::size_t u32n_; ) + const char* s_; std::size_t sn_; TEST_CONSTEXPR MultiStringType( - MKSTR_WCHAR_ONLY(const wchar_t *w, size_t wn,) - MKSTR_CHAR8_ONLY(const char8_t *u8, size_t u8n,) - MKSTR_CXX11_ONLY(const char16_t *u16, size_t u16n,) - MKSTR_CXX11_ONLY(const char32_t *u32, size_t u32n,) - const char *s, size_t sn) + MKSTR_WCHAR_ONLY(const wchar_t *w, std::size_t wn,) + MKSTR_CHAR8_ONLY(const char8_t *u8, std::size_t u8n,) + MKSTR_CXX11_ONLY(const char16_t *u16, std::size_t u16n,) + MKSTR_CXX11_ONLY(const char32_t *u32, std::size_t u32n,) + const char *s, std::size_t sn) : MKSTR_WCHAR_ONLY(w_(w), wn_(wn),) MKSTR_CHAR8_ONLY(u8_(u8), u8n_(u8n),) MKSTR_CXX11_ONLY(u16_(u16), u16n_(u16n),) @@ -71,11 +71,11 @@ struct MultiStringType { MKSTR_CXX11_ONLY(constexpr const char16_t *as_ptr(const char16_t*) const { return u16_; }) MKSTR_CXX11_ONLY(constexpr const char32_t *as_ptr(const char32_t*) const { return u32_; }) - TEST_CONSTEXPR size_t length(const char*) const { return sn_; } - MKSTR_WCHAR_ONLY(TEST_CONSTEXPR size_t length(const wchar_t*) const { return wn_; }) - MKSTR_CHAR8_ONLY(constexpr size_t length(const char8_t*) const { return u8n_; }) - MKSTR_CXX11_ONLY(constexpr size_t length(const char16_t*) const { return u16n_; }) - MKSTR_CXX11_ONLY(constexpr size_t length(const char32_t*) const { return u32n_; }) + TEST_CONSTEXPR std::size_t 
length(const char*) const { return sn_; } + MKSTR_WCHAR_ONLY(TEST_CONSTEXPR std::size_t length(const wchar_t*) const { return wn_; }) + MKSTR_CHAR8_ONLY(constexpr std::size_t length(const char8_t*) const { return u8n_; }) + MKSTR_CXX11_ONLY(constexpr std::size_t length(const char16_t*) const { return u16n_; }) + MKSTR_CXX11_ONLY(constexpr std::size_t length(const char32_t*) const { return u32n_; }) // These implicit conversions are used by some tests. TODO: maybe eliminate them? TEST_CONSTEXPR operator const char*() const { return s_; } diff --git a/libcxx/test/support/min_allocator.h b/libcxx/test/support/min_allocator.h index f9d37e72ef631..1b3727af8185c 100644 --- a/libcxx/test/support/min_allocator.h +++ b/libcxx/test/support/min_allocator.h @@ -85,12 +85,12 @@ class no_default_allocator }; struct malloc_allocator_base { - static size_t outstanding_bytes; - static size_t alloc_count; - static size_t dealloc_count; + static std::size_t outstanding_bytes; + static std::size_t alloc_count; + static std::size_t dealloc_count; static bool disable_default_constructor; - static size_t outstanding_alloc() { + static std::size_t outstanding_alloc() { assert(alloc_count >= dealloc_count); return (alloc_count - dealloc_count); } @@ -123,7 +123,7 @@ class malloc_allocator : public malloc_allocator_base T* allocate(std::size_t n) { - const size_t nbytes = n*sizeof(T); + const std::size_t nbytes = n*sizeof(T); ++alloc_count; outstanding_bytes += nbytes; return static_cast(std::malloc(nbytes)); @@ -131,7 +131,7 @@ class malloc_allocator : public malloc_allocator_base void deallocate(T* p, std::size_t n) { - const size_t nbytes = n*sizeof(T); + const std::size_t nbytes = n*sizeof(T); ++dealloc_count; outstanding_bytes -= nbytes; std::free(static_cast(p)); @@ -194,7 +194,7 @@ struct cpp03_overload_allocator : bare_allocator }; template bool cpp03_overload_allocator::construct_called = false; -template > class min_pointer; +template > class min_pointer; template class min_pointer; template class min_pointer; template class min_pointer; diff --git a/libcxx/test/support/nasty_containers.h b/libcxx/test/support/nasty_containers.h index 0b56a27feefa2..18a424ba1fdbf 100644 --- a/libcxx/test/support/nasty_containers.h +++ b/libcxx/test/support/nasty_containers.h @@ -10,6 +10,7 @@ #define NASTY_CONTAINERS_H #include +#include #include #include diff --git a/libcxx/test/support/nasty_string.h b/libcxx/test/support/nasty_string.h index 901700abef37c..672c3cb4ed9ea 100644 --- a/libcxx/test/support/nasty_string.h +++ b/libcxx/test/support/nasty_string.h @@ -10,6 +10,7 @@ #define TEST_SUPPORT_NASTY_STRING_H #include +#include #include #include @@ -72,12 +73,12 @@ struct nasty_char_traits { static constexpr bool lt(char_type c1, char_type c2) noexcept { return c1.c < c2.c; } - static constexpr int compare(const char_type* s1, const char_type* s2, size_t n); - static constexpr size_t length(const char_type* s); - static constexpr const char_type* find(const char_type* s, size_t n, const char_type& a); - static constexpr char_type* move(char_type* s1, const char_type* s2, size_t n); - static constexpr char_type* copy(char_type* s1, const char_type* s2, size_t n); - static constexpr char_type* assign(char_type* s, size_t n, char_type a); + static constexpr int compare(const char_type* s1, const char_type* s2, std::size_t n); + static constexpr std::size_t length(const char_type* s); + static constexpr const char_type* find(const char_type* s, std::size_t n, const char_type& a); + static constexpr char_type* 
move(char_type* s1, const char_type* s2, std::size_t n); + static constexpr char_type* copy(char_type* s1, const char_type* s2, std::size_t n); + static constexpr char_type* assign(char_type* s, std::size_t n, char_type a); static constexpr int_type not_eof(int_type c) noexcept { return eq_int_type(c, eof()) ? ~eof() : c; } @@ -90,7 +91,7 @@ struct nasty_char_traits { static constexpr int_type eof() noexcept { return int_type(EOF); } }; -constexpr int nasty_char_traits::compare(const nasty_char* s1, const nasty_char* s2, size_t n) { +constexpr int nasty_char_traits::compare(const nasty_char* s1, const nasty_char* s2, std::size_t n) { for (; n; --n, ++s1, ++s2) { if (lt(*s1, *s2)) return -1; @@ -100,14 +101,14 @@ constexpr int nasty_char_traits::compare(const nasty_char* s1, const nasty_char* return 0; } -constexpr size_t nasty_char_traits::length(const nasty_char* s) { - size_t len = 0; +constexpr std::size_t nasty_char_traits::length(const nasty_char* s) { + std::size_t len = 0; for (; !eq(*s, nasty_char(0)); ++s) ++len; return len; } -constexpr const nasty_char* nasty_char_traits::find(const nasty_char* s, size_t n, const nasty_char& a) { +constexpr const nasty_char* nasty_char_traits::find(const nasty_char* s, std::size_t n, const nasty_char& a) { for (; n; --n) { if (eq(*s, a)) return s; @@ -116,7 +117,7 @@ constexpr const nasty_char* nasty_char_traits::find(const nasty_char* s, size_t return 0; } -constexpr nasty_char* nasty_char_traits::move(nasty_char* s1, const nasty_char* s2, size_t n) { +constexpr nasty_char* nasty_char_traits::move(nasty_char* s1, const nasty_char* s2, std::size_t n) { nasty_char* r = s1; if (s1 < s2) { for (; n; --n, ++s1, ++s2) @@ -130,7 +131,7 @@ constexpr nasty_char* nasty_char_traits::move(nasty_char* s1, const nasty_char* return r; } -constexpr nasty_char* nasty_char_traits::copy(nasty_char* s1, const nasty_char* s2, size_t n) { +constexpr nasty_char* nasty_char_traits::copy(nasty_char* s1, const nasty_char* s2, std::size_t n) { if (!std::is_constant_evaluated()) // fails in constexpr because we might be comparing unrelated pointers assert(s2 < s1 || s2 >= s1 + n); nasty_char* r = s1; @@ -139,7 +140,7 @@ constexpr nasty_char* nasty_char_traits::copy(nasty_char* s1, const nasty_char* return r; } -constexpr nasty_char* nasty_char_traits::assign(nasty_char* s, size_t n, nasty_char a) { +constexpr nasty_char* nasty_char_traits::assign(nasty_char* s, std::size_t n, nasty_char a) { nasty_char* r = s; for (; n; --n, ++s) assign(*s, a); @@ -148,7 +149,7 @@ constexpr nasty_char* nasty_char_traits::assign(nasty_char* s, size_t n, nasty_c using nasty_string = std::basic_string; -template +template struct ToNastyChar { constexpr ToNastyChar(const char (&r)[N]) { std::transform(r, r + N, std::addressof(text[0]), [](char c) { return nasty_char{c}; }); @@ -156,7 +157,7 @@ struct ToNastyChar { nasty_char text[N]; }; -template +template ToNastyChar(const char (&)[N]) -> ToNastyChar; template diff --git a/libcxx/test/support/operator_hijacker.h b/libcxx/test/support/operator_hijacker.h index 294d2e55879b7..a2569da0310ff 100644 --- a/libcxx/test/support/operator_hijacker.h +++ b/libcxx/test/support/operator_hijacker.h @@ -40,7 +40,7 @@ static_assert(std::is_trivially_copyable::value && // template <> struct std::hash { - size_t operator()(const operator_hijacker&) const { return 0; } + std::size_t operator()(const operator_hijacker&) const { return 0; } }; #endif // SUPPORT_OPERATOR_HIJACKER_H diff --git a/libcxx/test/support/platform_support.h 
b/libcxx/test/support/platform_support.h index 88ddf84eb20c9..ba14b32e3e94d 100644 --- a/libcxx/test/support/platform_support.h +++ b/libcxx/test/support/platform_support.h @@ -87,7 +87,7 @@ std::string get_temp_file_name() inline bool glibc_version_less_than(char const* version) { std::string test_version = std::string("glibc ") + version; - size_t n = confstr(_CS_GNU_LIBC_VERSION, nullptr, (size_t)0); + std::size_t n = confstr(_CS_GNU_LIBC_VERSION, nullptr, (size_t)0); char *current_version = new char[n]; confstr(_CS_GNU_LIBC_VERSION, current_version, n); diff --git a/libcxx/test/support/poisoned_hash_helper.h b/libcxx/test/support/poisoned_hash_helper.h index d37f3b356608a..000b08cd37beb 100644 --- a/libcxx/test/support/poisoned_hash_helper.h +++ b/libcxx/test/support/poisoned_hash_helper.h @@ -11,6 +11,7 @@ #define SUPPORT_POISONED_HASH_HELPER_H #include +#include #include #include @@ -117,7 +118,7 @@ struct ConvertibleTo { template ()(std::declval()))> constexpr bool can_hash(int) { - return std::is_same::value; + return std::is_same::value; } template constexpr bool can_hash(long) { diff --git a/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp b/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp index f204fc33af14f..81450748b1a98 100644 --- a/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp +++ b/libcxx/test/support/test.support/test_poisoned_hash_helper.pass.cpp @@ -17,7 +17,7 @@ #include "test_macros.h" -template +template constexpr bool is_complete_imp(int) { return true; } template constexpr bool is_complete_imp(long) { return false; } template constexpr bool is_complete() { return is_complete_imp(0); } diff --git a/libcxx/test/support/test_allocator.h b/libcxx/test/support/test_allocator.h index c78224946236b..9330150a83851 100644 --- a/libcxx/test/support/test_allocator.h +++ b/libcxx/test/support/test_allocator.h @@ -405,7 +405,7 @@ class thread_unsafe_shared_ptr { struct control_block { template TEST_CONSTEXPR control_block(Args... args) : content(std::forward(args)...) 
{} - size_t ref_count = 1; + std::size_t ref_count = 1; T content; }; diff --git a/libcxx/test/support/test_constexpr_container.h b/libcxx/test/support/test_constexpr_container.h index 092f8b6b4e0e5..ca553be337946 100644 --- a/libcxx/test/support/test_constexpr_container.h +++ b/libcxx/test/support/test_constexpr_container.h @@ -14,6 +14,7 @@ #include #include +#include #include #include "test_macros.h" @@ -34,7 +35,7 @@ class ConstexprFixedCapacityDeque { constexpr iterator end() { return data_ + size_; } constexpr const_iterator begin() const { return data_; } constexpr const_iterator end() const { return data_ + size_; } - constexpr size_t size() const { return size_; } + constexpr std::size_t size() const { return size_; } constexpr const T& front() const { assert(size_ >= 1); return data_[0]; } constexpr const T& back() const { assert(size_ >= 1); return data_[size_-1]; } diff --git a/libcxx/test/support/test_memory_resource.h b/libcxx/test/support/test_memory_resource.h index 1a0d722089ae2..280cdae1c619d 100644 --- a/libcxx/test/support/test_memory_resource.h +++ b/libcxx/test/support/test_memory_resource.h @@ -113,8 +113,8 @@ int TestResourceImp::resource_destructed = 0; struct NullProvider { NullProvider() {} - void* allocate(size_t, size_t) { return nullptr; } - void deallocate(void*, size_t, size_t) {} + void* allocate(std::size_t, size_t) { return nullptr; } + void deallocate(void*, std::size_t, size_t) {} void reset() {} private: DISALLOW_COPY(NullProvider); @@ -122,22 +122,22 @@ struct NullProvider { struct NewDeleteProvider { NewDeleteProvider() {} - void* allocate(size_t s, size_t) { return ::operator new(s); } - void deallocate(void* p, size_t, size_t) { ::operator delete(p); } + void* allocate(std::size_t s, size_t) { return ::operator new(s); } + void deallocate(void* p, std::size_t, size_t) { ::operator delete(p); } void reset() {} private: DISALLOW_COPY(NewDeleteProvider); }; -template // 10 pages worth of memory. +template // 10 pages worth of memory. 
struct BufferProvider { char buffer[Size]; void* next = &buffer; - size_t space = Size; + std::size_t space = Size; BufferProvider() {} - void* allocate(size_t s, size_t a) { + void* allocate(std::size_t s, size_t a) { void* ret = std::align(s, a, next, space); if (ret == nullptr) { #ifndef TEST_HAS_NO_EXCEPTIONS @@ -150,7 +150,7 @@ struct BufferProvider { return ret; } - void deallocate(void*, size_t, size_t) {} + void deallocate(void*, std::size_t, size_t) {} void reset() { next = &buffer; diff --git a/libcxx/test/support/test_std_memory_resource.h b/libcxx/test/support/test_std_memory_resource.h index b372c2731084e..f3923ae1d4d2f 100644 --- a/libcxx/test/support/test_std_memory_resource.h +++ b/libcxx/test/support/test_std_memory_resource.h @@ -105,14 +105,14 @@ int TestResourceImp::resource_destructed = 0; struct NullProvider { NullProvider() {} - void* allocate(size_t, size_t) { + void* allocate(std::size_t, size_t) { #ifndef TEST_HAS_NO_EXCEPTIONS throw std::runtime_error(""); #else std::abort(); #endif } - void deallocate(void*, size_t, size_t) {} + void deallocate(void*, std::size_t, size_t) {} void reset() {} private: @@ -121,23 +121,23 @@ struct NullProvider { struct NewDeleteProvider { NewDeleteProvider() {} - void* allocate(size_t s, size_t) { return ::operator new(s); } - void deallocate(void* p, size_t, size_t) { ::operator delete(p); } + void* allocate(std::size_t s, size_t) { return ::operator new(s); } + void deallocate(void* p, std::size_t, size_t) { ::operator delete(p); } void reset() {} private: DISALLOW_COPY(NewDeleteProvider); }; -template // 10 pages worth of memory. +template // 10 pages worth of memory. struct BufferProvider { char buffer[Size]; void* next = &buffer; - size_t space = Size; + std::size_t space = Size; BufferProvider() {} - void* allocate(size_t s, size_t a) { + void* allocate(std::size_t s, size_t a) { void* ret = std::align(a, s, next, space); if (ret == nullptr) { #ifndef TEST_HAS_NO_EXCEPTIONS @@ -150,7 +150,7 @@ struct BufferProvider { return ret; } - void deallocate(void*, size_t, size_t) {} + void deallocate(void*, std::size_t, size_t) {} void reset() { next = &buffer; diff --git a/libcxx/test/support/uses_alloc_types.h b/libcxx/test/support/uses_alloc_types.h index 94035726077e7..d4489ffc0fe5e 100644 --- a/libcxx/test/support/uses_alloc_types.h +++ b/libcxx/test/support/uses_alloc_types.h @@ -101,7 +101,7 @@ bool checkConstructionEquiv(TestType& T, TestType& U) //////////////////////////////////////////////////////////////////////////////// namespace detail { -template +template struct TakeNImp; template @@ -109,11 +109,11 @@ struct TakeNImp { typedef ArgList type; }; -template +template struct TakeNImp, F, R...> : TakeNImp, R...> {}; -template +template struct TakeNArgs : TakeNImp, Args...> {}; template @@ -263,7 +263,7 @@ struct UsesAllocatorTestBase { UsesAllocatorTestBaseStorage alloc_store; }; -template +template class UsesAllocatorV1 : public UsesAllocatorTestBase, Alloc> { public: @@ -297,7 +297,7 @@ class UsesAllocatorV1 : public UsesAllocatorTestBase +template class UsesAllocatorV2 : public UsesAllocatorTestBase, Alloc> { public: @@ -323,7 +323,7 @@ class UsesAllocatorV2 : public UsesAllocatorTestBase +template class UsesAllocatorV3 : public UsesAllocatorTestBase, Alloc> { public: @@ -355,7 +355,7 @@ class UsesAllocatorV3 : public UsesAllocatorTestBase +template class NotUsesAllocator : public UsesAllocatorTestBase, Alloc> { public: diff --git a/libcxx/utils/generate_extended_grapheme_cluster_test.py 
b/libcxx/utils/generate_extended_grapheme_cluster_test.py index 94f669235a73b..6714b221b7789 100755 --- a/libcxx/utils/generate_extended_grapheme_cluster_test.py +++ b/libcxx/utils/generate_extended_grapheme_cluster_test.py @@ -169,7 +169,7 @@ def parseBreakTestLine(input: TextIO) -> Optional[BreakTestItem]: /// The offset of the last code units of the extended grapheme clusters in the input. /// /// The vector has the same number of entries as \\ref code_points. - std::vector breaks; + std::vector breaks; }}; /// The data for UTF-8. From 24caf0196d03858bd9fe90d14133fb69c8cea444 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 21 Mar 2023 09:47:07 -0700 Subject: [PATCH 210/691] [IndVarSimplify] Remove duplicate call to getSCEV. NFC We already did this same call on the line before. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D146444 --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 2b19842050a5f..18c3fe06503b4 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1080,8 +1080,7 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB, // a truncate within in. bool Extended = false; const SCEV *IV = SE->getSCEV(CmpIndVar); - const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar), - ExitCnt->getType()); + const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType()); const SCEV *ZExtTrunc = SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType()); From b904e68f13ba7d4f4aa86a3495e2441c99247671 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 21 Mar 2023 12:48:13 -0400 Subject: [PATCH 211/691] No longer issue static lambda pedantic warning for pre-c++2b compat We were accidentally issuing "static lambdas are incompatible with C++ standards before C++2b" with -pedantic because it was an ExtWarn diagnostic rather than a Warning. This corrects the diagnostic category and adds some test coverage. Fixes #61582 --- clang/docs/ReleaseNotes.rst | 4 ++-- clang/include/clang/Basic/DiagnosticParseKinds.td | 2 +- clang/test/Parser/cxx2b-lambdas-ext-warns.cpp | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 6ae71683804d4..c0162cf506cbc 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -241,8 +241,8 @@ Bug Fixes to C++ Support - Fix incorrect deletion of the default constructor of unions in some cases. (`#48416 `_) - No longer issue a pre-C++2b compatibility warning in ``-pedantic`` mode - regading overloaded `operator[]` with more than one parmeter. (`#61582 - `_) + regading overloaded `operator[]` with more than one parmeter or for static + lambdas. 
(`#61582 `_) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 243c69a551650..2ad4d98f0ed88 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1049,7 +1049,7 @@ def err_lambda_template_parameter_list_empty : Error< // C++2b static lambdas def err_static_lambda: ExtWarn< "static lambdas are a C++2b extension">, InGroup; -def warn_cxx20_compat_static_lambda: ExtWarn< +def warn_cxx20_compat_static_lambda : Warning< "static lambdas are incompatible with C++ standards before C++2b">, InGroup, DefaultIgnore; def err_static_mutable_lambda : Error< diff --git a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp index bcb5574a2fe10..3a80f59aba565 100644 --- a/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp +++ b/clang/test/Parser/cxx2b-lambdas-ext-warns.cpp @@ -1,6 +1,7 @@ // RUN: %clang_cc1 -std=c++20 %s -verify=cxx20 // RUN: %clang_cc1 -std=c++2b %s -verify=cxx2b // RUN: %clang_cc1 -std=c++2b -Wpre-c++2b-compat %s -verify=precxx2b +// RUN: %clang_cc1 -std=c++2b -pedantic %s -verify=cxx2b //cxx2b-no-diagnostics From 2981832501f7bca6dc95ba54af68bdd1766629c4 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Tue, 21 Mar 2023 12:58:17 -0400 Subject: [PATCH 212/691] [ADT] Add `llvm::range_size` function for generic ranges This function follows `std::ranges::size` from C++20. It is intended mainly for generic code that does not know the exact range type. I did not modify the existing `llvm::size` function because it has a strict guarantee of O(1) running time, and we cannot guarantee that when we delegate size check to user-defined size functions. Use `range_size` to optimize size checks in `zip`* and `enumerate` functions. Before that, we would have to perform linear scans for ranges without random access iterators. This is the last change I have planned in the series that overhauls `zip`* and `enumerate`. Reviewed By: dblaikie, zero9178 Differential Revision: https://reviews.llvm.org/D146231 --- llvm/include/llvm/ADT/STLExtras.h | 53 ++++++++++++++++++++---- llvm/unittests/ADT/IteratorTest.cpp | 61 ++++++++++++++++++++++++++++ llvm/unittests/ADT/STLExtrasTest.cpp | 33 +++++++++++++++ 3 files changed, 138 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 8d739106bccbb..d19e2f9d067e7 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -77,6 +77,14 @@ constexpr void swap_impl(T &&lhs, swap(std::forward(lhs), std::forward(rhs)); } +using std::size; + +template +constexpr auto size_impl(RangeT &&range) + -> decltype(size(std::forward(range))) { + return size(std::forward(range)); +} + } // end namespace adl_detail /// Returns the begin iterator to \p range using `std::begin` and @@ -103,6 +111,14 @@ constexpr void adl_swap(T &&lhs, T &&rhs) noexcept( adl_detail::swap_impl(std::forward(lhs), std::forward(rhs)); } +/// Returns the size of \p range using `std::size` and functions found through +/// Argument-Dependent Lookup (ADL). 
+template <typename RangeT>
+constexpr auto adl_size(RangeT &&range)
+    -> decltype(adl_detail::size_impl(std::forward<RangeT>(range))) {
+  return adl_detail::size_impl(std::forward<RangeT>(range));
+}
+
 namespace detail {
 
 template <typename RangeT>
@@ -745,6 +761,8 @@ bool any_of(R &&range, UnaryPredicate P);
 template <typename T>
 bool all_equal(std::initializer_list<T> Values);
 
+template <typename R> constexpr size_t range_size(R &&Range);
+
 namespace detail {
 
 using std::declval;
@@ -936,9 +954,7 @@ detail::zippy<detail::zip_shortest, T, U, Args...> zip(T &&t, U &&u,
 template <typename T, typename U, typename... Args>
 detail::zippy<detail::zip_first, T, U, Args...> zip_equal(T &&t, U &&u,
                                                           Args &&...args) {
-  assert(all_equal({std::distance(adl_begin(t), adl_end(t)),
-                    std::distance(adl_begin(u), adl_end(u)),
-                    std::distance(adl_begin(args), adl_end(args))...}) &&
+  assert(all_equal({range_size(t), range_size(u), range_size(args)...}) &&
          "Iteratees do not have equal length");
   return detail::zippy<detail::zip_first, T, U, Args...>(
       std::forward<T>(t), std::forward<U>(u), std::forward<Args>(args)...);
@@ -951,9 +967,7 @@ detail::zippy<detail::zip_first, T, U, Args...> zip_equal(T &&t, U &&u,
 template <typename T, typename U, typename... Args>
 detail::zippy<detail::zip_first, T, U, Args...> zip_first(T &&t, U &&u,
                                                           Args &&...args) {
-  assert(std::distance(adl_begin(t), adl_end(t)) <=
-         std::min({std::distance(adl_begin(u), adl_end(u)),
-                   std::distance(adl_begin(args), adl_end(args))...}) &&
+  assert(range_size(t) <= std::min({range_size(u), range_size(args)...}) &&
          "First iteratee is not the shortest");
 
   return detail::zippy<detail::zip_first, T, U, Args...>(
@@ -1769,6 +1783,29 @@ auto size(R &&Range,
   return std::distance(Range.begin(), Range.end());
 }
 
+namespace detail {
+template <typename Range>
+using check_has_free_function_size =
+    decltype(adl_size(std::declval<Range &>()));
+
+template <typename Range>
+static constexpr bool HasFreeFunctionSize =
+    is_detected<check_has_free_function_size, Range>::value;
+} // namespace detail
+
+/// Returns the size of the \p Range, i.e., the number of elements. This
+/// implementation takes inspiration from `std::ranges::size` from C++20 and
+/// delegates the size check to `adl_size` or `std::distance`, in this order of
+/// preference. Unlike `llvm::size`, this function does *not* guarantee O(1)
+/// running time, and is intended to be used in generic code that does not know
+/// the exact range type.
+template <typename R> constexpr size_t range_size(R &&Range) {
+  if constexpr (detail::HasFreeFunctionSize<R>)
+    return adl_size(Range);
+  else
+    return static_cast<size_t>(std::distance(adl_begin(Range), adl_end(Range)));
+}
+
 /// Provide wrappers to std::for_each which take ranges instead of having to
 /// pass begin/end explicitly.
 template <typename R, typename UnaryFunction>
@@ -2389,9 +2426,7 @@ auto enumerate(FirstRange &&First, RestRanges &&...Rest) {
 #ifndef NDEBUG
   // Note: Create an array instead of an initializer list to work around an
   // Apple clang 14 compiler bug.
-  size_t sizes[] = {
-      static_cast<size_t>(std::distance(adl_begin(First), adl_end(First))),
-      static_cast<size_t>(std::distance(adl_begin(Rest), adl_end(Rest)))...};
+  size_t sizes[] = {range_size(First), range_size(Rest)...};
   assert(all_equal(sizes) && "Ranges have different length");
 #endif
 }
diff --git a/llvm/unittests/ADT/IteratorTest.cpp b/llvm/unittests/ADT/IteratorTest.cpp
index 7d10729c2dd9f..5b815dbcd37a6 100644
--- a/llvm/unittests/ADT/IteratorTest.cpp
+++ b/llvm/unittests/ADT/IteratorTest.cpp
@@ -743,4 +743,65 @@ TEST(RangeTest, Distance) {
   EXPECT_EQ(std::distance(v2.begin(), v2.end()), size(v2));
 }
 
+TEST(RangeSizeTest, CommonRangeTypes) {
+  SmallVector<int> v1 = {1, 2, 3};
+  EXPECT_EQ(range_size(v1), 3u);
+
+  std::map<int, int> m1 = {{1, 1}, {2, 2}};
+  EXPECT_EQ(range_size(m1), 2u);
+
+  auto it_range = llvm::make_range(m1.begin(), m1.end());
+  EXPECT_EQ(range_size(it_range), 2u);
+
+  static constexpr int c_arr[5] = {};
+  static_assert(range_size(c_arr) == 5u);
+
+  static constexpr std::array<int, 6> cpp_arr = {};
+  static_assert(range_size(cpp_arr) == 6u);
+}
+
+struct FooWithMemberSize {
+  size_t size() const { return 42; }
+  auto begin() { return Data.begin(); }
+  auto end() { return Data.end(); }
+
+  std::set<int> Data;
+};
+
+TEST(RangeSizeTest, MemberSize) {
+  // Make sure that member `.size()` is preferred over the free function and
+  // `std::distance`.
+  FooWithMemberSize container;
+  EXPECT_EQ(range_size(container), 42u);
+}
+
+struct FooWithFreeSize {
+  friend size_t size(const FooWithFreeSize &) { return 13; }
+  auto begin() { return Data.begin(); }
+  auto end() { return Data.end(); }
+
+  std::set<int> Data;
+};
+
+TEST(RangeSizeTest, FreeSize) {
+  // Make sure that `size(x)` is preferred over `std::distance`.
+  FooWithFreeSize container;
+  EXPECT_EQ(range_size(container), 13u);
+}
+
+struct FooWithDistance {
+  auto begin() { return Data.begin(); }
+  auto end() { return Data.end(); }
+
+  std::set<int> Data;
+};
+
+TEST(RangeSizeTest, Distance) {
+  // Make sure that we can fall back to `std::distance` even if the iterator is
+  // not random-access.
+ FooWithDistance container; + EXPECT_EQ(range_size(container), 0u); + container.Data = {1, 2, 3, 4}; + EXPECT_EQ(range_size(container), 4u); +} } // anonymous namespace diff --git a/llvm/unittests/ADT/STLExtrasTest.cpp b/llvm/unittests/ADT/STLExtrasTest.cpp index bb602bb6c39f7..278447166fc59 100644 --- a/llvm/unittests/ADT/STLExtrasTest.cpp +++ b/llvm/unittests/ADT/STLExtrasTest.cpp @@ -575,6 +575,39 @@ TEST(STLExtrasTest, ADLTestConstexpr) { SUCCEED(); } +struct FooWithMemberSize { + size_t size() const { return 42; } + auto begin() { return Data.begin(); } + auto end() { return Data.end(); } + + std::set Data; +}; + +namespace some_namespace { +struct FooWithFreeSize { + auto begin() { return Data.begin(); } + auto end() { return Data.end(); } + + std::set Data; +}; + +size_t size(const FooWithFreeSize &) { return 13; } +} // namespace some_namespace + +TEST(STLExtrasTest, ADLSizeTest) { + FooWithMemberSize foo1; + EXPECT_EQ(adl_size(foo1), 42u); + + some_namespace::FooWithFreeSize foo2; + EXPECT_EQ(adl_size(foo2), 13u); + + static constexpr int c_arr[] = {1, 2, 3}; + static_assert(adl_size(c_arr) == 3u); + + static constexpr std::array cpp_arr = {}; + static_assert(adl_size(cpp_arr) == 4u); +} + TEST(STLExtrasTest, DropBeginTest) { SmallVector vec{0, 1, 2, 3, 4}; From 006230e10de66edcce05427f5fc36cf30c554df7 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 21 Mar 2023 10:11:01 -0700 Subject: [PATCH 213/691] [test] Remove redundant check prefix from new-pm-thinlto-prelink-pgo-defaults.ll --- .../new-pm-thinlto-prelink-pgo-defaults.ll | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index f6fbeff0acb44..7f35a6d1acab9 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -5,27 +5,27 @@ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \ ; RUN: -passes='thinlto-pre-link' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1,CHECK-O123SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O1 ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \ ; RUN: -passes='thinlto-pre-link' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-O123SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \ ; RUN: -passes='thinlto-pre-link' -S -passes-ep-pipeline-start='no-op-module' %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-O123SZ,CHECK-EP-PIPELINE-START +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O3,CHECK-O23SZ,CHECK-EP-PIPELINE-START ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager \ ; RUN: -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \ ; RUN: -passes='thinlto-pre-link' -S %s 2>&1 \ -; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123SZ,CHECK-O23SZ +; RUN: | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ ; RUN: opt -disable-verify 
 ; RUN:     -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \
 ; RUN:     -passes='thinlto-pre-link<Oz>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-O123SZ,CHECK-O23SZ
+; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-O23SZ
 ; RUN: opt -disable-verify -verify-analysis-invalidation=0 -eagerly-invalidate-analyses=0 -debug-pass-manager -debug-info-for-profiling \
 ; RUN:     -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \
 ; RUN:     -passes='thinlto-pre-link<O2>' -S %s 2>&1 \
-; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ,CHECK-O123SZ
+; RUN:     | FileCheck %s --check-prefixes=CHECK-O,CHECK-O2,CHECK-O23SZ
 ;
 ; CHECK-O: Running pass: Annotation2Metadata
 ; CHECK-O-NEXT: Running pass: ForceFunctionAttrsPass
@@ -56,20 +56,20 @@
 ; CHECK-O-NEXT: Running analysis: TypeBasedAA
 ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: SimplifyCFGPass
-; CHECK-O123SZ-NEXT: Running pass: ModuleInlinerWrapperPass
-; CHECK-O123SZ-NEXT: Running analysis: InlineAdvisorAnalysis
-; CHECK-O123SZ-NEXT: Running analysis: InnerAnalysisManagerProxy
-; CHECK-O123SZ-NEXT: Running analysis: LazyCallGraphAnalysis
-; CHECK-O123SZ-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
-; CHECK-O123SZ-NEXT: Running analysis: OuterAnalysisManagerProxy
-; CHECK-O123SZ-NEXT: Running pass: InlinerPass on (foo)
-; CHECK-O123SZ-NEXT: Running pass: InlinerPass on (foo)
-; CHECK-O123SZ-NEXT: Running pass: SROAPass on foo
-; CHECK-O123SZ-NEXT: Running pass: EarlyCSEPass on foo
-; CHECK-O123SZ-NEXT: Running pass: SimplifyCFGPass on foo
-; CHECK-O123SZ-NEXT: Running pass: InstCombinePass on foo
-; CHECK-O123SZ-NEXT: Invalidating analysis: InlineAdvisorAnalysis
-; CHECK-O123SZ-NEXT: Running pass: GlobalDCEPass
+; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass
+; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis
+; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy
+; CHECK-O-NEXT: Running analysis: LazyCallGraphAnalysis
+; CHECK-O-NEXT: Running analysis: FunctionAnalysisManagerCGSCCProxy on (foo)
+; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
+; CHECK-O-NEXT: Running pass: InlinerPass on (foo)
+; CHECK-O-NEXT: Running pass: InlinerPass on (foo)
+; CHECK-O-NEXT: Running pass: SROAPass on foo
+; CHECK-O-NEXT: Running pass: EarlyCSEPass on foo
+; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo
+; CHECK-O-NEXT: Running pass: InstCombinePass on foo
+; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis
+; CHECK-O-NEXT: Running pass: GlobalDCEPass
 ; CHECK-O-NEXT: Running pass: PGOInstrumentationUse
 ; These next two can appear in any order since they are accessed as parameters
 ; on the same call to BlockFrequencyInfo::calculate.
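; NOTE: every RUN invocation above passed CHECK-O123SZ alongside CHECK-O, so
; the extra prefix selected exactly the same set of runs as CHECK-O itself;
; that redundancy is why its check lines fold into plain CHECK-O lines here.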
@@ -78,8 +78,8 @@
 ; CHECK-O-DAG: Running analysis: LoopAnalysis on foo
 ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo
 ; CHECK-O-NEXT: Invalidating analysis: InnerAnalysisManagerProxy
-; CHECK-O123SZ-NEXT: Invalidating analysis: LazyCallGraphAnalysis on
-; CHECK-O123SZ-NEXT: Invalidating analysis: InnerAnalysisManagerProxy
+; CHECK-O-NEXT: Invalidating analysis: LazyCallGraphAnalysis on
+; CHECK-O-NEXT: Invalidating analysis: InnerAnalysisManagerProxy
 ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}ProfileSummaryAnalysis
 ; CHECK-O-NEXT: Running pass: PGOIndirectCallPromotion on
 ; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy

From 75fdf7fd1516090651c0c3ffba4869cba9f3a879 Mon Sep 17 00:00:00 2001
From: Dave Lee
Date: Thu, 16 Mar 2023 19:15:38 -0700
Subject: [PATCH 214/691] [lldb] Test direct ivar access in objc++ (NFC)

Add an Objective-C++ specific test for direct ivar access. This adds to the
existing C++ and ObjC tests, and tests against regression for future
refactoring.

Differential Revision: https://reviews.llvm.org/D146320
---
 .../frame/var/direct-ivar/objcpp/Makefile     |  4 +++
 .../TestFrameVarDirectIvarObjCPlusPlus.py     | 24 +++++++++++++
 .../frame/var/direct-ivar/objcpp/main.mm      | 35 +++++++++++++++++++
 3 files changed, 63 insertions(+)
 create mode 100644 lldb/test/API/commands/frame/var/direct-ivar/objcpp/Makefile
 create mode 100644 lldb/test/API/commands/frame/var/direct-ivar/objcpp/TestFrameVarDirectIvarObjCPlusPlus.py
 create mode 100644 lldb/test/API/commands/frame/var/direct-ivar/objcpp/main.mm

diff --git a/lldb/test/API/commands/frame/var/direct-ivar/objcpp/Makefile b/lldb/test/API/commands/frame/var/direct-ivar/objcpp/Makefile
new file mode 100644
index 0000000000000..e987754de59ab
--- /dev/null
+++ b/lldb/test/API/commands/frame/var/direct-ivar/objcpp/Makefile
@@ -0,0 +1,4 @@
+OBJCXX_SOURCES := main.mm
+CFLAGS_EXTRAS := -fblocks -fobjc-arc
+LD_EXTRAS := -lobjc
+include Makefile.rules

diff --git a/lldb/test/API/commands/frame/var/direct-ivar/objcpp/TestFrameVarDirectIvarObjCPlusPlus.py b/lldb/test/API/commands/frame/var/direct-ivar/objcpp/TestFrameVarDirectIvarObjCPlusPlus.py
new file mode 100644
index 0000000000000..e09a8ff5df96d
--- /dev/null
+++ b/lldb/test/API/commands/frame/var/direct-ivar/objcpp/TestFrameVarDirectIvarObjCPlusPlus.py
@@ -0,0 +1,24 @@
+import lldb
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test.decorators import *
+from lldbsuite.test import lldbutil
+
+
+class TestCase(TestBase):
+    @skipUnlessDarwin
+    def test_objc_self(self):
+        self.build()
+        lldbutil.run_to_source_breakpoint(self, "check self", lldb.SBFileSpec("main.mm"))
+        self.expect("frame variable _ivar", startstr="(int) _ivar = 30")
+
+    @skipUnlessDarwin
+    def test_objc_explicit_self(self):
+        self.build()
+        lldbutil.run_to_source_breakpoint(self, "check explicit self", lldb.SBFileSpec("main.mm"))
+        self.expect("frame variable _ivar", startstr="(int) _ivar = 30")
+
+    @skipUnlessDarwin
+    def test_cpp_this(self):
+        self.build()
+        lldbutil.run_to_source_breakpoint(self, "check this", lldb.SBFileSpec("main.mm"))
+        self.expect("frame variable m_field", startstr="(int) m_field = 41")

diff --git a/lldb/test/API/commands/frame/var/direct-ivar/objcpp/main.mm b/lldb/test/API/commands/frame/var/direct-ivar/objcpp/main.mm
new file mode 100644
index 0000000000000..2903d19cee883
--- /dev/null
+++ b/lldb/test/API/commands/frame/var/direct-ivar/objcpp/main.mm
@@ -0,0 +1,35 @@
+#import <Foundation/Foundation.h>
+#include <stdio.h>
+
+struct Structure {
+  int m_field;
+  void fun() {
+    puts("check this\n");
+  }
+};
+
+@interface Classic : NSObject {
+@public
+  int _ivar;
+}
+@end
+
+@implementation Classic
+- (void)fun {
+  puts("check self\n");
+}
+@end
+
+int main() {
+  Structure s;
+  s.m_field = 41;
+  s.fun();
+
+  Classic *c = [Classic new];
+  c->_ivar = 30;
+  [c fun];
+
+  Classic *self = c;
+  puts("check explicit self\n");
+  (void)self;
+}

From 651b4054a019cb681bb879e31a01054a3105ea90 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 21 Mar 2023 17:23:21 +0000
Subject: [PATCH 215/691] [DAG] visitABS - use FoldConstantArithmetic to perform constant folding.

Avoid needing to perform extra isConstantIntBuildVectorOrConstantInt checks

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5a331ceb96d64..4a34d4724ae8f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10617,8 +10617,8 @@ SDValue DAGCombiner::visitABS(SDNode *N) {
   EVT VT = N->getValueType(0);
 
   // fold (abs c1) -> c2
-  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
-    return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
+  if (SDValue C = DAG.FoldConstantArithmetic(ISD::ABS, SDLoc(N), VT, {N0}))
+    return C;
   // fold (abs (abs x)) -> (abs x)
   if (N0.getOpcode() == ISD::ABS)
     return N0;

From c819257b7f3e88cbb90cdaaa90eec650513702c7 Mon Sep 17 00:00:00 2001
From: Paul Kirth
Date: Tue, 21 Mar 2023 16:39:33 +0000
Subject: [PATCH 216/691] [llvm][support] Fix ScopedPrinterTest on AIX

The test strings we used for infinity and NAN were not correct on AIX.
This patch creates those dynamically instead of hard-coded.

Reviewed By: abhina.sreeskantharajan

Differential Revision: https://reviews.llvm.org/D146542
---
 llvm/unittests/Support/ScopedPrinterTest.cpp | 22 ++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/llvm/unittests/Support/ScopedPrinterTest.cpp b/llvm/unittests/Support/ScopedPrinterTest.cpp
index ea024ec05df6b..f62d310f25d95 100644
--- a/llvm/unittests/Support/ScopedPrinterTest.cpp
+++ b/llvm/unittests/Support/ScopedPrinterTest.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/ADT/APSInt.h"
+#include "llvm/Support/Format.h"
 #include "gtest/gtest.h"
 #include 
 #include 
@@ -594,6 +595,19 @@ TEST_F(ScopedPrinterTest, PrintNumber) {
   format("%5.1f", std::numeric_limits<double>::max()).snprint(Buf, sizeof(Buf));
   std::string MaxDoubleStr(Buf);
 
+  format("%5.1f", std::numeric_limits<float>::infinity())
+      .snprint(Buf, sizeof(Buf));
+  std::string InfFloatStr(Buf);
+
+  format("%5.1f", std::numeric_limits<double>::infinity()).snprint(Buf, sizeof(Buf));
+  std::string InfDoubleStr(Buf);
+
+  format("%5.1f", std::nanf("1")).snprint(Buf, sizeof(Buf));
+  std::string NaNFloatStr(Buf);
+
+  format("%5.1f", std::nan("1")).snprint(Buf, sizeof(Buf));
+  std::string NaNDoubleStr(Buf);
+
   std::string ExpectedOut = Twine(
       R"(uint64_t-max: 18446744073709551615
 uint64_t-min: 0
@@ -615,15 +629,15 @@
 apsint: 9999999999999999999999
 label: value (0)
 float-max: )" + MaxFloatStr + R"(
 float-min: 0.0
-float-inf: inf
-float-nan: nan
+float-inf: )" + InfFloatStr + R"(
+float-nan: )" + NaNFloatStr + R"(
 float-42.0: 42.0
 float-42.5625: 42.6
 double-max: )" + MaxDoubleStr + R"(
 double-min: 0.0
-double-inf: inf
-double-nan: nan
+double-inf: )" + InfDoubleStr + R"(
+double-nan: )" + NaNDoubleStr + R"(
 double-42.0: 42.0
 double-42.5625: 42.6
 )")

From 385496385476fc9735da5fa4acabc34654e8b30d Mon Sep 17 00:00:00 2001
From: Dave Lee
Date: Wed, 8 Mar 2023
13:22:00 -0800 Subject: [PATCH 217/691] Recommit [lldb] Change dwim-print to default to disabled persistent results Change `dwim-print` to now disable persistent results by default, unless requested by the user with the `--persistent-result` flag. Ex: ``` (lldb) dwim-print 1 + 1 (int) 2 (lldb) dwim-print --persistent-result on -- 1 + 1 (int) $0 = 2 ``` Users who wish to enable persistent results can make and use an alias that includes `--persistent-result on`. Updates: To recommit this, both TestPersistentResult.py and TestPAlias.py needed to be updated, as well as the changes in D146230. Differential Revision: https://reviews.llvm.org/D145609 --- .../Commands/CommandObjectDWIMPrint.cpp | 4 +++ .../Commands/CommandObjectExpression.cpp | 9 +++--- .../source/Commands/CommandObjectExpression.h | 2 +- .../API/commands/dwim-print/TestDWIMPrint.py | 31 ++++++++----------- .../persistent_result/TestPersistentResult.py | 8 ++--- .../API/functionalities/alias/TestPAlias.py | 2 +- 6 files changed, 28 insertions(+), 28 deletions(-) diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index d8bc7a1e89696..419a27acc8181 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -71,6 +71,10 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, return false; } + // If the user has not specified, default to disabling persistent results. + if (m_expr_options.suppress_persistent_result == eLazyBoolCalculate) + m_expr_options.suppress_persistent_result = eLazyBoolYes; + auto verbosity = GetDebugger().GetDWIMPrintVerbosity(); Target *target_ptr = m_exe_ctx.GetTargetPtr(); diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp index 63b92363369df..2658677085a24 100644 --- a/lldb/source/Commands/CommandObjectExpression.cpp +++ b/lldb/source/Commands/CommandObjectExpression.cpp @@ -151,7 +151,7 @@ Status CommandObjectExpression::CommandOptions::SetOptionValue( bool persist_result = OptionArgParser::ToBoolean(option_arg, true, &success); if (success) - suppress_persistent_result = !persist_result; + suppress_persistent_result = !persist_result ? eLazyBoolYes : eLazyBoolNo; else error.SetErrorStringWithFormat( "could not convert \"%s\" to a boolean value.", @@ -187,7 +187,7 @@ void CommandObjectExpression::CommandOptions::OptionParsingStarting( auto_apply_fixits = eLazyBoolCalculate; top_level = false; allow_jit = true; - suppress_persistent_result = false; + suppress_persistent_result = eLazyBoolCalculate; } llvm::ArrayRef @@ -202,8 +202,9 @@ CommandObjectExpression::CommandOptions::GetEvaluateExpressionOptions( options.SetCoerceToId(display_opts.use_objc); // Explicitly disabling persistent results takes precedence over the // m_verbosity/use_objc logic. 
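   // (suppress_persistent_result is now a LazyBool: eLazyBoolCalculate records
   // that the user never passed --persistent-result, which is what allows
   // dwim-print, earlier in this patch, to install its own default without
   // overriding an explicit on/off choice.)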
- if (suppress_persistent_result) - options.SetSuppressPersistentResult(true); + if (suppress_persistent_result != eLazyBoolCalculate) + options.SetSuppressPersistentResult(suppress_persistent_result == + eLazyBoolYes); else if (m_verbosity == eLanguageRuntimeDescriptionDisplayVerbosityCompact) options.SetSuppressPersistentResult(display_opts.use_objc); options.SetUnwindOnError(unwind_on_error); diff --git a/lldb/source/Commands/CommandObjectExpression.h b/lldb/source/Commands/CommandObjectExpression.h index e381a4a5aaf92..d6a4bb19fd650 100644 --- a/lldb/source/Commands/CommandObjectExpression.h +++ b/lldb/source/Commands/CommandObjectExpression.h @@ -53,7 +53,7 @@ class CommandObjectExpression : public CommandObjectRaw, lldb::LanguageType language; LanguageRuntimeDescriptionDisplayVerbosity m_verbosity; LazyBool auto_apply_fixits; - bool suppress_persistent_result; + LazyBool suppress_persistent_result; }; CommandObjectExpression(CommandInterpreter &interpreter); diff --git a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py index 705e2ef79ddeb..22d18f91d0a59 100644 --- a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py +++ b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py @@ -16,18 +16,16 @@ def _run_cmd(self, cmd: str) -> str: self.ci.HandleCommand(cmd, result) return result.GetOutput().rstrip() - VAR_IDENT_RAW = r"(?:\$\d+|\w+) = " - VAR_IDENT = re.compile(VAR_IDENT_RAW) + VAR_IDENT = re.compile(r"(?:\$\d+|\w+) = ") - def _mask_persistent_var(self, string: str) -> str: + def _strip_result_var(self, string: str) -> str: """ - Replace persistent result variables (ex '$0', '$1', etc) with a regex - that matches any persistent result (r'\$\d+'). The returned string can - be matched against other `expression` results. + Strip (persistent) result variables (ex '$0 = ', or 'someVar = ', etc). + + This allows for using the output of `expression`/`frame variable`, to + compare it to `dwim-print` output, which disables result variables. """ - before, after = self.VAR_IDENT.split(string, maxsplit=1) - # Support either a frame variable (\w+) or a persistent result (\$\d+). - return re.escape(before) + self.VAR_IDENT_RAW + re.escape(after) + return self.VAR_IDENT.subn("", string, 1)[0] def _expect_cmd( self, @@ -46,19 +44,16 @@ def _expect_cmd( if actual_cmd == "frame variable": resolved_cmd = resolved_cmd.replace(" -- ", " ", 1) - expected_output = self._run_cmd(resolved_cmd) + resolved_cmd_output = self._run_cmd(resolved_cmd) + dwim_cmd_output = self._strip_result_var(resolved_cmd_output) # Verify dwim-print chose the expected command. 
self.runCmd("settings set dwim-print-verbosity full") - substrs = [f"note: ran `{resolved_cmd}`"] - patterns = [] - - if self.VAR_IDENT.search(expected_output): - patterns.append(self._mask_persistent_var(expected_output)) - else: - substrs.append(expected_output) - self.expect(dwim_cmd, substrs=substrs, patterns=patterns) + self.expect(dwim_cmd, substrs=[ + f"note: ran `{resolved_cmd}`", + dwim_cmd_output, + ]) def test_variables(self): """Test dwim-print with variables.""" diff --git a/lldb/test/API/commands/expression/persistent_result/TestPersistentResult.py b/lldb/test/API/commands/expression/persistent_result/TestPersistentResult.py index 10eb100bac37b..911b8f605939b 100644 --- a/lldb/test/API/commands/expression/persistent_result/TestPersistentResult.py +++ b/lldb/test/API/commands/expression/persistent_result/TestPersistentResult.py @@ -31,7 +31,7 @@ def test_expression_persists_result(self): self.expect("expression i", substrs=["(int) $0 = 30"]) self.expect("expression $0", substrs=["(int) $0 = 30"]) - def test_p_persists_result(self): - """Test `p` does persist a result variable.""" - self.expect("p i", substrs=["(int) $0 = 30"]) - self.expect("p $0", substrs=["(int) $0 = 30"]) + def test_p_does_not_persist_results(self): + """Test `p` does not persist a result variable.""" + self.expect("p i", startstr="(int) 30") + self.expect("p $0", error=True) diff --git a/lldb/test/API/functionalities/alias/TestPAlias.py b/lldb/test/API/functionalities/alias/TestPAlias.py index b694e903b9f00..e1f00b91e0149 100644 --- a/lldb/test/API/functionalities/alias/TestPAlias.py +++ b/lldb/test/API/functionalities/alias/TestPAlias.py @@ -7,5 +7,5 @@ class TestCase(TestBase): def test(self): self.build() lldbutil.run_to_source_breakpoint(self, "return", lldb.SBFileSpec("main.c")) - self.expect("p -g", substrs=["$0 = -"]) + self.expect("p -g", startstr="(int) -41") self.expect("p -i0 -g", error=True) From 2b34d59858d6fd12b9bab4ac0868998c940e43f6 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 21 Mar 2023 10:25:04 -0700 Subject: [PATCH 218/691] [test] Change DAG to NEXT in pipeline tests These were made consistent in 951a980dc7aa6. --- .../new-pm-thinlto-postlink-pgo-defaults.ll | 7 ++----- ...ew-pm-thinlto-postlink-samplepgo-defaults.ll | 8 ++------ .../new-pm-thinlto-prelink-pgo-defaults.ll | 17 +++++++---------- ...new-pm-thinlto-prelink-samplepgo-defaults.ll | 6 ++---- 4 files changed, 13 insertions(+), 25 deletions(-) diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index c1e2fd2230056..36ce293fe62ef 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -56,11 +56,8 @@ ; CHECK-O-NEXT: Running analysis: TypeBasedAA ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo -; These next two can appear in any order since they are accessed as parameters -; on the same call to BlockFrequencyInfo::calculate. 
-; CHECK-OSZ-DAG: Running analysis: LoopAnalysis on foo -; CHECK-OSZ-DAG: Running analysis: BranchProbabilityAnalysis on foo -; CHECK-O123-NEXT: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-OSZ-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index a65af442163d5..746c4b6a67369 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -64,14 +64,10 @@ ; CHECK-O-NEXT: Running analysis: TypeBasedAA ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo -; These next two can appear in any order since they are accessed as parameters -; on the same call to BlockFrequencyInfo::calculate. -; CHECK-OSZ-DAG: Running analysis: LoopAnalysis on foo -; CHECK-OSZ-DAG: Running analysis: BranchProbabilityAnalysis on foo -; CHECK-O123-NEXT: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-OSZ-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo - ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll index 7f35a6d1acab9..fdaee98449898 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-pgo-defaults.ll @@ -71,11 +71,10 @@ ; CHECK-O-NEXT: Invalidating analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: GlobalDCEPass ; CHECK-O-NEXT: Running pass: PGOInstrumentationUse -; These next two can appear in any order since they are accessed as parameters -; on the same call to BlockFrequencyInfo::calculate. -; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo -; CHECK-O-DAG: Running analysis: PostDominatorTreeAnalysis on foo -; CHECK-O-DAG: Running analysis: LoopAnalysis on foo +; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis +; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo +; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo ; CHECK-O-NEXT: Invalidating analysis: InnerAnalysisManagerProxy ; CHECK-O-NEXT: Invalidating analysis: LazyCallGraphAnalysis on @@ -122,11 +121,9 @@ ; CHECK-O-NEXT: Running pass: SimplifyCFGPass ; CHECK-O-NEXT: Running pass: InstCombinePass ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo -; These next two can appear in any order since they are accessed as parameters -; on the same call to BlockFrequencyInfo::calculate. 
-; CHECK-O-DAG: Running analysis: LoopAnalysis on foo -; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo -; CHECK-O-DAG: Running analysis: PostDominatorTreeAnalysis on foo +; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo +; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O3-NEXT: Running pass: AggressiveInstCombinePass ; CHECK-O23SZ-NEXT: Running pass: ConstraintEliminationPass ; CHECK-O1-NEXT: Running pass: LibCallsShrinkWrapPass diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index de3643bc35843..b187bbc2a0812 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -59,10 +59,8 @@ ; CHECK-O-NEXT: Running analysis: TypeBasedAA ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running analysis: BlockFrequencyAnalysis on foo -; These next two can appear in any order since they are accessed as parameters -; on the same call to BlockFrequencyInfo::calculate. -; CHECK-O-DAG: Running analysis: LoopAnalysis on foo -; CHECK-O-DAG: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O-NEXT: Running analysis: BranchProbabilityAnalysis on foo +; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass From 2356bf27f722eddbdf3c9acf4072cbdd348f00d0 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Mon, 20 Mar 2023 16:41:11 -0700 Subject: [PATCH 219/691] [lldb][CMake] Enforce not letting lldbUtility link against any other lldb libs lldbUtility is not supposed to depend on anything else in lldb. Let's enforce that constraint in CMake rather than hoping something doesn't slip in under the radar. 
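As a sketch of what the new check catches (a hypothetical misuse, not code
from this patch; lldbCore stands in for any lldb-prefixed library):

```cmake
# With NO_INTERNAL_DEPENDENCIES set, any LINK_LIBS entry matching "^lldb"
# aborts configuration with FATAL_ERROR:
#   "Library lldbUtility cannot depend on any other lldb libs
#    (Found lldbCore in LINK_LIBS)"
add_lldb_library(lldbUtility NO_INTERNAL_DEPENDENCIES
  ArchSpec.cpp
  LINK_LIBS
    lldbCore
  )
```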
Differential Revision: https://reviews.llvm.org/D146473 --- lldb/cmake/modules/AddLLDB.cmake | 12 +++++++++++- lldb/source/Utility/CMakeLists.txt | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake index 374946fe49083..e8fa70a5a6848 100644 --- a/lldb/cmake/modules/AddLLDB.cmake +++ b/lldb/cmake/modules/AddLLDB.cmake @@ -37,13 +37,23 @@ function(add_lldb_library name) # only supported parameters to this macro are the optional # MODULE;SHARED;STATIC library type and source files cmake_parse_arguments(PARAM - "MODULE;SHARED;STATIC;OBJECT;PLUGIN;FRAMEWORK" + "MODULE;SHARED;STATIC;OBJECT;PLUGIN;FRAMEWORK;NO_INTERNAL_DEPENDENCIES" "INSTALL_PREFIX;ENTITLEMENTS" "EXTRA_CXXFLAGS;DEPENDS;LINK_LIBS;LINK_COMPONENTS;CLANG_LIBS" ${ARGN}) llvm_process_sources(srcs ${PARAM_UNPARSED_ARGUMENTS}) list(APPEND LLVM_LINK_COMPONENTS ${PARAM_LINK_COMPONENTS}) + if(PARAM_NO_INTERNAL_DEPENDENCIES) + foreach(link_lib ${PARAM_LINK_LIBS}) + if (link_lib MATCHES "^lldb") + message(FATAL_ERROR + "Library ${name} cannot depend on any other lldb libs " + "(Found ${link_lib} in LINK_LIBS)") + endif() + endforeach() + endif() + if(PARAM_PLUGIN) set_property(GLOBAL APPEND PROPERTY LLDB_PLUGINS ${name}) endif() diff --git a/lldb/source/Utility/CMakeLists.txt b/lldb/source/Utility/CMakeLists.txt index 89acd7cd2eaf5..c48ccdd7c1ed1 100644 --- a/lldb/source/Utility/CMakeLists.txt +++ b/lldb/source/Utility/CMakeLists.txt @@ -23,7 +23,7 @@ if (NOT HAVE_CXX_ATOMICS64_WITHOUT_LIB ) list(APPEND LLDB_SYSTEM_LIBS atomic) endif() -add_lldb_library(lldbUtility +add_lldb_library(lldbUtility NO_INTERNAL_DEPENDENCIES ArchSpec.cpp Args.cpp Baton.cpp From fa6ea7a419f37befbed04368bcb8af4c718facbb Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 20 Mar 2023 11:18:35 -0700 Subject: [PATCH 220/691] [AlwaysInliner] Make legacy pass like the new pass The legacy pass is only used in AMDGPU codegen, which doesn't care about running it in call graph order (it actually has to work around that fact). Make the legacy pass a module pass and share code with the new pass. This allows us to remove the legacy inliner infrastructure. Reviewed By: mtrofin Differential Revision: https://reviews.llvm.org/D146446 --- llvm/include/llvm/Analysis/AliasAnalysis.h | 4 - llvm/include/llvm/Transforms/IPO/Inliner.h | 56 --- llvm/include/llvm/Transforms/Utils/Cloning.h | 7 +- llvm/lib/Analysis/AliasAnalysis.cpp | 11 - .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 6 - llvm/lib/Transforms/IPO/AlwaysInliner.cpp | 136 +++--- llvm/lib/Transforms/IPO/Inliner.cpp | 413 +----------------- llvm/lib/Transforms/IPO/ModuleInliner.cpp | 2 +- llvm/lib/Transforms/IPO/PartialInlining.cpp | 2 +- llvm/lib/Transforms/IPO/SampleProfile.cpp | 2 +- llvm/lib/Transforms/Utils/InlineFunction.cpp | 81 +--- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 45 +- 12 files changed, 88 insertions(+), 677 deletions(-) diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index 953e15e358f12..68956bcf388ad 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -985,10 +985,6 @@ ImmutablePass *createExternalAAWrapperPass( /// getAnalysisUsage. AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR); -/// A helper for the legacy pass manager to populate \p AU to add uses to make -/// sure the analyses required by \p createLegacyPMAAResults are available. 
-void getAAResultsAnalysisUsage(AnalysisUsage &AU); - } // end namespace llvm #endif // LLVM_ANALYSIS_ALIASANALYSIS_H diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h index 1e154eb8f5dac..f1d1e86f3cab6 100644 --- a/llvm/include/llvm/Transforms/IPO/Inliner.h +++ b/llvm/include/llvm/Transforms/IPO/Inliner.h @@ -23,62 +23,6 @@ class AssumptionCacheTracker; class CallGraph; class ProfileSummaryInfo; -/// This class contains all of the helper code which is used to perform the -/// inlining operations that do not depend on the policy. It contains the core -/// bottom-up inlining infrastructure that specific inliner passes use. -struct LegacyInlinerBase : public CallGraphSCCPass { - explicit LegacyInlinerBase(char &ID); - explicit LegacyInlinerBase(char &ID, bool InsertLifetime); - - /// For this class, we declare that we require and preserve the call graph. - /// If the derived class implements this method, it should always explicitly - /// call the implementation here. - void getAnalysisUsage(AnalysisUsage &Info) const override; - - using llvm::Pass::doInitialization; - - bool doInitialization(CallGraph &CG) override; - - /// Main run interface method, this implements the interface required by the - /// Pass class. - bool runOnSCC(CallGraphSCC &SCC) override; - - using llvm::Pass::doFinalization; - - /// Remove now-dead linkonce functions at the end of processing to avoid - /// breaking the SCC traversal. - bool doFinalization(CallGraph &CG) override; - - /// This method must be implemented by the subclass to determine the cost of - /// inlining the specified call site. If the cost returned is greater than - /// the current inline threshold, the call site is not inlined. - virtual InlineCost getInlineCost(CallBase &CB) = 0; - - /// Remove dead functions. - /// - /// This also includes a hack in the form of the 'AlwaysInlineOnly' flag - /// which restricts it to deleting functions with an 'AlwaysInline' - /// attribute. This is useful for the InlineAlways pass that only wants to - /// deal with that subset of the functions. - bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly = false); - - /// This function performs the main work of the pass. The default of - /// Inlinter::runOnSCC() calls skipSCC() before calling this method, but - /// derived classes which cannot be skipped can override that method and call - /// this function unconditionally. - bool inlineCalls(CallGraphSCC &SCC); - -private: - // Insert @llvm.lifetime intrinsics. - bool InsertLifetime = true; - -protected: - AssumptionCacheTracker *ACT; - ProfileSummaryInfo *PSI; - std::function GetTLI; - ImportedFunctionsInliningStatistics ImportedFunctionsStats; -}; - /// The inliner pass for the new pass manager. 
/// /// This pass wires together the inlining utilities and the inline cost diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 85343a6f66b2d..22c8a3b246f30 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -203,18 +203,15 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, class InlineFunctionInfo { public: explicit InlineFunctionInfo( - CallGraph *cg = nullptr, function_ref GetAssumptionCache = nullptr, ProfileSummaryInfo *PSI = nullptr, BlockFrequencyInfo *CallerBFI = nullptr, BlockFrequencyInfo *CalleeBFI = nullptr, bool UpdateProfile = true) - : CG(cg), GetAssumptionCache(GetAssumptionCache), PSI(PSI), - CallerBFI(CallerBFI), CalleeBFI(CalleeBFI), - UpdateProfile(UpdateProfile) {} + : GetAssumptionCache(GetAssumptionCache), PSI(PSI), CallerBFI(CallerBFI), + CalleeBFI(CalleeBFI), UpdateProfile(UpdateProfile) {} /// If non-null, InlineFunction will update the callgraph to reflect the /// changes it makes. - CallGraph *CG; function_ref GetAssumptionCache; ProfileSummaryInfo *PSI; BlockFrequencyInfo *CallerBFI, *CalleeBFI; diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp index 9e24f6b87bdb1..cf1b11636efa5 100644 --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -935,14 +935,3 @@ bool llvm::isNotVisibleOnUnwind(const Value *Object, return false; } - -void llvm::getAAResultsAnalysisUsage(AnalysisUsage &AU) { - // This function needs to be in sync with llvm::createLegacyPMAAResults -- if - // more alias analyses are added to llvm::createLegacyPMAAResults, they need - // to be added here also. - AU.addRequired(); - AU.addUsedIfAvailable(); - AU.addUsedIfAvailable(); - AU.addUsedIfAvailable(); - AU.addUsedIfAvailable(); -} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 64dc8604e76ac..f8821b9f7f4b6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -974,12 +974,6 @@ void AMDGPUPassConfig::addIRPasses() { // Function calls are not supported, so make sure we inline everything. addPass(createAMDGPUAlwaysInlinePass()); addPass(createAlwaysInlinerLegacyPass()); - // We need to add the barrier noop pass, otherwise adding the function - // inlining pass will cause all of the PassConfigs passes to be run - // one function at a time, which means if we have a module with two - // functions, then we will generate code for the first function - // without ever running any passes on the second. - addPass(createBarrierNoopPass()); // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. if (TM.getTargetTriple().getArch() == Triple::r600) diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp index 09286482edff1..cc375f9badcd4 100644 --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -28,16 +28,13 @@ using namespace llvm; #define DEBUG_TYPE "inline" -PreservedAnalyses AlwaysInlinerPass::run(Module &M, - ModuleAnalysisManager &MAM) { - // Add inline assumptions during code generation. 
-  FunctionAnalysisManager &FAM =
-      MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
-  auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
-    return FAM.getResult<AssumptionAnalysis>(F);
-  };
-  auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
+namespace {
+bool AlwaysInlineImpl(
+    Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI,
+    function_ref<AssumptionCache &(Function &)> GetAssumptionCache,
+    function_ref<AAResults &(Function &)> GetAAR,
+    function_ref<BlockFrequencyInfo &(Function &)> GetBFI) {
   SmallSetVector<CallBase *, 16> Calls;
   bool Changed = false;
   SmallVector<Function *, 16> InlinedFunctions;
@@ -65,14 +62,12 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
       DebugLoc DLoc = CB->getDebugLoc();
       BasicBlock *Block = CB->getParent();
 
-      InlineFunctionInfo IFI(
-          /*cg=*/nullptr, GetAssumptionCache, &PSI,
-          &FAM.getResult<BlockFrequencyAnalysis>(*Caller),
-          &FAM.getResult<BlockFrequencyAnalysis>(F));
+      InlineFunctionInfo IFI(GetAssumptionCache, &PSI,
+                             GetBFI ? &GetBFI(*Caller) : nullptr,
+                             GetBFI ? &GetBFI(F) : nullptr);
 
-      InlineResult Res =
-          InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
-                         &FAM.getResult<AAManager>(F), InsertLifetime);
+      InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true,
+                                        &GetAAR(F), InsertLifetime);
       if (!Res.isSuccess()) {
         ORE.emit([&]() {
           return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc,
@@ -127,48 +122,52 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
     }
   }
 
-  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+  return Changed;
 }
 
-namespace {
-
-/// Inliner pass which only handles "always inline" functions.
-///
-/// Unlike the \c AlwaysInlinerPass, this uses the more heavyweight \c Inliner
-/// base class to provide several facilities such as array alloca merging.
-class AlwaysInlinerLegacyPass : public LegacyInlinerBase {
+struct AlwaysInlinerLegacyPass : public ModulePass {
+  bool InsertLifetime;
 
-public:
-  AlwaysInlinerLegacyPass() : LegacyInlinerBase(ID, /*InsertLifetime*/ true) {
-    initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
-  }
+  AlwaysInlinerLegacyPass()
+      : AlwaysInlinerLegacyPass(/*InsertLifetime*/ true) {}
 
   AlwaysInlinerLegacyPass(bool InsertLifetime)
-      : LegacyInlinerBase(ID, InsertLifetime) {
+      : ModulePass(ID), InsertLifetime(InsertLifetime) {
     initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
   }
 
   /// Main run interface method. We override here to avoid calling skipSCC().
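   /// (Now a module pass: runOnModule gathers PSI, AA, and assumption-cache
   /// analyses itself and delegates to the shared AlwaysInlineImpl, so there
   /// is no SCC traversal left to skip.)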
-  bool runOnSCC(CallGraphSCC &SCC) override { return inlineCalls(SCC); }
+  bool runOnModule(Module &M) override {
+
+    auto &PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+    auto GetAAR = [&](Function &F) -> AAResults & {
+      return getAnalysis<AAResultsWrapperPass>(F).getAAResults();
+    };
+    auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
+      return getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+    };
+
+    return AlwaysInlineImpl(M, InsertLifetime, PSI, GetAssumptionCache, GetAAR,
+                            /*GetBFI*/ nullptr);
+  }
 
   static char ID; // Pass identification, replacement for typeid
 
-  InlineCost getInlineCost(CallBase &CB) override;
-
-  using llvm::Pass::doFinalization;
-  bool doFinalization(CallGraph &CG) override {
-    return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<AssumptionCacheTracker>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
   }
 };
-}
+
+} // namespace
 
 char AlwaysInlinerLegacyPass::ID = 0;
 INITIALIZE_PASS_BEGIN(AlwaysInlinerLegacyPass, "always-inline",
                       "Inliner for always_inline functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(AlwaysInlinerLegacyPass, "always-inline",
                     "Inliner for always_inline functions", false, false)
 
@@ -176,46 +175,23 @@ Pass *llvm::createAlwaysInlinerLegacyPass(bool InsertLifetime) {
   return new AlwaysInlinerLegacyPass(InsertLifetime);
 }
 
-/// Get the inline cost for the always-inliner.
-///
-/// The always inliner *only* handles functions which are marked with the
-/// attribute to force inlining. As such, it is dramatically simpler and avoids
-/// using the powerful (but expensive) inline cost analysis. Instead it uses
-/// a very simple and boring direct walk of the instructions looking for
-/// impossible-to-inline constructs.
-///
-/// Note, it would be possible to go to some lengths to cache the information
-/// computed here, but as we only expect to do this for relatively few and
-/// small functions which have the explicit attribute to force inlining, it is
-/// likely not worth it in practice.
-InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) {
-  Function *Callee = CB.getCalledFunction();
-
-  // Only inline direct calls to functions with always-inline attributes
-  // that are viable for inlining.
-  if (!Callee)
-    return InlineCost::getNever("indirect call");
-
-  // When callee coroutine function is inlined into caller coroutine function
-  // before coro-split pass,
-  // coro-early pass can not handle this quiet well.
-  // So we won't inline the coroutine function if it have not been unsplited
-  if (Callee->isPresplitCoroutine())
-    return InlineCost::getNever("unsplited coroutine call");
-
-  // FIXME: We shouldn't even get here for declarations.
- if (Callee->isDeclaration()) - return InlineCost::getNever("no definition"); - - if (!CB.hasFnAttr(Attribute::AlwaysInline)) - return InlineCost::getNever("no alwaysinline attribute"); - - if (Callee->hasFnAttribute(Attribute::AlwaysInline) && CB.isNoInline()) - return InlineCost::getNever("noinline call site attribute"); - - auto IsViable = isInlineViable(*Callee); - if (!IsViable.isSuccess()) - return InlineCost::getNever(IsViable.getFailureReason()); - - return InlineCost::getAlways("always inliner"); +PreservedAnalyses AlwaysInlinerPass::run(Module &M, + ModuleAnalysisManager &MAM) { + FunctionAnalysisManager &FAM = + MAM.getResult(M).getManager(); + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { + return FAM.getResult(F); + }; + auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult(F); + }; + auto GetAAR = [&](Function &F) -> AAResults & { + return FAM.getResult(F); + }; + auto &PSI = MAM.getResult(M); + + bool Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, GetAssumptionCache, + GetAAR, GetBFI); + + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 7340edcbd6bec..01808b3d14fed 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -27,7 +27,6 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" -#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" @@ -71,7 +70,6 @@ using namespace llvm; #define DEBUG_TYPE "inline" STATISTIC(NumInlined, "Number of functions inlined"); -STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); static cl::opt IntraSCCCostMultiplier( @@ -96,9 +94,6 @@ static cl::opt EnablePostSCCAdvisorPrinting("enable-scc-inline-advisor-printing", cl::init(false), cl::Hidden); -namespace llvm { -extern cl::opt InlinerFunctionImportStats; -} static cl::opt CGSCCInlineReplayFile( "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"), @@ -151,56 +146,6 @@ static cl::opt CGSCCInlineReplayFormat( ":. (default)")), cl::desc("How cgscc inline replay file is formatted"), cl::Hidden); -LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} - -LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) - : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} - -/// For this class, we declare that we require and preserve the call graph. -/// If the derived class implements this method, it should -/// always explicitly call the implementation here. -void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - getAAResultsAnalysisUsage(AU); - CallGraphSCCPass::getAnalysisUsage(AU); -} - -using InlinedArrayAllocasTy = DenseMap>; - -/// If it is possible to inline the specified call site, -/// do so and update the CallGraph for this operation. -/// -/// This function also does some basic book-keeping to update the IR. The -/// InlinedArrayAllocas map keeps track of any allocas that are already -/// available from other functions inlined into the caller. If we are able to -/// inline this call site we attempt to reuse already available allocas or add -/// any new allocas to the set if not possible. 
-static InlineResult inlineCallIfPossible( - CallBase &CB, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, - bool InsertLifetime, function_ref &AARGetter, - ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { - Function *Callee = CB.getCalledFunction(); - Function *Caller = CB.getCaller(); - - AAResults &AAR = AARGetter(*Callee); - - // Try to inline the function. Get the list of static allocas that were - // inlined. - InlineResult IR = - InlineFunction(CB, IFI, - /*MergeAttributes=*/true, &AAR, InsertLifetime); - if (!IR.isSuccess()) - return IR; - - if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) - ImportedFunctionsStats.recordInline(*Caller, *Callee); - - return IR; // success -} - /// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. static bool inlineHistoryIncludes( @@ -216,362 +161,6 @@ static bool inlineHistoryIncludes( return false; } -bool LegacyInlinerBase::doInitialization(CallGraph &CG) { - if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) - ImportedFunctionsStats.setModuleInfo(CG.getModule()); - return false; // No changes to CallGraph. -} - -bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) { - if (skipSCC(SCC)) - return false; - return inlineCalls(SCC); -} - -static bool -inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, - std::function GetAssumptionCache, - ProfileSummaryInfo *PSI, - std::function GetTLI, - bool InsertLifetime, - function_ref GetInlineCost, - function_ref AARGetter, - ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { - SmallPtrSet SCCFunctions; - LLVM_DEBUG(dbgs() << "Inliner visiting SCC:"); - for (CallGraphNode *Node : SCC) { - Function *F = Node->getFunction(); - if (F) - SCCFunctions.insert(F); - LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); - } - - // Scan through and identify all call sites ahead of time so that we only - // inline call sites in the original functions, not call sites that result - // from inlining other functions. - SmallVector, 16> CallSites; - - // When inlining a callee produces new call sites, we want to keep track of - // the fact that they were inlined from the callee. This allows us to avoid - // infinite inlining in some obscure cases. To represent this, we use an - // index into the InlineHistory vector. - SmallVector, 8> InlineHistory; - - for (CallGraphNode *Node : SCC) { - Function *F = Node->getFunction(); - if (!F || F->isDeclaration()) - continue; - - OptimizationRemarkEmitter ORE(F); - for (BasicBlock &BB : *F) - for (Instruction &I : BB) { - auto *CB = dyn_cast(&I); - // If this isn't a call, or it is a call to an intrinsic, it can - // never be inlined. - if (!CB || isa(I)) - continue; - - // If this is a direct call to an external function, we can never inline - // it. If it is an indirect call, inlining may resolve it to be a - // direct call, so we keep it. 
- if (Function *Callee = CB->getCalledFunction()) - if (Callee->isDeclaration()) { - using namespace ore; - - setInlineRemark(*CB, "unavailable definition"); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) - << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", CB->getCaller()) - << " because its definition is unavailable" - << setIsVerbose(); - }); - continue; - } - - CallSites.push_back(std::make_pair(CB, -1)); - } - } - - LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); - - // If there are no calls in this function, exit early. - if (CallSites.empty()) - return false; - - // Now that we have all of the call sites, move the ones to functions in the - // current SCC to the end of the list. - unsigned FirstCallInSCC = CallSites.size(); - for (unsigned I = 0; I < FirstCallInSCC; ++I) - if (Function *F = CallSites[I].first->getCalledFunction()) - if (SCCFunctions.count(F)) - std::swap(CallSites[I--], CallSites[--FirstCallInSCC]); - - InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, GetAssumptionCache, PSI); - - // Now that we have all of the call sites, loop over them and inline them if - // it looks profitable to do so. - bool Changed = false; - bool LocalChange; - do { - LocalChange = false; - // Iterate over the outer loop because inlining functions can cause indirect - // calls to become direct calls. - // CallSites may be modified inside so ranged for loop can not be used. - for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { - auto &P = CallSites[CSi]; - CallBase &CB = *P.first; - const int InlineHistoryID = P.second; - - Function *Caller = CB.getCaller(); - Function *Callee = CB.getCalledFunction(); - - // We can only inline direct calls to non-declarations. - if (!Callee || Callee->isDeclaration()) - continue; - - bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller)); - - if (!IsTriviallyDead) { - // If this call site was obtained by inlining another function, verify - // that the include path for the function did not include the callee - // itself. If so, we'd be recursively inlining the same function, - // which would provide the same callsites, which would cause us to - // infinitely inline. - if (InlineHistoryID != -1 && - inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) { - setInlineRemark(CB, "recursive"); - continue; - } - } - - // FIXME for new PM: because of the old PM we currently generate ORE and - // in turn BFI on demand. With the new PM, the ORE dependency should - // just become a regular analysis dependency. - OptimizationRemarkEmitter ORE(Caller); - - auto OIC = shouldInline(CB, GetInlineCost, ORE); - // If the policy determines that we should inline this function, - // delete the call instead. - if (!OIC) - continue; - - // If this call site is dead and it is to a readonly function, we should - // just delete the call instead of trying to inline it, regardless of - // size. This happens because IPSCCP propagates the result out of the - // call and then we're left with the dead call. - if (IsTriviallyDead) { - LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << CB << "\n"); - // Update the call graph by deleting the edge from Callee to Caller. - setInlineRemark(CB, "trivially dead"); - CG[Caller]->removeCallEdgeFor(CB); - CB.eraseFromParent(); - ++NumCallsDeleted; - } else { - // Get DebugLoc to report. CB will be invalid after Inliner. 
- DebugLoc DLoc = CB.getDebugLoc(); - BasicBlock *Block = CB.getParent(); - - // Attempt to inline the function. - using namespace ore; - - InlineResult IR = inlineCallIfPossible( - CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID, - InsertLifetime, AARGetter, ImportedFunctionsStats); - if (!IR.isSuccess()) { - setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " + - inlineCostStr(*OIC)); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, - Block) - << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", Caller) << ": " - << NV("Reason", IR.getFailureReason()); - }); - continue; - } - ++NumInlined; - - emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC); - - // If inlining this function gave us any new call sites, throw them - // onto our worklist to process. They are useful inline candidates. - if (!InlineInfo.InlinedCalls.empty()) { - // Create a new inline history entry for this, so that we remember - // that these new callsites came about due to inlining Callee. - int NewHistoryID = InlineHistory.size(); - InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); - -#ifndef NDEBUG - // Make sure no dupplicates in the inline candidates. This could - // happen when a callsite is simpilfied to reusing the return value - // of another callsite during function cloning, thus the other - // callsite will be reconsidered here. - DenseSet DbgCallSites; - for (auto &II : CallSites) - DbgCallSites.insert(II.first); -#endif - - for (Value *Ptr : InlineInfo.InlinedCalls) { -#ifndef NDEBUG - assert(DbgCallSites.count(dyn_cast(Ptr)) == 0); -#endif - CallSites.push_back( - std::make_pair(dyn_cast(Ptr), NewHistoryID)); - } - } - } - - // If we inlined or deleted the last possible call site to the function, - // delete the function body now. - assert(Callee && "Expected to be non-null due to check at start of loop"); - if (Callee->use_empty() && Callee->hasLocalLinkage() && - // TODO: Can remove if in SCC now. - !SCCFunctions.count(Callee) && - // The function may be apparently dead, but if there are indirect - // callgraph references to the node, we cannot delete it yet, this - // could invalidate the CGSCC iterator. - CG[Callee]->getNumReferences() == 0) { - LLVM_DEBUG(dbgs() << " -> Deleting dead function: " - << Callee->getName() << "\n"); - CallGraphNode *CalleeNode = CG[Callee]; - - // Remove any call graph edges from the callee to its callees. - CalleeNode->removeAllCalledFunctions(); - - // Removing the node for callee from the call graph and delete it. - delete CG.removeFunctionFromModule(CalleeNode); - ++NumDeleted; - } - - // Remove this call site from the list. If possible, use - // swap/pop_back for efficiency, but do not use it if doing so would - // move a call site to a function in this SCC before the - // 'FirstCallInSCC' barrier. 
- if (SCC.isSingular()) { - CallSites[CSi] = CallSites.back(); - CallSites.pop_back(); - } else { - CallSites.erase(CallSites.begin() + CSi); - } - --CSi; - - Changed = true; - LocalChange = true; - } - } while (LocalChange); - - return Changed; -} - -bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { - CallGraph &CG = getAnalysis().getCallGraph(); - ACT = &getAnalysis(); - PSI = &getAnalysis().getPSI(); - GetTLI = [&](Function &F) -> const TargetLibraryInfo & { - return getAnalysis().getTLI(F); - }; - auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { - return ACT->getAssumptionCache(F); - }; - return inlineCallsImpl( - SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime, - [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this), - ImportedFunctionsStats); -} - -/// Remove now-dead linkonce functions at the end of -/// processing to avoid breaking the SCC traversal. -bool LegacyInlinerBase::doFinalization(CallGraph &CG) { - if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) - ImportedFunctionsStats.dump(InlinerFunctionImportStats == - InlinerFunctionImportStatsOpts::Verbose); - return removeDeadFunctions(CG); -} - -/// Remove dead functions that are not included in DNR (Do Not Remove) list. -bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG, - bool AlwaysInlineOnly) { - SmallVector FunctionsToRemove; - SmallVector DeadFunctionsInComdats; - - auto RemoveCGN = [&](CallGraphNode *CGN) { - // Remove any call graph edges from the function to its callees. - CGN->removeAllCalledFunctions(); - - // Remove any edges from the external node to the function's call graph - // node. These edges might have been made irrelegant due to - // optimization of the program. - CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); - - // Removing the node for callee from the call graph and delete it. - FunctionsToRemove.push_back(CGN); - }; - - // Scan for all of the functions, looking for ones that should now be removed - // from the program. Insert the dead ones in the FunctionsToRemove set. - for (const auto &I : CG) { - CallGraphNode *CGN = I.second.get(); - Function *F = CGN->getFunction(); - if (!F || F->isDeclaration()) - continue; - - // Handle the case when this function is called and we only want to care - // about always-inline functions. This is a bit of a hack to share code - // between here and the InlineAlways pass. - if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline)) - continue; - - // If the only remaining users of the function are dead constants, remove - // them. - F->removeDeadConstantUsers(); - - if (!F->isDefTriviallyDead()) - continue; - - // It is unsafe to drop a function with discardable linkage from a COMDAT - // without also dropping the other members of the COMDAT. - // The inliner doesn't visit non-function entities which are in COMDAT - // groups so it is unsafe to do so *unless* the linkage is local. - if (!F->hasLocalLinkage()) { - if (F->hasComdat()) { - DeadFunctionsInComdats.push_back(F); - continue; - } - } - - RemoveCGN(CGN); - } - if (!DeadFunctionsInComdats.empty()) { - // Filter out the functions whose comdats remain alive. - filterDeadComdatFunctions(DeadFunctionsInComdats); - // Remove the rest. - for (Function *F : DeadFunctionsInComdats) - RemoveCGN(CG[F]); - } - - if (FunctionsToRemove.empty()) - return false; - - // Now that we know which functions to delete, do so. We didn't want to do - // this inline, because that would invalidate our CallGraph::iterator - // objects. 
:( - // - // Note that it doesn't matter that we are iterating over a non-stable order - // here to do this, it doesn't matter which order the functions are deleted - // in. - array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); - FunctionsToRemove.erase( - std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()), - FunctionsToRemove.end()); - for (CallGraphNode *CGN : FunctionsToRemove) { - delete CG.removeFunctionFromModule(CGN); - ++NumDeleted; - } - return true; -} - InlineAdvisor & InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, FunctionAnalysisManager &FAM, Module &M) { @@ -781,7 +370,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( - /*cg=*/nullptr, GetAssumptionCache, PSI, + GetAssumptionCache, PSI, &FAM.getResult(*(CB->getCaller())), &FAM.getResult(Callee)); diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp index 956a8597c8941..71ef3b4f3e209 100644 --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -213,7 +213,7 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( - /*cg=*/nullptr, GetAssumptionCache, PSI, + GetAssumptionCache, PSI, &FAM.getResult(*(CB->getCaller())), &FAM.getResult(Callee)); diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index b040187adcfb2..5a6e15a960c0d 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -1383,7 +1383,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " << ore::NV("Caller", CB->getCaller()); - InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI); + InlineFunctionInfo IFI(GetAssumptionCache, &PSI); // We can only forward varargs when we outlined a single region, else we // bail on vararg functions. if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true, diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index ccccb37af42f5..e745ff3a853d7 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1263,7 +1263,7 @@ bool SampleProfileLoader::tryInlineCandidate( if (!Cost) return false; - InlineFunctionInfo IFI(nullptr, GetAC); + InlineFunctionInfo IFI(GetAC); IFI.UpdateProfile = false; InlineResult IR = InlineFunction(CB, IFI, /*MergeAttributes=*/true); diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 474691643eb25..8794a6d4be9d0 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1460,81 +1460,6 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { } } -/// Once we have cloned code over from a callee into the caller, -/// update the specified callgraph to reflect the changes we made. -/// Note that it's possible that not all code was copied over, so only -/// some edges of the callgraph may remain. 
-static void UpdateCallGraphAfterInlining(CallBase &CB, - Function::iterator FirstNewBlock, - ValueToValueMapTy &VMap, - InlineFunctionInfo &IFI) { - CallGraph &CG = *IFI.CG; - const Function *Caller = CB.getCaller(); - const Function *Callee = CB.getCalledFunction(); - CallGraphNode *CalleeNode = CG[Callee]; - CallGraphNode *CallerNode = CG[Caller]; - - // Since we inlined some uninlined call sites in the callee into the caller, - // add edges from the caller to all of the callees of the callee. - CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); - - // Consider the case where CalleeNode == CallerNode. - CallGraphNode::CalledFunctionsVector CallCache; - if (CalleeNode == CallerNode) { - CallCache.assign(I, E); - I = CallCache.begin(); - E = CallCache.end(); - } - - for (; I != E; ++I) { - // Skip 'refererence' call records. - if (!I->first) - continue; - - const Value *OrigCall = *I->first; - - ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); - // Only copy the edge if the call was inlined! - if (VMI == VMap.end() || VMI->second == nullptr) - continue; - - // If the call was inlined, but then constant folded, there is no edge to - // add. Check for this case. - auto *NewCall = dyn_cast(VMI->second); - if (!NewCall) - continue; - - // We do not treat intrinsic calls like real function calls because we - // expect them to become inline code; do not add an edge for an intrinsic. - if (NewCall->getCalledFunction() && - NewCall->getCalledFunction()->isIntrinsic()) - continue; - - // Remember that this call site got inlined for the client of - // InlineFunction. - IFI.InlinedCalls.push_back(NewCall); - - // It's possible that inlining the callsite will cause it to go from an - // indirect to a direct call by resolving a function pointer. If this - // happens, set the callee of the new call site to a more precise - // destination. This can also happen if the call graph node of the caller - // was just unnecessarily imprecise. - if (!I->second->getFunction()) - if (Function *F = NewCall->getCalledFunction()) { - // Indirect call site resolved to direct call. - CallerNode->addCalledFunction(NewCall, CG[F]); - - continue; - } - - CallerNode->addCalledFunction(NewCall, I->second); - } - - // Update the call graph by deleting the edge from Callee to Caller. We must - // do this after the loop above in case Caller and Callee are the same. - CallerNode->removeCallEdgeFor(*cast(&CB)); -} - static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src, Module *M, BasicBlock *InsertBlock, InlineFunctionInfo &IFI, @@ -2300,10 +2225,6 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } } - // Update the callgraph if requested. - if (IFI.CG) - UpdateCallGraphAfterInlining(CB, FirstNewBlock, VMap, IFI); - // For 'nodebug' functions, the associated DISubprogram is always null. // Conservatively avoid propagating the callsite debug location to // instructions inlined from a function whose DISubprogram is not null. @@ -2709,7 +2630,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // call graph updates weren't requested, as those provide value handle based // tracking of inlined call sites instead. Calls to intrinsics are not // collected because they are not inlineable. - if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) { + if (InlinedFunctionInfo.ContainsCalls) { // Otherwise just collect the raw call sites that were inlined. 
for (BasicBlock &NewBB : make_range(FirstNewBlock->getIterator(), Caller->end())) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 6582a6a1c441e..2699899845b3c 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -36,10 +36,11 @@ ; GCN-O0-NEXT: Early propagate attributes from kernels to functions ; GCN-O0-NEXT: AMDGPU Lower Intrinsics ; GCN-O0-NEXT: AMDGPU Inline All Functions -; GCN-O0-NEXT: CallGraph Construction -; GCN-O0-NEXT: Call Graph SCC Pass Manager -; GCN-O0-NEXT: Inliner for always_inline functions -; GCN-O0-NEXT: A No-Op Barrier Pass +; GCN-O0-NEXT: Inliner for always_inline functions +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O0-NEXT: Function Alias Analysis Results ; GCN-O0-NEXT: Lower OpenCL enqueued blocks ; GCN-O0-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O0-NEXT: FunctionPass Manager @@ -186,10 +187,11 @@ ; GCN-O1-NEXT: Early propagate attributes from kernels to functions ; GCN-O1-NEXT: AMDGPU Lower Intrinsics ; GCN-O1-NEXT: AMDGPU Inline All Functions -; GCN-O1-NEXT: CallGraph Construction -; GCN-O1-NEXT: Call Graph SCC Pass Manager -; GCN-O1-NEXT: Inliner for always_inline functions -; GCN-O1-NEXT: A No-Op Barrier Pass +; GCN-O1-NEXT: Inliner for always_inline functions +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Lower OpenCL enqueued blocks ; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-NEXT: FunctionPass Manager @@ -461,10 +463,11 @@ ; GCN-O1-OPTS-NEXT: Early propagate attributes from kernels to functions ; GCN-O1-OPTS-NEXT: AMDGPU Lower Intrinsics ; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions -; GCN-O1-OPTS-NEXT: CallGraph Construction -; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager -; GCN-O1-OPTS-NEXT: Inliner for always_inline functions -; GCN-O1-OPTS-NEXT: A No-Op Barrier Pass +; GCN-O1-OPTS-NEXT: Inliner for always_inline functions +; GCN-O1-OPTS-NEXT: FunctionPass Manager +; GCN-O1-OPTS-NEXT: Dominator Tree Construction +; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O1-OPTS-NEXT: Function Alias Analysis Results ; GCN-O1-OPTS-NEXT: Lower OpenCL enqueued blocks ; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-OPTS-NEXT: FunctionPass Manager @@ -768,10 +771,11 @@ ; GCN-O2-NEXT: Early propagate attributes from kernels to functions ; GCN-O2-NEXT: AMDGPU Lower Intrinsics ; GCN-O2-NEXT: AMDGPU Inline All Functions -; GCN-O2-NEXT: CallGraph Construction -; GCN-O2-NEXT: Call Graph SCC Pass Manager -; GCN-O2-NEXT: Inliner for always_inline functions -; GCN-O2-NEXT: A No-Op Barrier Pass +; GCN-O2-NEXT: Inliner for always_inline functions +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Lower OpenCL enqueued blocks ; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O2-NEXT: FunctionPass Manager @@ -1078,10 +1082,11 @@ ; GCN-O3-NEXT: Early propagate attributes from kernels to functions ; GCN-O3-NEXT: AMDGPU Lower Intrinsics ; GCN-O3-NEXT: AMDGPU Inline All Functions -; GCN-O3-NEXT: CallGraph Construction -; GCN-O3-NEXT: Call Graph 
SCC Pass Manager
-; GCN-O3-NEXT: Inliner for always_inline functions
-; GCN-O3-NEXT: A No-Op Barrier Pass
+; GCN-O3-NEXT: Inliner for always_inline functions
+; GCN-O3-NEXT: FunctionPass Manager
+; GCN-O3-NEXT: Dominator Tree Construction
+; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl)
+; GCN-O3-NEXT: Function Alias Analysis Results
; GCN-O3-NEXT: Lower OpenCL enqueued blocks
; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions
; GCN-O3-NEXT: FunctionPass Manager

From 32baf5c1c29b6b2f282354c9f5919865bc1ff958 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?=
Date: Tue, 21 Mar 2023 19:01:54 +0100
Subject: [PATCH 221/691] [lldb][expr] Propagate
 ClangDynamicCheckerFunctions::Install() errors to caller

I came across this because a lot of regression tests were saying:
```
(lldb) p argc
error: expression failed to parse:
error: couldn't install checkers, unknown error
```

With this change, error messages provide more detail:
```
(lldb) p argc
error: expression failed to parse:
error: couldn't install checkers:
error: Couldn't lookup symbols:
  __objc_load
```

I didn't find a case where `Diagnostics()` is not empty. Also it looks
like this isn't covered in any test (yet).

Reviewed By: bulbazord, Michael137

Differential Revision: https://reviews.llvm.org/D146541
---
 .../lldb/Expression/DynamicCheckerFunctions.h | 10 +++---
 .../Clang/ClangExpressionParser.cpp | 14 ++++-----
 .../Clang/IRDynamicChecks.cpp | 31 +++++++++----------
 .../ExpressionParser/Clang/IRDynamicChecks.h | 8 ++---
 4 files changed, 30 insertions(+), 33 deletions(-)

diff --git a/lldb/include/lldb/Expression/DynamicCheckerFunctions.h b/lldb/include/lldb/Expression/DynamicCheckerFunctions.h
index 02bce5abdf4cb..57a93ca30586a 100644
--- a/lldb/include/lldb/Expression/DynamicCheckerFunctions.h
+++ b/lldb/include/lldb/Expression/DynamicCheckerFunctions.h
@@ -11,6 +11,8 @@

 #include "lldb/lldb-types.h"

+#include "llvm/Support/Error.h"
+
 namespace lldb_private {

 class DiagnosticManager;
@@ -46,10 +48,10 @@ class DynamicCheckerFunctions {
   ///     The execution context to install the functions into.
   ///
   /// \return
-  ///     True on success; false on failure, or if the functions have
-  ///     already been installed.
- virtual bool Install(DiagnosticManager &diagnostic_manager, - ExecutionContext &exe_ctx) = 0; + /// Either llvm::ErrorSuccess or Error with llvm::ErrorInfo + /// + virtual llvm::Error Install(DiagnosticManager &diagnostic_manager, + ExecutionContext &exe_ctx) = 0; virtual bool DoCheckersExplainStop(lldb::addr_t addr, Stream &message) = 0; DynamicCheckerFunctionsKind GetKind() const { return m_kind; } diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp index 0b40df141f098..9852bbc62aa43 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -1403,14 +1403,12 @@ lldb_private::Status ClangExpressionParser::PrepareForExecution( ClangDynamicCheckerFunctions *dynamic_checkers = new ClangDynamicCheckerFunctions(); - DiagnosticManager install_diagnostics; - - if (!dynamic_checkers->Install(install_diagnostics, exe_ctx)) { - if (install_diagnostics.Diagnostics().size()) - err.SetErrorString(install_diagnostics.GetString().c_str()); - else - err.SetErrorString("couldn't install checkers, unknown error"); - + DiagnosticManager install_diags; + if (Error Err = dynamic_checkers->Install(install_diags, exe_ctx)) { + std::string ErrMsg = "couldn't install checkers: " + toString(std::move(Err)); + if (install_diags.Diagnostics().size()) + ErrMsg = ErrMsg + "\n" + install_diags.GetString().c_str(); + err.SetErrorString(ErrMsg); return err; } diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp index 0549868274685..cd7d1ff6148b3 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.cpp @@ -47,33 +47,30 @@ ClangDynamicCheckerFunctions::ClangDynamicCheckerFunctions() ClangDynamicCheckerFunctions::~ClangDynamicCheckerFunctions() = default; -bool ClangDynamicCheckerFunctions::Install( +llvm::Error ClangDynamicCheckerFunctions::Install( DiagnosticManager &diagnostic_manager, ExecutionContext &exe_ctx) { - auto utility_fn_or_error = exe_ctx.GetTargetRef().CreateUtilityFunction( - g_valid_pointer_check_text, VALID_POINTER_CHECK_NAME, - lldb::eLanguageTypeC, exe_ctx); - if (!utility_fn_or_error) { - llvm::consumeError(utility_fn_or_error.takeError()); - return false; - } - m_valid_pointer_check = std::move(*utility_fn_or_error); + Expected> utility_fn = + exe_ctx.GetTargetRef().CreateUtilityFunction( + g_valid_pointer_check_text, VALID_POINTER_CHECK_NAME, + lldb::eLanguageTypeC, exe_ctx); + if (!utility_fn) + return utility_fn.takeError(); + m_valid_pointer_check = std::move(*utility_fn); if (Process *process = exe_ctx.GetProcessPtr()) { ObjCLanguageRuntime *objc_language_runtime = ObjCLanguageRuntime::Get(*process); if (objc_language_runtime) { - auto utility_fn_or_error = objc_language_runtime->CreateObjectChecker( - VALID_OBJC_OBJECT_CHECK_NAME, exe_ctx); - if (!utility_fn_or_error) { - llvm::consumeError(utility_fn_or_error.takeError()); - return false; - } - m_objc_object_check = std::move(*utility_fn_or_error); + Expected> checker_fn = + objc_language_runtime->CreateObjectChecker(VALID_OBJC_OBJECT_CHECK_NAME, exe_ctx); + if (!checker_fn) + return checker_fn.takeError(); + m_objc_object_check = std::move(*checker_fn); } } - return true; + return Error::success(); } bool ClangDynamicCheckerFunctions::DoCheckersExplainStop(lldb::addr_t 
addr, diff --git a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.h b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.h index 4abd16c5c3261..ff20c1f08be0c 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.h +++ b/lldb/source/Plugins/ExpressionParser/Clang/IRDynamicChecks.h @@ -46,10 +46,10 @@ class ClangDynamicCheckerFunctions /// The execution context to install the functions into. /// /// \return - /// True on success; false on failure, or if the functions have - /// already been installed. - bool Install(DiagnosticManager &diagnostic_manager, - ExecutionContext &exe_ctx) override; + /// Either llvm::ErrorSuccess or Error with llvm::ErrorInfo + /// + llvm::Error Install(DiagnosticManager &diagnostic_manager, + ExecutionContext &exe_ctx) override; bool DoCheckersExplainStop(lldb::addr_t addr, Stream &message) override; From 83542e47644eb32a738b288a9de6540cbda0b8c9 Mon Sep 17 00:00:00 2001 From: Hristo Hristov Date: Sat, 18 Mar 2023 12:54:17 +0200 Subject: [PATCH 222/691] [libc++][spaceship] Implement `operator<=>` for `duration` Implements parts of [[ https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1614r2.html | P1614R2 ]] Implemented `operator<=>` for `std::chrono::duration` Reviewed By: #libc, Mordante Differential Revision: https://reviews.llvm.org/D145881 --- libcxx/docs/Status/SpaceshipProjects.csv | 2 +- libcxx/include/__chrono/duration.h | 20 +++++ libcxx/include/chrono | 6 +- .../compare.three_way.pass.cpp | 77 +++++++++++++++++++ 4 files changed, 103 insertions(+), 2 deletions(-) create mode 100644 libcxx/test/std/time/time.duration/time.duration.comparisons/compare.three_way.pass.cpp diff --git a/libcxx/docs/Status/SpaceshipProjects.csv b/libcxx/docs/Status/SpaceshipProjects.csv index a3dc4ee81ec34..e92fd1236a29f 100644 --- a/libcxx/docs/Status/SpaceshipProjects.csv +++ b/libcxx/docs/Status/SpaceshipProjects.csv @@ -52,7 +52,7 @@ Section,Description,Dependencies,Assignee,Complete | `[range.iota.iterator] `_,| `ranges::iota_view::iterator `_,[concepts.cmp],Arthur O'Dwyer,|Complete| | `[range.transform.iterator] `_,| `ranges::transform_view::iterator `_,[concepts.cmp],Arthur O'Dwyer,|Complete| | `[range.elements.iterator] `_,| ranges::elements_view::iterator,[concepts.cmp],Hui Xie,|Complete| -| `[time.duration.comparisons] `_, "chrono::duration", None, Mark de Wever, |Not Started| +| `[time.duration.comparisons] `_, `chrono::duration `_, None, Hristo Hristov, |Complete| | `[time.point.comparisons] `_, "chrono::time_point", None, Mark de Wever, |Not Started| "| `[time.cal.day.nonmembers] `_ | `[time.cal.month.nonmembers] `_ diff --git a/libcxx/include/__chrono/duration.h b/libcxx/include/__chrono/duration.h index 5b27a5493d71f..2c8d50a997868 100644 --- a/libcxx/include/__chrono/duration.h +++ b/libcxx/include/__chrono/duration.h @@ -10,6 +10,8 @@ #ifndef _LIBCPP___CHRONO_DURATION_H #define _LIBCPP___CHRONO_DURATION_H +#include <__compare/ordering.h> +#include <__compare/three_way_comparable.h> #include <__config> #include <__type_traits/common_type.h> #include <__type_traits/enable_if.h> @@ -343,6 +345,8 @@ operator==(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period return __duration_eq, duration<_Rep2, _Period2> >()(__lhs, __rhs); } +#if _LIBCPP_STD_VER <= 17 + // Duration != template @@ -354,6 +358,8 @@ operator!=(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period return !(__lhs == __rhs); } +#endif // _LIBCPP_STD_VER <= 17 + // Duration < template @@ -417,6 +423,20 @@ operator>=(const 
duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period return !(__lhs < __rhs); } +#if _LIBCPP_STD_VER >= 20 + +template + requires three_way_comparable> +_LIBCPP_HIDE_FROM_ABI +constexpr auto operator<=>(const duration<_Rep1, _Period1>& __lhs, + const duration<_Rep2, _Period2>& __rhs) +{ + using _Ct = common_type_t, duration<_Rep2, _Period2>>; + return _Ct(__lhs).count() <=> _Ct(__rhs).count(); +} + +#endif // _LIBCPP_STD_VER >= 20 + // Duration + template diff --git a/libcxx/include/chrono b/libcxx/include/chrono index 7593e947ed623..f34fc590831ac 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -182,7 +182,7 @@ template bool operator==(const duration& lhs, const duration& rhs); template constexpr - bool operator!=(const duration& lhs, const duration& rhs); + bool operator!=(const duration& lhs, const duration& rhs); // removed in C++20 template constexpr bool operator< (const duration& lhs, const duration& rhs); @@ -195,6 +195,10 @@ template template constexpr bool operator>=(const duration& lhs, const duration& rhs); +template + requires three_way_comparable + constexpr auto operator<=>(const duration& lhs, + const duration& rhs); // since C++20 // duration_cast template diff --git a/libcxx/test/std/time/time.duration/time.duration.comparisons/compare.three_way.pass.cpp b/libcxx/test/std/time/time.duration/time.duration.comparisons/compare.three_way.pass.cpp new file mode 100644 index 0000000000000..282df3e786dcb --- /dev/null +++ b/libcxx/test/std/time/time.duration/time.duration.comparisons/compare.three_way.pass.cpp @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+//
+
+// duration
+
+// template
+//   requires ThreeWayComparable
+//   constexpr auto operator<=>(const duration& lhs,
+//                              const duration& rhs);
+
+#include
+#include
+#include
+
+#include "test_comparisons.h"
+
+constexpr bool test() {
+  {
+    std::chrono::seconds s1(3);
+    std::chrono::seconds s2(3);
+    assert((s1 <=> s2) == std::strong_ordering::equal);
+    assert(testOrder(s1, s2, std::strong_ordering::equal));
+  }
+  {
+    std::chrono::seconds s1(3);
+    std::chrono::seconds s2(4);
+    assert((s1 <=> s2) == std::strong_ordering::less);
+    assert(testOrder(s1, s2, std::strong_ordering::less));
+  }
+  {
+    std::chrono::milliseconds s1(3);
+    std::chrono::microseconds s2(3000);
+    assert((s1 <=> s2) == std::strong_ordering::equal);
+    assert(testOrder(s1, s2, std::strong_ordering::equal));
+  }
+  {
+    std::chrono::milliseconds s1(3);
+    std::chrono::microseconds s2(4000);
+    assert((s1 <=> s2) == std::strong_ordering::less);
+    assert(testOrder(s1, s2, std::strong_ordering::less));
+  }
+  {
+    std::chrono::duration> s1(9);
+    std::chrono::duration> s2(10);
+    assert((s1 <=> s2) == std::strong_ordering::equal);
+    assert(testOrder(s1, s2, std::strong_ordering::equal));
+  }
+  {
+    std::chrono::duration> s1(10);
+    std::chrono::duration> s2(9);
+    assert((s1 <=> s2) == std::strong_ordering::greater);
+    assert(testOrder(s1, s2, std::strong_ordering::greater));
+  }
+  {
+    std::chrono::duration> s1(9);
+    std::chrono::duration> s2(10.1);
+    assert((s1 <=> s2) == std::strong_ordering::less);
+    assert(testOrder(s1, s2, std::strong_ordering::less));
+  }
+
+  return true;
+}
+
+int main(int, char**) {
+  assert(test());
+  static_assert(test());
+  return 0;
+}

From a37df84f99ebe68c3e9cc533ffd3952fb22d1f38 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 21 Mar 2023 11:07:38 -0700
Subject: [PATCH 223/691] [SelectionDAG][RISCV] Remove code for handling too
 small shift type from SimplifyDemandedBits.

This code detected that the type returned from getShiftAmountTy was too
small to hold the constant shift amount. But it used the full type size
instead of the scalar type size, leading it to crash for scalable vectors.

This code was necessary when getShiftAmountTy would always return the
target preferred shift amount type for scalars even when the type was an
illegal type larger than the target supported. For vectors,
getShiftAmountTy has always returned the vector type.

Fortunately, getShiftAmountTy was fixed a while ago to detect that the
target's preferred size for scalars is not large enough for the type. So
we can delete this code.

Switched to use getShiftAmountConstant to further simplify the code.

Fixes PR61561.
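To make the full-versus-scalar size distinction concrete, here is a small standalone sketch (an illustration, not part of the patch; it assumes an LLVM development install, and the MachineValueType.h header location differs between LLVM releases):

```
#include "llvm/Support/MachineValueType.h" // moved in later LLVM releases
#include "llvm/Support/raw_ostream.h"

int main() {
  // <vscale x 4 x i16>: four i16 lanes per unknown hardware multiple.
  llvm::MVT VT = llvm::MVT::nxv4i16;

  // The scalar size is an ordinary integer: 16 bits.
  llvm::outs() << "scalar size: " << VT.getScalarSizeInBits() << "\n";

  // The full size is scalable: vscale x 64 bits. Asking it for a fixed
  // value (which is what code reasoning about "the full type size"
  // effectively does) would assert at runtime.
  llvm::TypeSize TS = VT.getSizeInBits();
  llvm::outs() << "min full size: " << TS.getKnownMinValue()
               << (TS.isScalable() ? " (scalable)\n" : "\n");
  return 0;
}
```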
--- .../CodeGen/SelectionDAG/TargetLowering.cpp | 9 ++--- llvm/test/CodeGen/RISCV/rvv/pr61561.ll | 33 +++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/pr61561.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6d0de75579a60..c82f9ce64ea5a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1721,12 +1721,9 @@ bool TargetLowering::SimplifyDemandedBits( unsigned InnerBits = InnerVT.getScalarSizeInBits(); if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits && isTypeDesirableForOp(ISD::SHL, InnerVT)) { - EVT ShTy = getShiftAmountTy(InnerVT, DL); - if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) - ShTy = InnerVT; - SDValue NarrowShl = - TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, - TLO.DAG.getConstant(ShAmt, dl, ShTy)); + SDValue NarrowShl = TLO.DAG.getNode( + ISD::SHL, dl, InnerVT, InnerOp, + TLO.DAG.getShiftAmountConstant(ShAmt, InnerVT, dl)); return TLO.CombineTo( Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); } diff --git a/llvm/test/CodeGen/RISCV/rvv/pr61561.ll b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll new file mode 100644 index 0000000000000..1478e8bfd3c65 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/pr61561.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s + +define @foo(ptr %p) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: vl1re16.v v8, (a0) +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vsll.vi v8, v8, 3 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: li a0, 248 +; CHECK-NEXT: vand.vx v8, v10, a0 +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: lui a0, 1 +; CHECK-NEXT: addiw a0, a0, -361 +; CHECK-NEXT: vmacc.vx v10, a0, v8 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v10, 15 +; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v8, 0 +; CHECK-NEXT: ret + %i13 = load , ptr %p, align 2 + %i14 = zext %i13 to + %i15 = shl nuw nsw %i14, shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) + %i16 = and %i15, shufflevector ( insertelement ( poison, i32 248, i64 0), poison, zeroinitializer) + %i17 = mul nuw nsw %i16, shufflevector ( insertelement ( poison, i32 3735, i64 0), poison, zeroinitializer) + %i18 = add nuw nsw %i17, shufflevector ( insertelement ( poison, i32 16384, i64 0), poison, zeroinitializer) + %i21 = lshr %i18, shufflevector ( insertelement ( poison, i32 15, i64 0), poison, zeroinitializer) + %i22 = trunc %i21 to + ret %i22 +} From 2bd4130362966b71ae8bc0c0da56d5e064a09dae Mon Sep 17 00:00:00 2001 From: "Kevin P. Neal" Date: Tue, 21 Mar 2023 13:54:42 -0400 Subject: [PATCH 224/691] Update checks in advance of an update to D68233. In the past, the IR Verifier would bail out at the first broken function it found. This required trickery with sed to put multiple broken functions in a single test file. Now, the Verifier allows for multiple broken functions. The sed trickery is no longer needed. I've eliminated it. I've also split the test into two since one of them passes verification and we need to look at the output IR from 'opt'. The other fails and we need to look at the diagnostics printed by the Verifier. 
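For context, the verifier behavior the message relies on can also be driven programmatically. A minimal sketch (an illustration, not part of this patch) using the public `verifyModule` API, which prints a diagnostic for every broken function rather than stopping at the first one; the misspelled `round.dynomic` metadata is deliberately invalid:

```
#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"

// Two functions with invalid constrained-FP metadata: the parser accepts
// them, and the verifier reports both instead of bailing at the first.
static const char *IR = R"(
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)

define double @f1(double %a, double %b) #0 {
  %r = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.dynomic", metadata !"fpexcept.strict") #0
  ret double %r
}

define double @f2(double %a, double %b) #0 {
  %r = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.restrict") #0
  ret double %r
}

attributes #0 = { strictfp }
)";

int main() {
  llvm::LLVMContext Ctx;
  llvm::SMDiagnostic Err;
  std::unique_ptr<llvm::Module> M = llvm::parseAssemblyString(IR, Err, Ctx);
  if (!M) {
    Err.print("verifier-demo", llvm::errs());
    return 1;
  }
  // Prints "invalid rounding mode argument" for @f1 and
  // "invalid exception behavior argument" for @f2, then returns true.
  bool Broken = llvm::verifyModule(*M, &llvm::errs());
  return Broken ? 1 : 0;
}
```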
--- llvm/test/Verifier/fp-intrinsics-pass.ll | 43 ++++++++++ llvm/test/Verifier/fp-intrinsics.ll | 100 +++++++++-------------- 2 files changed, 80 insertions(+), 63 deletions(-) create mode 100644 llvm/test/Verifier/fp-intrinsics-pass.ll diff --git a/llvm/test/Verifier/fp-intrinsics-pass.ll b/llvm/test/Verifier/fp-intrinsics-pass.ll new file mode 100644 index 0000000000000..45b0278e8330e --- /dev/null +++ b/llvm/test/Verifier/fp-intrinsics-pass.ll @@ -0,0 +1,43 @@ +; RUN: opt -passes=verify -S < %s 2>&1 | FileCheck %s + +declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) #0 +declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) #0 + +; Test that the verifier accepts legal code, and that the correct attributes are +; attached to the FP intrinsic. The attributes are checked at the bottom. +; CHECK: declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) #[[ATTR:[0-9]+]] +; CHECK: declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) #[[ATTR]] +; Note: FP exceptions aren't usually caught through normal unwind mechanisms, +; but we may want to revisit this for asynchronous exception handling. +define double @f1(double %a, double %b) #0 { +; CHECK-LABEL: define double @f1 +; CHECK-SAME: (double [[A:%.*]], double [[B:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[FADD:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A]], double [[B]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR1]] +; CHECK-NEXT: ret double [[FADD]] +entry: + %fadd = call double @llvm.experimental.constrained.fadd.f64( + double %a, double %b, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %fadd +} + +define double @f1u(double %a) #0 { +; CHECK-LABEL: define double @f1u +; CHECK-SAME: (double [[A:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[FSQRT:%.*]] = call double @llvm.experimental.constrained.sqrt.f64(double [[A]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR1]] +; CHECK-NEXT: ret double [[FSQRT]] +; +entry: + %fsqrt = call double @llvm.experimental.constrained.sqrt.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.strict") #0 + ret double %fsqrt +} + +attributes #0 = { strictfp } +; TODO: Why is strictfp not in the below list? +; CHECK: attributes #[[ATTR]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } diff --git a/llvm/test/Verifier/fp-intrinsics.ll b/llvm/test/Verifier/fp-intrinsics.ll index cabb1aedc8c44..4934843d5a2ed 100644 --- a/llvm/test/Verifier/fp-intrinsics.ll +++ b/llvm/test/Verifier/fp-intrinsics.ll @@ -1,80 +1,54 @@ -; RUN: opt -passes=verify -S < %s 2>&1 | FileCheck --check-prefix=CHECK1 %s -; RUN: sed -e s/.T2:// %s | not opt -passes=verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK2 %s -; RUN: sed -e s/.T3:// %s | not opt -passes=verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK3 %s -; RUN: sed -e s/.T4:// %s | not opt -passes=verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK4 %s -; RUN: sed -e s/.T5:// %s | not opt -passes=verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK5 %s +; RUN: not opt -passes=verify -disable-output < %s 2>&1 | FileCheck %s -; Common declarations used for all runs. 
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) -; Test that the verifier accepts legal code, and that the correct attributes are -; attached to the FP intrinsic. -; CHECK1: declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata) #[[ATTR:[0-9]+]] -; CHECK1: declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata) #[[ATTR]] -; CHECK1: attributes #[[ATTR]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } -; Note: FP exceptions aren't usually caught through normal unwind mechanisms, -; but we may want to revisit this for asynchronous exception handling. -define double @f1(double %a, double %b) #0 { +; Test an illegal value for the rounding mode argument. +; CHECK: invalid rounding mode argument +; CHECK-NEXT: %fadd = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.dynomic", metadata !"fpexcept.strict") #1 +define double @f2(double %a, double %b) #0 { entry: %fadd = call double @llvm.experimental.constrained.fadd.f64( - double %a, double %b, - metadata !"round.dynamic", - metadata !"fpexcept.strict") #0 + double %a, double %b, + metadata !"round.dynomic", + metadata !"fpexcept.strict") #0 ret double %fadd } -define double @f1u(double %a) #0 { +; Test an illegal value for the exception behavior argument. +; CHECK-NEXT: invalid exception behavior argument +; CHECK-NEXT: %fadd = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.dynamic", metadata !"fpexcept.restrict") #1 +define double @f3(double %a, double %b) #0 { entry: - %fsqrt = call double @llvm.experimental.constrained.sqrt.f64( - double %a, - metadata !"round.dynamic", - metadata !"fpexcept.strict") #0 - ret double %fsqrt + %fadd = call double @llvm.experimental.constrained.fadd.f64( + double %a, double %b, + metadata !"round.dynamic", + metadata !"fpexcept.restrict") #0 + ret double %fadd } ; Test an illegal value for the rounding mode argument. -; CHECK2: invalid rounding mode argument -;T2: define double @f2(double %a, double %b) #0 { -;T2: entry: -;T2: %fadd = call double @llvm.experimental.constrained.fadd.f64( -;T2: double %a, double %b, -;T2: metadata !"round.dynomite", -;T2: metadata !"fpexcept.strict") #0 -;T2: ret double %fadd -;T2: } - -; Test an illegal value for the exception behavior argument. -; CHECK3: invalid exception behavior argument -;T3: define double @f3(double %a, double %b) #0 { -;T3: entry: -;T3: %fadd = call double @llvm.experimental.constrained.fadd.f64( -;T3: double %a, double %b, -;T3: metadata !"round.dynamic", -;T3: metadata !"fpexcept.restrict") #0 -;T3: ret double %fadd -;T3: } - -; Test an illegal value for the rounding mode argument. 
-; CHECK4: invalid rounding mode argument -;T4: define double @f4(double %a) #0 { -;T4: entry: -;T4: %fadd = call double @llvm.experimental.constrained.sqrt.f64( -;T4: double %a, -;T4: metadata !"round.dynomite", -;T4: metadata !"fpexcept.strict") #0 -;T4: ret double %fadd -;T4: } +; CHECK-NEXT: invalid rounding mode argument +; CHECK-NEXT: %fadd = call double @llvm.experimental.constrained.sqrt.f64(double %a, metadata !"round.dynomic", metadata !"fpexcept.strict") #1 +define double @f4(double %a) #0 { +entry: + %fadd = call double @llvm.experimental.constrained.sqrt.f64( + double %a, + metadata !"round.dynomic", + metadata !"fpexcept.strict") #0 + ret double %fadd +} ; Test an illegal value for the exception behavior argument. -; CHECK5: invalid exception behavior argument -;T5: define double @f5(double %a) #0 { -;T5: entry: -;T5: %fadd = call double @llvm.experimental.constrained.sqrt.f64( -;T5: double %a, -;T5: metadata !"round.dynamic", -;T5: metadata !"fpexcept.restrict") #0 -;T5: ret double %fadd -;T5: } +; CHECK-NEXT: invalid exception behavior argument +; CHECK-NEXT: %fadd = call double @llvm.experimental.constrained.sqrt.f64(double %a, metadata !"round.dynamic", metadata !"fpexcept.restrict") #1 +define double @f5(double %a) #0 { +entry: + %fadd = call double @llvm.experimental.constrained.sqrt.f64( + double %a, + metadata !"round.dynamic", + metadata !"fpexcept.restrict") #0 + ret double %fadd +} attributes #0 = { strictfp } From 22b5fe74782a322e07855e20f83a14d7a426fcc9 Mon Sep 17 00:00:00 2001 From: Shoaib Meenai Date: Tue, 21 Mar 2023 11:19:34 -0700 Subject: [PATCH 225/691] [llvm-libtool-darwin] Fix test when libfile.a exists This can be a valid system library, as reported in https://reviews.llvm.org/D85540#inline-1415298 --- llvm/test/tools/llvm-libtool-darwin/L-and-l.test | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/test/tools/llvm-libtool-darwin/L-and-l.test b/llvm/test/tools/llvm-libtool-darwin/L-and-l.test index 1ba59acc7ed74..679ae888f2edc 100644 --- a/llvm/test/tools/llvm-libtool-darwin/L-and-l.test +++ b/llvm/test/tools/llvm-libtool-darwin/L-and-l.test @@ -112,13 +112,13 @@ ## Check that an error is thrown when the input library cannot be found ## (since 'lib' and '.a' are added): -# RUN: llvm-ar cr %t/dirname/file %t-input1.o -# RUN: not llvm-libtool-darwin -static -o %t.lib -lfile -L%t/dirname 2>&1 | \ -# RUN: FileCheck %s --check-prefix=NOT-FOUND -DFILE=libfile.a +# RUN: llvm-ar cr %t/dirname/file-does-exist %t-input1.o +# RUN: not llvm-libtool-darwin -static -o %t.lib -lfile-does-exist -L%t/dirname 2>&1 | \ +# RUN: FileCheck %s --check-prefix=NOT-FOUND -DFILE=libfile-does-exist.a -# RUN: llvm-ar cr %t/dirname/libfile.a %t-input1.o -# RUN: not llvm-libtool-darwin -static -o %t.lib -llibfile.a -L%t/dirname 2>&1 | \ -# RUN: FileCheck %s --check-prefix=NOT-FOUND -DFILE=liblibfile.a.a +# RUN: llvm-ar cr %t/dirname/libfile-does-exist.a %t-input1.o +# RUN: not llvm-libtool-darwin -static -o %t.lib -llibfile-does-exist.a -L%t/dirname 2>&1 | \ +# RUN: FileCheck %s --check-prefix=NOT-FOUND -DFILE=liblibfile-does-exist.a.a ## Check that an error is thrown when the input library is not valid: # RUN: touch %t/dirname/not-valid.o From 852cfc2ed31bd434222d55930312f7e94884a322 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 21 Mar 2023 12:05:18 -0700 Subject: [PATCH 226/691] [NFC] Fix incorrect comment for simplifyRightShift --- llvm/lib/Analysis/InstructionSimplify.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff 
--git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index ebc5a77efd1f7..ecb0cdbd13c62 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -1411,8 +1411,8 @@ static Value *simplifyShift(Instruction::BinaryOps Opcode, Value *Op0,
   return nullptr;
 }

-/// Given operands for an Shl, LShr or AShr, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an LShr or AShr, see if we can fold the result. If not,
+/// this returns null.
 static Value *simplifyRightShift(Instruction::BinaryOps Opcode, Value *Op0,
                                  Value *Op1, bool IsExact,
                                  const SimplifyQuery &Q, unsigned MaxRecurse) {

From ae484c21c05668f84b13304c28bc39f753e493de Mon Sep 17 00:00:00 2001
From: Blue Gaston
Date: Tue, 21 Mar 2023 08:48:22 -0700
Subject: [PATCH 227/691] [TSAN][Darwin] Forward declare spinlock functions on
 darwin for TSAN interceptors

Spinlock symbols are removed from headers in macOS version 10.12 and greater. Even though they are deprecated, the symbols remain available on the system.

The TSAN interceptors currently cause a build failure after this version because of the change in availability of the symbol.

We want to continue intercepting the symbols available on the OS. So we add forward declarations so that the TSAN interceptors can build.

This is tested with the existing osspinlock_norace test.

Differential Revision: https://reviews.llvm.org/D146537
---
 .../lib/tsan/rtl/tsan_interceptors_mac.cpp | 1 +
 .../lib/tsan/rtl/tsan_spinlock_defs_mac.h | 45 +++++++++++++++++++
 2 files changed, 46 insertions(+)
 create mode 100644 compiler-rt/lib/tsan/rtl/tsan_spinlock_defs_mac.h

diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
index 1ee47bcd1237e..e4f9e2915ced2 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_mac.cpp
@@ -18,6 +18,7 @@
 #include "tsan_interceptors.h"
 #include "tsan_interface.h"
 #include "tsan_interface_ann.h"
+#include "tsan_spinlock_defs_mac.h"
 #include "sanitizer_common/sanitizer_addrhashmap.h"

 #include

diff --git a/compiler-rt/lib/tsan/rtl/tsan_spinlock_defs_mac.h b/compiler-rt/lib/tsan/rtl/tsan_spinlock_defs_mac.h
new file mode 100644
index 0000000000000..1a99a81c03023
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_spinlock_defs_mac.h
@@ -0,0 +1,45 @@
+//===-- tsan_spinlock_defs_mac.h -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Mac-specific forward-declared function definitions that may be
+// deprecated in later versions of the OS.
+// These are needed for interceptors.
+//
+//===----------------------------------------------------------------------===//

+#if SANITIZER_APPLE

+#ifndef TSAN_SPINLOCK_DEFS_MAC_H
+#define TSAN_SPINLOCK_DEFS_MAC_H

+#include

+extern "C" {

+/*
+Provides forward declarations related to OSSpinLocks on Darwin. These functions are
+deprecated on macOS version 10.12 and later,
+and are no longer included in the system headers.
+
+However, the symbols are still available on the system, so we provide these forward
+declarations to prevent compilation errors in tsan_interceptors_mac.cpp, which
+references these functions when defining TSAN interceptor functions.
+*/
+
+typedef int32_t OSSpinLock;
+
+void OSSpinLockLock(volatile OSSpinLock *__lock);
+void OSSpinLockUnlock(volatile OSSpinLock *__lock);
+bool OSSpinLockTry(volatile OSSpinLock *__lock);
+
+}
+
+#endif //TSAN_SPINLOCK_DEFS_MAC_H
+#endif // SANITIZER_APPLE

From ee7b6fd4c1c11641f9b1b54441ed038fd761de13 Mon Sep 17 00:00:00 2001
From: Slava Zakharin
Date: Fri, 17 Mar 2023 16:52:19 -0700
Subject: [PATCH 228/691] [flang] Set proper source location for the main
 function.

Take the source position for the anonymous program from its scope.
If the first evaluation is a construct or directive, then it has
a null source position.

Author: vdonaldson

Differential Revision: https://reviews.llvm.org/D146445
---
 flang/lib/Lower/PFTBuilder.cpp | 4 +---
 flang/test/Lower/main_location.f90 | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+), 3 deletions(-)
 create mode 100644 flang/test/Lower/main_location.f90

diff --git a/flang/lib/Lower/PFTBuilder.cpp b/flang/lib/Lower/PFTBuilder.cpp
index 6261a9093cbc3..2c63852ce95ee 100644
--- a/flang/lib/Lower/PFTBuilder.cpp
+++ b/flang/lib/Lower/PFTBuilder.cpp
@@ -1671,9 +1671,7 @@ parser::CharBlock
 Fortran::lower::pft::FunctionLikeUnit::getStartingSourceLoc() const {
   if (beginStmt)
     return stmtSourceLoc(*beginStmt);
-  if (!evaluationList.empty())
-    return evaluationList.front().position;
-  return stmtSourceLoc(endStmt);
+  return scope->sourceRange();
 }

 //===----------------------------------------------------------------------===//
diff --git a/flang/test/Lower/main_location.f90 b/flang/test/Lower/main_location.f90
new file mode 100644
index 0000000000000..db63339288f03
--- /dev/null
+++ b/flang/test/Lower/main_location.f90
@@ -0,0 +1,26 @@
+! RUN: split-file %s %t
+! RUN: bbc %t/test1.f90 -o - --emit-fir --mlir-print-debuginfo | FileCheck %s --check-prefix=TEST1
+! RUN: bbc %t/test2.f90 -o - --emit-fir --mlir-print-debuginfo | FileCheck %s --check-prefix=TEST2
+
+! Check that the missing optional program-stmt (R1401)
+! does not result in unknown source location of the corresponding
+! function.
+
+!--- test1.f90
+if (.false.) then
+endif
+end
+
+! TEST1: func.func @_QQmain() {
+! TEST1-NEXT: return loc("{{.*}}test1.f90":3:1)
+! TEST1-NEXT: } loc("{{.*}}test1.f90":1:1)
+
+!--- test2.f90
+!!! keep me here
+if (.true.) then
+endif
+end program
+
+! TEST2: func.func @_QQmain() {
+! TEST2-NEXT: return loc("{{.*}}test2.f90":4:1)
+! TEST2-NEXT: } loc("{{.*}}test2.f90":2:1)

From e240e6b8b7cc68a29bc8a5d6a7d0e3b7eaa61dc0 Mon Sep 17 00:00:00 2001
From: Andrew Litteken
Date: Mon, 20 Mar 2023 20:54:44 -0500
Subject: [PATCH 229/691] [IRSim] Check largest sections first when analyzing
 similarity

When we check for similarity, right now there is no order to how it is
checked, except for via the suffix tree ordering.

We can reduce how much structural analysis we perform by checking the
regions in decreasing order of size. In doing so, we know that if two
large sections match, each of their contained regions also matches.
This allows us to skip the structural checking for each smaller section.
It does require that we use the large regions as a "bridge" to create
the canonical mapping between the two regions.

This reduces compile time significantly for some benchmarks. It will
not perform as well for programs with many small items.
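The "bridge" idea can be sketched independently of the LLVM data structures. A toy model (an illustration under simplifying assumptions, not this patch's implementation): once two large regions are known to be structurally similar, a smaller candidate contained in one can be related to the matching sub-region of the other by composing the existing mappings, with no further structural comparison:

```
#include <cassert>
#include <unordered_map>

using Map = std::unordered_map<int, int>;

// Derive a target-to-source value correspondence for two small contained
// regions by chaining through the already-matched large regions:
// target value -> canonical number (via the large target region) ->
// source value (via the large source region).
Map bridge(const Map &TargetValToCanon, const Map &CanonToSourceVal) {
  Map TargetToSource;
  for (auto [TVal, Canon] : TargetValToCanon) {
    auto It = CanonToSourceVal.find(Canon);
    assert(It != CanonToSourceVal.end() && "containment guarantees a match");
    TargetToSource[TVal] = It->second;
  }
  return TargetToSource;
}

int main() {
  // The small target region's values 1..3 map to canonical numbers 7..9
  // through the large match; those canonical numbers map to the small
  // source region's values 100..102.
  Map T2C{{1, 7}, {2, 8}, {3, 9}};
  Map C2S{{7, 100}, {8, 101}, {9, 102}};
  Map T2S = bridge(T2C, C2S);
  assert(T2S.at(2) == 101);
  return 0;
}
```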
Recommit fixes the IRSimilarity tests.
Recommit of: 805ec19d7d9915989be8a8a626176b5e29e19eee

Recommit fixes llvm-sim tests
Recommit of: 082ec267583100455fee356bb0d4ebd55aba2d46

Reviewer: paquette
Differential Revision: https://reviews.llvm.org/D139338
---
 .../llvm/Analysis/IRSimilarityIdentifier.h | 43 ++++
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp | 214 +++++++++++++++++-
 .../Analysis/IRSimilarityIdentifier/basic.ll | 44 ++--
 .../debug-inst-starts-block.ll | 21 +-
 .../IRSimilarityIdentifier/different.ll | 18 +-
 .../IRSimilarityIdentifier/nothing.ll | 2 +
 .../Transforms/IROutliner/illegal-assumes.ll | 40 ++--
 llvm/test/tools/llvm-sim/single-sim-file.test | 16 +-
 llvm/test/tools/llvm-sim/single-sim.test | 16 +-
 9 files changed, 336 insertions(+), 78 deletions(-)

diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 9f9e7c59b42ba..d40d51e2d3376 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -850,6 +850,49 @@ class IRSimilarityCandidate {
       IRSimilarityCandidate &SourceCand,
       DenseMap> &ToSourceMapping,
       DenseMap> &FromSourceMapping);
+
+  /// Create a mapping for the value numbering of the calling
+  /// IRSimilarityCandidate, to a different separate set of numbers, based on
+  /// the canonical ordering in \p SourceCand. These are defined based on the
+  /// found mappings in \p ToSourceMapping and \p FromSourceMapping. Both of
+  /// these relationships should have the same information, just in opposite
+  /// directions. Uses the \p OneToOne mapping from target candidate to \p
+  /// SourceCand GVNs to determine the mapping first for values with multiple
+  /// mappings. This mapping is created by the ordering of operands in the
+  /// instruction they are first seen in the candidates.
+  ///
+  /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
+  /// canonical numbering from.
+  /// \param [in,out] OneToOne - A mapping of value numbers from candidate
+  /// \p A to candidate \p B using the structure of the original instructions.
+  /// \param ToSourceMapping - The mapping of value numbers from this candidate
+  /// to \p SourceCand.
+  /// \param FromSourceMapping - The mapping of value numbers from \p SourceCand
+  /// to this candidate.
+  void createCanonicalRelationFrom(
+      IRSimilarityCandidate &SourceCand,
+      DenseMap &OneToOne,
+      DenseMap> &ToSourceMapping,
+      DenseMap> &FromSourceMapping);
+
+  /// Create a mapping for the value numbering of the calling
+  /// IRSimilarityCandidate, to a different separate set of numbers, based on
+  /// the canonical ordering in \p SourceCand. These are defined based on the
+  /// canonical mapping defined between \p SourceCandLarge and
+  /// \p TargetCandLarge. These IRSimilarityCandidates are already structurally
+  /// similar, and fully encapsulate the IRSimilarityCandidates in question.
+  /// These are used as a "bridge" from the \p SourceCand to the target.
+  ///
+  /// \param [in, out] SourceCand - The IRSimilarityCandidate to create a
+  /// canonical numbering from.
+  /// \param SourceCandLarge - The IRSimilarityCandidate fully containing
+  /// \p SourceCand.
+  /// \param TargetCandLarge - The IRSimilarityCandidate fully containing
+  /// this Candidate.
+  void createCanonicalRelationFrom(
+      IRSimilarityCandidate &SourceCand,
+      IRSimilarityCandidate &SourceCandLarge,
+      IRSimilarityCandidate &TargetCandLarge);

  /// \param [in,out] BBSet - The set to track the basic blocks.
void getBasicBlocks(DenseSet &BBSet) const {

diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index c8007be4142cf..454ce5a6925bf 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -1101,6 +1101,76 @@ void IRSimilarityCandidate::createCanonicalRelationFrom(
   }
 }

+void IRSimilarityCandidate::createCanonicalRelationFrom(
+    IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
+    IRSimilarityCandidate &TargetCandLarge) {
+  assert(!SourceCand.CanonNumToNumber.empty() &&
+         "Canonical Relationship is non-empty");
+  assert(!SourceCand.NumberToCanonNum.empty() &&
+         "Canonical Relationship is non-empty");
+
+  assert(!SourceCandLarge.CanonNumToNumber.empty() &&
+         "Canonical Relationship is non-empty");
+  assert(!SourceCandLarge.NumberToCanonNum.empty() &&
+         "Canonical Relationship is non-empty");
+
+  assert(!TargetCandLarge.CanonNumToNumber.empty() &&
+         "Canonical Relationship is non-empty");
+  assert(!TargetCandLarge.NumberToCanonNum.empty() &&
+         "Canonical Relationship is non-empty");
+
+  assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
+  assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
+
+  // We're going to use the larger candidates as a "bridge" to create the
+  // canonical number for the target candidate since we have identified two
+  // candidates as subsequences of larger sequences, and therefore they must
+  // be structurally similar.
+  for (std::pair &ValueNumPair : ValueToNumber) {
+    Value *CurrVal = ValueNumPair.first;
+    unsigned TargetCandGVN = ValueNumPair.second;
+
+    // Find the numbering in the large candidate that surrounds the
+    // current candidate.
+    std::optional OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
+    assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
+
+    // Get the canonical numbering in the large target candidate.
+    std::optional OTargetCandCanon =
+        TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
+    assert(OTargetCandCanon.has_value() &&
+           "Canonical Number not found for GVN");
+
+    // Get the GVN in the large source candidate from the canonical numbering.
+    std::optional OLargeSourceGVN =
+        SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
+    assert(OLargeSourceGVN.has_value() &&
+           "GVN Number not found for Canonical Number");
+
+    // Get the Value from the GVN in the large source candidate.
+    std::optional OLargeSourceV =
+        SourceCandLarge.fromGVN(OLargeSourceGVN.value());
+    assert(OLargeSourceV.has_value() && "Value not found for GVN");
+
+    // Get the GVN number for the Value in the source candidate.
+    std::optional OSourceGVN =
+        SourceCand.getGVN(OLargeSourceV.value());
+    assert(OSourceGVN.has_value() && "GVN Number not found for Value");
+
+    // Get the canonical numbering from the GVN.
+    std::optional OSourceCanon =
+        SourceCand.getCanonicalNum(OSourceGVN.value());
+    assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
+
+    // Insert the canonical numbering and GVN pair into their respective
+    // mappings.
+    CanonNumToNumber.insert(
+        std::make_pair(OSourceCanon.value(), TargetCandGVN));
+    NumberToCanonNum.insert(
+        std::make_pair(TargetCandGVN, OSourceCanon.value()));
+  }
+}
+
 void IRSimilarityCandidate::createCanonicalMappingFor(
     IRSimilarityCandidate &CurrCand) {
   assert(CurrCand.CanonNumToNumber.size() == 0 &&
@@ -1118,6 +1188,81 @@ void IRSimilarityCandidate::createCanonicalMappingFor(
   }
 }

+/// Look for larger IRSimilarityCandidates from the previously matched
+/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
+/// an overlap, return a pair of structurally similar, larger
+/// IRSimilarityCandidates.
+///
+/// \param [in] CandA - The first candidate we are trying to determine the
+/// structure of.
+/// \param [in] CandB - The second candidate we are trying to determine the
+/// structure of.
+/// \param [in] IndexToIncludedCand - Mapping of the index of an instruction in
+/// a circuit to the IRSimilarityCandidates that include this instruction.
+/// \param [in] CandToGroup - Mapping of IRSimilarityCandidate to a
+/// number representing the structural group assigned to it.
+static std::optional<
+    std::pair>
+CheckLargerCands(
+    IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,
+    DenseMap> &IndexToIncludedCand,
+    DenseMap &CandToGroup) {
+  DenseMap IncludedGroupAndCandA;
+  DenseMap IncludedGroupAndCandB;
+  DenseSet IncludedGroupsA;
+  DenseSet IncludedGroupsB;
+
+  // Find the overall similarity group numbers that fully contain the candidate,
+  // and record the larger candidate for each group.
+  auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
+  std::optional>
+      Result;
+
+  unsigned CandAStart = CandA.getStartIdx();
+  unsigned CandAEnd = CandA.getEndIdx();
+  unsigned CandBStart = CandB.getStartIdx();
+  unsigned CandBEnd = CandB.getEndIdx();
+  if (IdxToCandidateIt == IndexToIncludedCand.end())
+    return Result;
+  for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
+    if (MatchedCand->getStartIdx() > CandAStart ||
+        (MatchedCand->getEndIdx() < CandAEnd))
+      continue;
+    unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
+    IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
+    IncludedGroupsA.insert(GroupNum);
+  }
+
+  // Find the overall similarity group numbers that fully contain the next
+  // candidate, and record the larger candidate for each group.
+  IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
+  if (IdxToCandidateIt == IndexToIncludedCand.end())
+    return Result;
+  for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
+    if (MatchedCand->getStartIdx() > CandBStart ||
+        MatchedCand->getEndIdx() < CandBEnd)
+      continue;
+    unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
+    IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
+    IncludedGroupsB.insert(GroupNum);
+  }
+
+  // Find the intersection between the two groups; these are the groups where
+  // the larger candidates exist.
+  set_intersect(IncludedGroupsA, IncludedGroupsB);
+
+  // If there is no intersection between the sets, then we cannot determine
+  // whether or not there is a match.
+  if (IncludedGroupsA.empty())
+    return Result;
+
+  // Create a pair that contains the larger candidates.
+  auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
+  auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
+  Result = std::make_pair(ItA->second, ItB->second);
+  return Result;
+}
+
 /// From the list of IRSimilarityCandidates, perform a comparison between each
 /// IRSimilarityCandidate to determine if there are overlapping
 /// IRInstructionData, or if they do not have the same structure.
@@ -1127,9 +1272,16 @@
 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
 /// vector are structurally similar to one another.
+/// \param [in] IndexToIncludedCand - Mapping of the index of an instruction in
+/// a circuit to the IRSimilarityCandidates that include this instruction.
+/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
+/// number representing the structural group assigned to it.
 static void findCandidateStructures(
     std::vector &CandsForRepSubstring,
-    DenseMap &StructuralGroups) {
+    DenseMap &StructuralGroups,
+    DenseMap> &IndexToIncludedCand,
+    DenseMap &CandToOverallGroup
+    ) {
   std::vector::iterator CandIt, CandEndIt,
       InnerCandIt, InnerCandEndIt;
@@ -1192,6 +1344,24 @@ static void findCandidateStructures(
       if (CandToGroupItInner != CandToGroup.end())
         continue;

+      // Check if we have found structural similarity between two candidates
+      // that fully contain the first and second candidates.
+      std::optional>
+          LargerPair = CheckLargerCands(
+              *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
+
+      // If a pair was found, it means that we can assume that these smaller
+      // substrings are also structurally similar. Use the larger candidates to
+      // determine the canonical mapping between the two sections.
+      if (LargerPair.has_value()) {
+        SameStructure = true;
+        InnerCandIt->createCanonicalRelationFrom(
+            *CandIt, *LargerPair.value().first, *LargerPair.value().second);
+        CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
+        CurrentGroupPair->second.push_back(*InnerCandIt);
+        continue;
+      }
+
       // Otherwise we determine if they have the same structure and add it to
       // vector if they match.
       ValueNumberMappingA.clear();
@@ -1218,24 +1388,58 @@ void IRSimilarityIdentifier::findCandidates(
   std::vector NewCandidateGroups;

   DenseMap StructuralGroups;
+  DenseMap> IndexToIncludedCand;
+  DenseMap CandToGroup;

   // Iterate over the subsequences found by the Suffix Tree to create
   // IRSimilarityCandidates for each repeated subsequence and determine which
   // instances are structurally similar to one another.
-  for (SuffixTree::RepeatedSubstring &RS : ST) {
+
+  // Sort the suffix tree from longest substring to shortest.
+ std::vector RSes; + for (SuffixTree::RepeatedSubstring &RS : ST) + RSes.push_back(RS); + + llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS, + const SuffixTree::RepeatedSubstring &RHS) { + return LHS.Length > RHS.Length; + }); + for (SuffixTree::RepeatedSubstring &RS : RSes) { createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, CandsForRepSubstring); if (CandsForRepSubstring.size() < 2) continue; - findCandidateStructures(CandsForRepSubstring, StructuralGroups); - for (std::pair &Group : StructuralGroups) + findCandidateStructures(CandsForRepSubstring, StructuralGroups, + IndexToIncludedCand, CandToGroup); + for (std::pair &Group : StructuralGroups) { // We only add the group if it contains more than one // IRSimilarityCandidate. If there is only one, that means there is no // other repeated subsequence with the same structure. - if (Group.second.size() > 1) + if (Group.second.size() > 1) { SimilarityCandidates->push_back(Group.second); + // Iterate over each candidate in the group, and add an entry for each + // instruction included with a mapping to a set of + // IRSimilarityCandidates that include that instruction. + for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) { + for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx(); + Idx <= Edx; ++Idx) { + DenseMap>::iterator + IdIt; + IdIt = IndexToIncludedCand.find(Idx); + bool Inserted = false; + if (IdIt == IndexToIncludedCand.end()) + std::tie(IdIt, Inserted) = IndexToIncludedCand.insert( + std::make_pair(Idx, DenseSet())); + IdIt->second.insert(&IRCand); + } + // Add mapping of candidate to the overall similarity group number. + CandToGroup.insert( + std::make_pair(&IRCand, SimilarityCandidates->size() - 1)); + } + } + } CandsForRepSubstring.clear(); StructuralGroups.clear(); diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll b/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll index 79ae402e9f9f5..1c08cb407c2e3 100644 --- a/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll +++ b/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll @@ -4,33 +4,33 @@ ; This is a simple test to make sure the IRSimilarityIdentifier and ; IRSimilarityPrinterPass is working. -; CHECK: 4 candidates of length 2. Found in: +; CHECK: 4 candidates of length 6. Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 5, ptr %5, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 3. Found in: +; CHECK-NEXT:4 candidates of length 5. 
Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 +; CHECK-NEXT: Start Instruction: store i32 2, ptr %2, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 +; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 4. Found in: +; CHECK-NEXT:4 candidates of length 4. Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) ; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 @@ -43,31 +43,31 @@ ; CHECK-NEXT: Function: dog, Basic Block: entry ; CHECK-NEXT: Start Instruction: store i32 2, ptr %2, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 5. Found in: +; CHECK-NEXT:4 candidates of length 3. Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 2, ptr %2, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 3, ptr %3, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 -; CHECK-NEXT:4 candidates of length 6. Found in: +; CHECK-NEXT:4 candidates of length 2. 
Found in: ; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 +; CHECK-NEXT: Start Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: End Instruction: store i32 6, ptr %6, align 4 ; CHECK-NEXT: Function: cat, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 ; CHECK-NEXT: Function: dog, Basic Block: entry -; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 +; CHECK-NEXT: Start Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 define linkonce_odr void @fish() { @@ -136,3 +136,5 @@ entry: store i32 5, ptr %5, align 4 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll b/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll index 72fa361077587..78ecec92cc31a 100644 --- a/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll +++ b/llvm/test/Analysis/IRSimilarityIdentifier/debug-inst-starts-block.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -disable-output -S -passes=print-ir-similarity < %s 2>&1 | FileCheck %s ; When a debug instruction is the first instruction in a block, when that block @@ -5,27 +6,27 @@ ; counted in similarity matching they must be ignored when creating canonical ; relations from one region to another. This checks that this is enforced. -; CHECK: 2 candidates of length 3. Found in: +; CHECK: 2 candidates of length 4. Found in: +; CHECK-NEXT: Function: main, Basic Block: entry +; CHECK-NEXT: Start Instruction: %0 = add i32 1, 4 +; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 +; CHECK-NEXT: Function: main, Basic Block: for.body169 +; CHECK-NEXT: Start Instruction: %2 = add i32 1, 4 +; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 +; CHECK-NEXT: 2 candidates of length 3. Found in: ; CHECK-NEXT: Function: main, Basic Block: entry ; CHECK-NEXT: Start Instruction: br label %for.body169 ; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 ; CHECK-NEXT: Function: main, Basic Block: for.body169 ; CHECK-NEXT: Start Instruction: br label %for.end122 ; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 -; CHECK-NEXT: 2 candidates of length 2. Found in: +; CHECK-NEXT: 2 candidates of length 2. Found in: ; CHECK-NEXT: Function: main, Basic Block: for.end122 ; CHECK-NEXT: Start Instruction: store i32 30, ptr undef, align 1 ; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 ; CHECK-NEXT: Function: main, Basic Block: for.end246 ; CHECK-NEXT: Start Instruction: store i32 0, ptr undef, align 1 ; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 -; CHECK-NEXT: 2 candidates of length 4. 
Found in: -; CHECK-NEXT: Function: main, Basic Block: entry -; CHECK-NEXT: Start Instruction: %0 = add i32 1, 4 -; CHECK-NEXT: End Instruction: %1 = sub i32 1, 4 -; CHECK-NEXT: Function: main, Basic Block: for.body169 -; CHECK-NEXT: Start Instruction: %2 = add i32 1, 4 -; CHECK-NEXT: End Instruction: %3 = sub i32 1, 4 source_filename = "irsimilarity_crash.ll" @@ -72,3 +73,5 @@ attributes #0 = { nocallback nofree nosync nounwind readnone speculatable willre !9 = !{} !10 = !DIBasicType(name: "long", size: 32, encoding: DW_ATE_signed) !11 = !DILocation(line: 522, column: 23, scope: !2) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/different.ll b/llvm/test/Analysis/IRSimilarityIdentifier/different.ll index 701af7e21b32e..e5c9970b159b9 100644 --- a/llvm/test/Analysis/IRSimilarityIdentifier/different.ll +++ b/llvm/test/Analysis/IRSimilarityIdentifier/different.ll @@ -5,20 +5,20 @@ ; return items only within the same function when there are different sets of ; instructions in functions. -; CHECK: 2 candidates of length 3. Found in: -; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: %b = load i32, ptr %1, align 4 -; CHECK-NEXT: End Instruction: %d = load i32, ptr %3, align 4 -; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) -; CHECK-NEXT: Start Instruction: %a = load i32, ptr %0, align 4 -; CHECK-NEXT: End Instruction: %c = load i32, ptr %2, align 4 -; CHECK-NEXT: 2 candidates of length 5. Found in: +; CHECK: 2 candidates of length 5. Found in: ; CHECK-NEXT: Function: fish, Basic Block: entry ; CHECK-NEXT: Start Instruction: store i32 6, ptr %0, align 4 ; CHECK-NEXT: End Instruction: store i32 4, ptr %4, align 4 ; CHECK-NEXT: Function: fish, Basic Block: entry ; CHECK-NEXT: Start Instruction: store i32 1, ptr %1, align 4 ; CHECK-NEXT: End Instruction: store i32 5, ptr %5, align 4 +; CHECK-NEXT: 2 candidates of length 3. Found in: +; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) +; CHECK-NEXT: Start Instruction: %b = load i32, ptr %1, align 4 +; CHECK-NEXT: End Instruction: %d = load i32, ptr %3, align 4 +; CHECK-NEXT: Function: turtle, Basic Block: (unnamed) +; CHECK-NEXT: Start Instruction: %a = load i32, ptr %0, align 4 +; CHECK-NEXT: End Instruction: %c = load i32, ptr %2, align 4 define linkonce_odr void @fish() { entry: @@ -44,3 +44,5 @@ define void @turtle(ptr %0, ptr %1, ptr %2, ptr %3) { %d = load i32, ptr %3 ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll b/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll index 5c7210790f472..4891a587d776f 100644 --- a/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll +++ b/llvm/test/Analysis/IRSimilarityIdentifier/nothing.ll @@ -10,3 +10,5 @@ define linkonce_odr void @fish() { entry: ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Transforms/IROutliner/illegal-assumes.ll b/llvm/test/Transforms/IROutliner/illegal-assumes.ll index b91f8ec92200e..d863fe7a29903 100644 --- a/llvm/test/Transforms/IROutliner/illegal-assumes.ll +++ b/llvm/test/Transforms/IROutliner/illegal-assumes.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=verify,iroutliner -ir-outlining-no-cost < %s | FileCheck %s +; RUN: opt -S -p iroutliner,verify -ir-outlining-no-cost < %s | FileCheck %s ; This test ensures that we do not include llvm.assumes. There are exceptions ; in the CodeExtractor's algorithm for llvm.assumes, so we ignore it for now. @@ -13,13 +13,13 @@ define void @outline_assumes() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_3(i1 true, ptr [[D]], ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_4(i1 true, ptr [[D]], ptr [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) ; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]] -; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) ; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -49,12 +49,12 @@ define void @outline_assumes2() { ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_3(i1 false, ptr [[D]], ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_4(i1 false, ptr [[D]], ptr [[DL_LOC]]) ; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) -; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) ; CHECK-NEXT: call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]]) ; CHECK-NEXT: ret void ; entry: @@ -77,16 +77,17 @@ entry: define void @outline_assumes3() { ; CHECK-LABEL: @outline_assumes3( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: store i1 true, ptr [[D]], align 4 -; CHECK-NEXT: [[DL:%.*]] = load i1, ptr [[D]], align 1 -; CHECK-NEXT: [[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] -; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) -; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i1 true, ptr [[D]], ptr [[A]], ptr [[B]], ptr [[C]], ptr [[DL_LOC]]) +; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 +; CHECK-NEXT: 
call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_3(ptr [[A]]) ; CHECK-NEXT: ret void ; entry: @@ -109,16 +110,17 @@ entry: define void @outline_assumes4() { ; CHECK-LABEL: @outline_assumes4( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[DL_LOC:%.*]] = alloca i1, align 1 ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 ; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4 -; CHECK-NEXT: store i1 false, ptr [[D]], align 4 -; CHECK-NEXT: [[DL:%.*]] = load i1, ptr [[D]], align 1 -; CHECK-NEXT: [[SPLIT_INST:%.*]] = add i1 [[DL]], [[DL]] -; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[DL]]) -; CHECK-NEXT: call void @outlined_ir_func_2(ptr [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @outlined_ir_func_0(i1 false, ptr [[D]], ptr [[A]], ptr [[B]], ptr [[C]], ptr [[DL_LOC]]) +; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, ptr [[DL_LOC]], align 1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 -1, ptr [[DL_LOC]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[DL_RELOAD]]) +; CHECK-NEXT: call void @outlined_ir_func_3(ptr [[A]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/tools/llvm-sim/single-sim-file.test b/llvm/test/tools/llvm-sim/single-sim-file.test index 4d0656afd4d3a..cef14b3608500 100644 --- a/llvm/test/tools/llvm-sim/single-sim-file.test +++ b/llvm/test/tools/llvm-sim/single-sim-file.test @@ -6,21 +6,21 @@ # CHECK: { # CHECK-NEXT: "1": [ # CHECK-NEXT: { -# CHECK-NEXT: "start": 18, +# CHECK-NEXT: "start": 14, # CHECK-NEXT: "end": 19 # CHECK-NEXT: }, # CHECK-NEXT: { -# CHECK-NEXT: "start": 8, +# CHECK-NEXT: "start": 4, # CHECK-NEXT: "end": 9 # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "2": [ # CHECK-NEXT: { -# CHECK-NEXT: "start": 17, +# CHECK-NEXT: "start": 15, # CHECK-NEXT: "end": 19 # CHECK-NEXT: }, # CHECK-NEXT: { -# CHECK-NEXT: "start": 7, +# CHECK-NEXT: "start": 5, # CHECK-NEXT: "end": 9 # CHECK-NEXT: } # CHECK-NEXT: ], @@ -36,21 +36,21 @@ # CHECK-NEXT: ], # CHECK-NEXT: "4": [ # CHECK-NEXT: { -# CHECK-NEXT: "start": 15, +# CHECK-NEXT: "start": 17, # CHECK-NEXT: "end": 19 # CHECK-NEXT: }, # CHECK-NEXT: { -# CHECK-NEXT: "start": 5, +# CHECK-NEXT: "start": 7, # CHECK-NEXT: "end": 9 # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "5": [ # CHECK-NEXT: { -# CHECK-NEXT: "start": 14, +# CHECK-NEXT: "start": 18, # CHECK-NEXT: "end": 19 # CHECK-NEXT: }, # CHECK-NEXT: { -# CHECK-NEXT: "start": 4, +# CHECK-NEXT: "start": 8, # CHECK-NEXT: "end": 9 # CHECK-NEXT: } # CHECK-NEXT: ] diff --git a/llvm/test/tools/llvm-sim/single-sim.test b/llvm/test/tools/llvm-sim/single-sim.test index ba7c9c23bd916..0095ec6acbc58 100644 --- a/llvm/test/tools/llvm-sim/single-sim.test +++ b/llvm/test/tools/llvm-sim/single-sim.test @@ -5,21 +5,21 @@ # CHECK: { # CHECK-NEXT: "1": [ # CHECK-NEXT: { -# CHECK-NEXT: "start": 18, +# CHECK-NEXT: "start": 14, # CHECK-NEXT: "end": 19 # CHECK-NEXT: }, # CHECK-NEXT: { -# CHECK-NEXT: "start": 8, +# CHECK-NEXT: "start": 4, # CHECK-NEXT: "end": 9 # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "2": [ # CHECK-NEXT: { -# CHECK-NEXT: "start": 17, +# CHECK-NEXT: "start": 15, # CHECK-NEXT: "end": 19 # CHECK-NEXT: }, # CHECK-NEXT: { -# CHECK-NEXT: "start": 7, +# CHECK-NEXT: "start": 5, # CHECK-NEXT: "end": 9 # CHECK-NEXT: } # CHECK-NEXT: ], @@ -35,21 +35,21 @@ # CHECK-NEXT: ], # 
CHECK-NEXT: "4": [ # CHECK-NEXT: { -# CHECK-NEXT: "start": 15, +# CHECK-NEXT: "start": 17, # CHECK-NEXT: "end": 19 # CHECK-NEXT: }, # CHECK-NEXT: { -# CHECK-NEXT: "start": 5, +# CHECK-NEXT: "start": 7, # CHECK-NEXT: "end": 9 # CHECK-NEXT: } # CHECK-NEXT: ], # CHECK-NEXT: "5": [ # CHECK-NEXT: { -# CHECK-NEXT: "start": 14, +# CHECK-NEXT: "start": 18, # CHECK-NEXT: "end": 19 # CHECK-NEXT: }, # CHECK-NEXT: { -# CHECK-NEXT: "start": 4, +# CHECK-NEXT: "start": 8, # CHECK-NEXT: "end": 9 # CHECK-NEXT: } # CHECK-NEXT: ] From cef71d0105c5ccade716e8b96f0ebc2ea42e21b9 Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Sat, 11 Mar 2023 00:16:03 +0000 Subject: [PATCH 230/691] [memprof] Support symbolization of PIE binaries. Support symolization of PIE binaries in memprof. We assume that the profiled binary has one executable text segment for simplicity. Update the memprof-pic test to now expect the same output as the memprof-basic test. Reviewed By: tejohnson Differential Revision: https://reviews.llvm.org/D146181 --- .../llvm/ProfileData/RawMemProfReader.h | 16 ++- llvm/lib/ProfileData/RawMemProfReader.cpp | 106 ++++++++++++++---- .../test/tools/llvm-profdata/memprof-pic.test | 90 ++++++++++++++- 3 files changed, 184 insertions(+), 28 deletions(-) diff --git a/llvm/include/llvm/ProfileData/RawMemProfReader.h b/llvm/include/llvm/ProfileData/RawMemProfReader.h index 998e845abb555..52d5f12fc7972 100644 --- a/llvm/include/llvm/ProfileData/RawMemProfReader.h +++ b/llvm/include/llvm/ProfileData/RawMemProfReader.h @@ -106,6 +106,8 @@ class RawMemProfReader { Error initialize(std::unique_ptr DataBuffer); // Read and parse the contents of the `DataBuffer` as a binary format profile. Error readRawProfile(std::unique_ptr DataBuffer); + // Initialize the segment mapping information for symbolization. + Error setupForSymbolization(); // Symbolize and cache all the virtual addresses we encounter in the // callstacks from the raw profile. Also prune callstack frames which we can't // symbolize or those that belong to the runtime. For profile entries where @@ -125,11 +127,21 @@ class RawMemProfReader { object::SectionedAddress getModuleOffset(uint64_t VirtualAddress); + // The profiled binary. object::OwningBinary Binary; + // A symbolizer to translate virtual addresses to code locations. std::unique_ptr Symbolizer; + // The preferred load address of the executable segment. + uint64_t PreferredTextSegmentAddress = 0; + // The base address of the text segment in the process during profiling. + uint64_t ProfiledTextSegmentStart = 0; + // The limit address of the text segment in the process during profiling. + uint64_t ProfiledTextSegmentEnd = 0; + + // The memory mapped segment information for all executable segments in the + // profiled binary (filtered from the raw profile using the build id). + llvm::SmallVector SegmentInfo; - // The contents of the raw profile. - llvm::SmallVector SegmentInfo; // A map from callstack id (same as key in CallStackMap below) to the heap // information recorded for that allocation context. 
   llvm::MapVector<uint64_t, MemInfoBlock> CallstackProfileData;
diff --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index dd62a2f8a0f6c..27df3995ae885 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -24,13 +24,16 @@
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
 #include "llvm/Object/Binary.h"
+#include "llvm/Object/BuildID.h"
 #include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/Path.h"
 
 #define DEBUG_TYPE "memprof"
@@ -270,17 +273,37 @@ Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
   auto* Elf64LEObject = llvm::cast<llvm::object::ELF64LEObjectFile>(ElfObject);
   const llvm::object::ELF64LEFile& ElfFile = Elf64LEObject->getELFFile();
   auto PHdrsOr = ElfFile.program_headers();
-  if(!PHdrsOr)
-    return report(make_error<StringError>(Twine("Could not read program headers: "),
-                                          inconvertibleErrorCode()),
-                  FileName);
-  auto FirstLoadHeader = PHdrsOr->begin();
-  while (FirstLoadHeader->p_type != llvm::ELF::PT_LOAD)
-    ++FirstLoadHeader;
-  if(FirstLoadHeader->p_vaddr == 0)
-    return report(make_error<StringError>(Twine("Unsupported position independent code"),
-                                          inconvertibleErrorCode()),
-                  FileName);
+  if (!PHdrsOr)
+    return report(
+        make_error<StringError>(Twine("Could not read program headers: "),
+                                inconvertibleErrorCode()),
+        FileName);
+
+  int NumExecutableSegments = 0;
+  for (const auto &Phdr : *PHdrsOr) {
+    if (Phdr.p_type == ELF::PT_LOAD) {
+      if (Phdr.p_flags & ELF::PF_X) {
+        // We assume only one text segment in the main binary for simplicity
+        // and to reduce the overhead of checking multiple ranges during
+        // symbolization.
+        if (++NumExecutableSegments > 1) {
+          return report(
+              make_error<StringError>(
+                  "Expect only one executable load segment in the binary",
+                  inconvertibleErrorCode()),
+              FileName);
+        }
+        // Segment will always be loaded at a page boundary, expect it to be
+        // aligned already. Assume 4K pagesize for the machine from which the
+        // profile has been collected. This should be fine for now; in case we
+        // want to support other page sizes, it can be recorded in the raw
+        // profile during collection.
+        PreferredTextSegmentAddress = Phdr.p_vaddr;
+        assert(Phdr.p_vaddr == (Phdr.p_vaddr & ~(0x1000 - 1U)) &&
+               "Expect p_vaddr to always be page aligned");
+        assert(Phdr.p_offset == 0 && "Expect p_offset = 0 for symbolization.");
+      }
+    }
+  }
 
   auto Triple = ElfObject->makeTriple();
   if (!Triple.isX86())
@@ -299,15 +322,51 @@ Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
     return report(SOFOr.takeError(), FileName);
   Symbolizer = std::move(SOFOr.get());
 
+  // Process the raw profile.
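+  // The steps below: read the serialized profile, match its segment info
+  // against the binary's build id (setupForSymbolization) so that virtual
+  // addresses can be adjusted, symbolize and filter the recorded call
+  // stacks, and finally map everything to MemProf records.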
   if (Error E = readRawProfile(std::move(DataBuffer)))
     return E;
 
+  if (Error E = setupForSymbolization())
+    return E;
+
   if (Error E = symbolizeAndFilterStackFrames())
     return E;
 
   return mapRawProfileToRecords();
 }
 
+Error RawMemProfReader::setupForSymbolization() {
+  auto *Object = cast<object::ELFObjectFileBase>(Binary.getBinary());
+  auto BuildIdOr = object::getBuildID(Object);
+  if (!BuildIdOr.has_value())
+    return make_error<StringError>(Twine("No build id found in binary ") +
+                                       Binary.getBinary()->getFileName(),
+                                   inconvertibleErrorCode());
+  llvm::ArrayRef<uint8_t> BinaryId = BuildIdOr.value();
+
+  int NumMatched = 0;
+  for (const auto &Entry : SegmentInfo) {
+    llvm::ArrayRef<uint8_t> SegmentId(Entry.BuildId, Entry.BuildIdSize);
+    if (BinaryId == SegmentId) {
+      // We assume only one text segment in the main binary for simplicity and
+      // to reduce the overhead of checking multiple ranges during
+      // symbolization.
+      if (++NumMatched > 1) {
+        return make_error<StringError>(
+            "We expect only one executable segment in the profiled binary",
+            inconvertibleErrorCode());
+      }
+      ProfiledTextSegmentStart = Entry.Start;
+      ProfiledTextSegmentEnd = Entry.End;
+    }
+  }
+  assert(NumMatched != 0 && "No matching executable segments in segment info.");
+  assert((PreferredTextSegmentAddress == 0 ||
+          PreferredTextSegmentAddress == ProfiledTextSegmentStart) &&
+         "Expect text segment address to be 0 or equal to profiled text "
+         "segment start.");
+  return Error::success();
+}
+
 Error RawMemProfReader::mapRawProfileToRecords() {
   // Hold a mapping from function to each callsite location we encounter within
   // it that is part of some dynamic allocation context. The location is stored
@@ -516,20 +575,19 @@ Error RawMemProfReader::readRawProfile(
 
 object::SectionedAddress
 RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
-  LLVM_DEBUG({
-  SegmentEntry *ContainingSegment = nullptr;
-  for (auto &SE : SegmentInfo) {
-    if (VirtualAddress > SE.Start && VirtualAddress <= SE.End) {
-      ContainingSegment = &SE;
-    }
+  if (VirtualAddress > ProfiledTextSegmentStart &&
+      VirtualAddress <= ProfiledTextSegmentEnd) {
+    // For PIE binaries, the preferred address is zero and we adjust the
+    // virtual address by the start of the profiled segment, assuming that the
+    // offset of the segment in the binary is zero. For non-PIE binaries the
+    // preferred and profiled segment addresses should be equal and this is a
+    // no-op.
+    const uint64_t AdjustedAddress =
+        VirtualAddress + PreferredTextSegmentAddress - ProfiledTextSegmentStart;
+    return object::SectionedAddress{AdjustedAddress};
   }
-
-  // Ensure that the virtual address is valid.
-  assert(ContainingSegment && "Could not find a segment entry");
-  });
-
-  // TODO: Compute the file offset based on the maps and program headers. For
-  // now this only works for non PIE binaries.
+  // Addresses which do not originate from the profiled text segment in the
+  // binary are not adjusted. These will fail symbolization and be filtered out
+  // during processing.
   return object::SectionedAddress{VirtualAddress};
 }
 
diff --git a/llvm/test/tools/llvm-profdata/memprof-pic.test b/llvm/test/tools/llvm-profdata/memprof-pic.test
index fe55578b80731..03c98ed9200d9 100644
--- a/llvm/test/tools/llvm-profdata/memprof-pic.test
+++ b/llvm/test/tools/llvm-profdata/memprof-pic.test
@@ -7,6 +7,92 @@ libraries linked in which could change the number of segments recorded.
To update the inputs used below run Inputs/update_memprof_inputs.sh /path/to/updated/clang -RUN: not llvm-profdata show --memory %p/Inputs/pic.memprofraw --profiled-binary %p/Inputs/pic.memprofexe -o - 2>&1 | FileCheck %s +RUN: llvm-profdata show --memory %p/Inputs/pic.memprofraw --profiled-binary %p/Inputs/pic.memprofexe -o - 2>&1 | FileCheck %s -CHECK: Unsupported position independent code +CHECK: MemprofProfile: +CHECK-NEXT: Summary: +CHECK-NEXT: Version: 3 +CHECK-NEXT: NumSegments: {{[0-9]+}} +CHECK-NEXT: NumMibInfo: 2 +CHECK-NEXT: NumAllocFunctions: 1 +CHECK-NEXT: NumStackOffsets: 2 +CHECK-NEXT: Segments: +CHECK-NEXT: - +CHECK-NEXT: BuildId: {{[[:xdigit:]]+}} +CHECK-NEXT: Start: 0x{{[[:xdigit:]]+}} +CHECK-NEXT: End: 0x{{[[:xdigit:]]+}} +CHECK-NEXT: Offset: 0x{{[[:xdigit:]]+}} +CHECK-NEXT: - + +CHECK: Records: +CHECK-NEXT: - +CHECK-NEXT: FunctionGUID: {{[0-9]+}} +CHECK-NEXT: AllocSites: +CHECK-NEXT: - +CHECK-NEXT: Callstack: +CHECK-NEXT: - +CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: SymbolName: main +CHECK-NEXT: LineOffset: 1 +CHECK-NEXT: Column: 21 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: MemInfoBlock: +CHECK-NEXT: AllocCount: 1 +CHECK-NEXT: TotalAccessCount: 2 +CHECK-NEXT: MinAccessCount: 2 +CHECK-NEXT: MaxAccessCount: 2 +CHECK-NEXT: TotalSize: 10 +CHECK-NEXT: MinSize: 10 +CHECK-NEXT: MaxSize: 10 +CHECK-NEXT: AllocTimestamp: {{[0-9]+}} +CHECK-NEXT: DeallocTimestamp: {{[0-9]+}} +CHECK-NEXT: TotalLifetime: 0 +CHECK-NEXT: MinLifetime: 0 +CHECK-NEXT: MaxLifetime: 0 +CHECK-NEXT: AllocCpuId: {{[0-9]+}} +CHECK-NEXT: DeallocCpuId: {{[0-9]+}} +CHECK-NEXT: NumMigratedCpu: 0 +CHECK-NEXT: NumLifetimeOverlaps: 0 +CHECK-NEXT: NumSameAllocCpu: 0 +CHECK-NEXT: NumSameDeallocCpu: 0 +CHECK-NEXT: DataTypeId: {{[0-9]+}} +CHECK-NEXT: TotalAccessDensity: 20 +CHECK-NEXT: MinAccessDensity: 20 +CHECK-NEXT: MaxAccessDensity: 20 +CHECK-NEXT: TotalLifetimeAccessDensity: 20000 +CHECK-NEXT: MinLifetimeAccessDensity: 20000 +CHECK-NEXT: MaxLifetimeAccessDensity: 20000 +CHECK-NEXT: - +CHECK-NEXT: Callstack: +CHECK-NEXT: - +CHECK-NEXT: Function: {{[0-9]+}} +CHECK-NEXT: SymbolName: main +CHECK-NEXT: LineOffset: 4 +CHECK-NEXT: Column: 15 +CHECK-NEXT: Inline: 0 +CHECK-NEXT: MemInfoBlock: +CHECK-NEXT: AllocCount: 1 +CHECK-NEXT: TotalAccessCount: 2 +CHECK-NEXT: MinAccessCount: 2 +CHECK-NEXT: MaxAccessCount: 2 +CHECK-NEXT: TotalSize: 10 +CHECK-NEXT: MinSize: 10 +CHECK-NEXT: MaxSize: 10 +CHECK-NEXT: AllocTimestamp: {{[0-9]+}} +CHECK-NEXT: DeallocTimestamp: {{[0-9]+}} +CHECK-NEXT: TotalLifetime: 0 +CHECK-NEXT: MinLifetime: 0 +CHECK-NEXT: MaxLifetime: 0 +CHECK-NEXT: AllocCpuId: {{[0-9]+}} +CHECK-NEXT: DeallocCpuId: {{[0-9]+}} +CHECK-NEXT: NumMigratedCpu: 0 +CHECK-NEXT: NumLifetimeOverlaps: 0 +CHECK-NEXT: NumSameAllocCpu: 0 +CHECK-NEXT: NumSameDeallocCpu: 0 +CHECK-NEXT: DataTypeId: {{[0-9]+}} +CHECK-NEXT: TotalAccessDensity: 20 +CHECK-NEXT: MinAccessDensity: 20 +CHECK-NEXT: MaxAccessDensity: 20 +CHECK-NEXT: TotalLifetimeAccessDensity: 20000 +CHECK-NEXT: MinLifetimeAccessDensity: 20000 +CHECK-NEXT: MaxLifetimeAccessDensity: 20000 From a2c63d7f0b77ae92c97974d0630625d83a0745ce Mon Sep 17 00:00:00 2001 From: Aart Bik Date: Fri, 17 Mar 2023 18:24:01 -0700 Subject: [PATCH 231/691] [mlir][sparse][gpu] end-to-end test for 2:4 sparsity on NVidia GPUs Example of using 2:4 sparsity on NVidia GPU Reviewed By: ThomasRaoux Differential Revision: https://reviews.llvm.org/D145447 --- .../SparseTensor/GPU/CUDA/lit.local.cfg | 2 + .../GPU/CUDA/sparse-mma-2-4-f16.mlir | 395 ++++++++++++++++++ 2 files changed, 397 insertions(+) create mode 100644 
mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/lit.local.cfg
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir

diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/lit.local.cfg
new file mode 100644
index 0000000000000..0bdebfedeee36
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/lit.local.cfg
@@ -0,0 +1,2 @@
+if not config.enable_cuda_runner:
+  config.unsupported = True
diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
new file mode 100644
index 0000000000000..51ad9a4792281
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-mma-2-4-f16.mlir
@@ -0,0 +1,395 @@
+// RUN: mlir-opt \
+// RUN: --pass-pipeline="builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,convert-nvgpu-to-nvvm,affine-expand-index-ops,lower-affine,convert-arith-to-llvm),convert-vector-to-llvm,canonicalize,cse,gpu.module(gpu-to-cubin{chip=sm_80 features=+ptx71}))" \
+// RUN: %s \
+// RUN: | mlir-opt --convert-scf-to-cf -convert-cf-to-llvm --convert-vector-to-llvm \
+// RUN:   --convert-arith-to-llvm --gpu-to-llvm --reconcile-unrealized-casts \
+// RUN: | mlir-cpu-runner \
+// RUN:   --shared-libs=%mlir_cuda_runtime \
+// RUN:   --shared-libs=%mlir_runner_utils \
+// RUN:   --e main --entry-point-result=void \
+// RUN: | FileCheck %s

+module attributes {gpu.container_module} {

+  // Kernels that run on the device.

+  gpu.module @kernels {

+    //
+    // An NVidia GPU kernel to compute
+    //   C = A x B
+    // (or, technically, D = A x B + C)
+    // using 2:4 structured sparsity for A.
+    //
+    // This kernel provides a building block for sparse compilation of a larger
+    // enveloping matrix multiplication computation on a GPU.
+    //
+    // Operand A values (2:4 sparse): row major format, logically "16x32xf16"
+    //                                but "16x16xf16" after compression
+    //
+    // Operand A metadata:
+    //   - The metadata is logically "16x16xi2". Each 2-bit value indicates
+    //     the position of a non-zero value within the respective group of 4
+    //     elements.
+    //   - However, we represent it as "16x2xi16".
+    //   - Each sparse instruction type specifies how the metadata should be
+    //     distributed among threads. In this case, within each quad (group of
+    //     4 consecutive threads starting with a thread ID which is a multiple
+    //     of 4), threads 4i and 4i+1 are required to hold different metadata.
+    //     For uniformity below, we just have all threads load metadata, and
+    //     the way they determine which metadata to load is given below.
+    //   - Thread map for the 16x8x32 instruction is:
+    //       2i     -> col 0
+    //       2i + 1 -> col 1
+    //
+    // Operand B (dense): column major format.
+    //
+    // Operand C (accum): assumed zero on entry, used as output.
+    //
+    gpu.func @mma_sp_sync_f16_16832(
+        %argA: memref<16x16xf16>,
+        %argA_meta: memref<16x2xi16>,
+        %argB: memref<8x32xf16>,
+        %argC: memref<16x8xf16>) kernel {
+      %f0 = arith.constant 0.0 : f16
+      %c4 = arith.constant 4 : index
+      %c8 = arith.constant 8 : index

+      // Assume we have a linear thread id and the kernel launches 32 threads (1 warp).
+      // So CUDA launch would be threadblock = (32, 1, 1), grid = (1, 1, 1)
+      %lane_id = gpu.thread_id x
+      // Which group of 4 threads do we belong to?
+      %quad_id = affine.apply affine_map<()[s0]->(s0 floordiv 4)>()[%lane_id]
+      // Are we the even or the odd thread of the pair?
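+      // (pair_id is used below to select which of the two metadata columns,
+      // col 0 or col 1, this thread loads from %argA_meta.)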
+      %pair_id = affine.apply affine_map<()[s0]->(s0 mod 2)>()[%lane_id]

+      // Now we have
+      //   MMA lane=0 quad=0 pair=0
+      //   MMA lane=1 quad=0 pair=1
+      //   MMA lane=2 quad=0 pair=0
+      //   MMA lane=3 quad=0 pair=1
+      //   MMA lane=4 quad=1 pair=0
+      //   MMA lane=5 quad=1 pair=1
+      //   ...
+      //   MMA lane=30 quad=7 pair=0
+      //   MMA lane=31 quad=7 pair=1
+      //
+      // gpu.printf "MMA lane=%lld quad=%lld pair=%lld\n" %lane_id, %quad_id, %pair_id : index, index, index

+      //===----------------------------------------------------------------------===//
+      // Load the operandA metadata
+      // (https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#sparse-mma-metadata-16832-f16bf16)
+      //===----------------------------------------------------------------------===//

+      // For the 16x2xi16 metadata, all threads that load metadata will load one
+      // i16 value from the first 8 rows, and one i16 value from the second 8
+      // rows. The i16 values are then put into an i32 with the value from the
+      // first 8 rows going in the lower bits.
+      //
+      // The below IR loads and combines the two pieces of i16 metadata required.
+      // Obviously, it's possible to re-pack the metadata before launching the kernel in
+      // order to eliminate this cost and load a single i32 operand. This just shows
+      // how to put them together if you do the naive load per the diagram in
+      // the PTX docs. Technically only the first two threads in each quad need
+      // to do this, but for simplicity we just have all threads participate since
+      // it can't hurt.
+      //
+      // The mapping is
+      //   Lower i16 bits <- (thread_id) -> load A_meta[ quad_id    , pair_id]
+      //   Upper i16 bits <- (thread_id) -> load A_meta[ quad_id + 8, pair_id]

+      %quad_id_plus_8 = affine.apply affine_map<()[s0]->(s0 + 8)>()[%quad_id]
+      %meta_A_per_thread0 = memref.load %argA_meta[%quad_id       , %pair_id] : memref<16x2xi16>
+      %meta_A_per_thread1 = memref.load %argA_meta[%quad_id_plus_8, %pair_id] : memref<16x2xi16>

+      %low_i32 = arith.extui %meta_A_per_thread0 : i16 to i32
+      %high_i32 = arith.extui %meta_A_per_thread1 : i16 to i32

+      %meta_init = arith.constant dense<0> : vector<2xi16>
+      %meta_low = vector.insert %meta_A_per_thread0, %meta_init[0] : i16 into vector<2xi16>
+      %meta = vector.insert %meta_A_per_thread1, %meta_low[1] : i16 into vector<2xi16>

+      //===----------------------------------------------------------------------===//
+      // Load operandA
+      //===----------------------------------------------------------------------===//

+      // Load the actual fragments for the sparse values. This can be done using ldmatrix,
+      // but here we just do naive individual loads, which would also be required for
+      // a layout/element type that is not compatible with ldmatrix (e.g. i8 transpose load).
+      //
+      // The thread map here is different than that of the operandA metadata. Each
+      // thread will load one 2xf16 vector from each of the four (8x8xf16) quadrants
+      // of the 16x16xf16 operand.
+      //
+      // The (thread_id)->(row, col) map within each 8x4x(2xf16) quadrant is (t)->(t/4, t%4). We
+      // can use "affine.delinearize_index" which means the same thing.

+      %quad_row, %col_8x4 = affine.delinearize_index %lane_id into (%c8, %c4) : index, index
+      %quad_col = affine.apply affine_map<()[s0]->(s0 * 2)>()[%col_8x4] // account for 2xf16/col

+      // Load quad (0, 0)
+      %A_quad00 = vector.transfer_read %argA[%quad_row, %quad_col], %f0 {in_bounds = [true]} : memref<16x16xf16>, vector<2xf16>

+      // Load quad (1, 0). Just shift row down 8.
+      %quad_row_plus_8 = affine.apply affine_map<(d0)[]->(d0+8)>(%quad_row)[]
+      %A_quad10 = vector.transfer_read %argA[%quad_row_plus_8, %quad_col], %f0 {in_bounds = [true]} : memref<16x16xf16>, vector<2xf16>

+      // Load quad (0, 1). Just shift col right 8 (4 2xf16 values)
+      %quad_col_plus_8 = affine.apply affine_map<(d0)[]->(d0+8)>(%quad_col)[]
+      %A_quad01 = vector.transfer_read %argA[%quad_row, %quad_col_plus_8], %f0 {in_bounds = [true]} : memref<16x16xf16>, vector<2xf16>

+      // Load quad (1, 1)
+      %A_quad11 = vector.transfer_read %argA[%quad_row_plus_8, %quad_col_plus_8], %f0 {in_bounds = [true]} : memref<16x16xf16>, vector<2xf16>

+      // Assemble the elements into a vector
+      %A_init0 = arith.constant dense<0.0> : vector<4x2xf16>
+      %A_data0 = vector.insert %A_quad00, %A_init0[0] : vector<2xf16> into vector<4x2xf16>
+      %A_data1 = vector.insert %A_quad10, %A_data0[1] : vector<2xf16> into vector<4x2xf16>
+      %A_data2 = vector.insert %A_quad01, %A_data1[2] : vector<2xf16> into vector<4x2xf16>
+      %A_data = vector.insert %A_quad11, %A_data2[3] : vector<2xf16> into vector<4x2xf16>

+      //===----------------------------------------------------------------------===//
+      // Load operand B
+      //===----------------------------------------------------------------------===//

+      // Load the actual fragments for the dense values. This can be done using ldmatrix,
+      // but here we just do naive individual loads, as would be required if we could
+      // not use ldmatrix.
+      //
+      // The thread map here is different from operandA. This operand is in the form
+      // memref<8x32xf16> (col major). Each thread loads a 2xf16 vector from a
+      // 8x8xf16 quadrant.
+      //
+      // The (thread_id)->(col, row) map within each 8x4x(2xf16) quadrant is
+      // (t) -> (t/4, t % 4). So we can re-use some of the calculation from A.

+      // Load quad (0, 0)
+      %B_quad0 = vector.transfer_read %argB[%quad_row, %quad_col], %f0 {in_bounds = [true]} : memref<8x32xf16>, vector<2xf16>

+      // Load quad (0, 1)
+      %B_quad1 = vector.transfer_read %argB[%quad_row, %quad_col_plus_8], %f0 {in_bounds = [true]} : memref<8x32xf16>, vector<2xf16>

+      // Load quad (0, 2)
+      %quad_col_plus_16 = affine.apply affine_map<()[s0]->(s0 + 16)>()[%quad_col]
+      %B_quad2 = vector.transfer_read %argB[%quad_row, %quad_col_plus_16], %f0 {in_bounds = [true]} : memref<8x32xf16>, vector<2xf16>

+      // Load quad (0, 3)
+      %quad_col_plus_24 = affine.apply affine_map<()[s0]->(s0 + 24)>()[%quad_col]
+      %B_quad3 = vector.transfer_read %argB[%quad_row, %quad_col_plus_24], %f0 {in_bounds = [true]} : memref<8x32xf16>, vector<2xf16>

+      // Assemble into vector
+      %B_init0 = arith.constant dense<0.0> : vector<4x2xf16>
+      %B_data0 = vector.insert %B_quad0, %B_init0[0] : vector<2xf16> into vector<4x2xf16>
+      %B_data1 = vector.insert %B_quad1, %B_data0[1] : vector<2xf16> into vector<4x2xf16>
+      %B_data2 = vector.insert %B_quad2, %B_data1[2] : vector<2xf16> into vector<4x2xf16>
+      %B_data = vector.insert %B_quad3, %B_data2[3] : vector<2xf16> into vector<4x2xf16>

+      // For now just say accum is a zeroed register
+      %accum = arith.constant dense<0.0> : vector<2x2xf16>

+      gpu.barrier

+      // Sparsity selector. For 16x8x32, the default "0" means threads T0/T1
+      // within each group of four threads contribute metadata.
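+      // The op below corresponds to the PTX mma.sp.sync instruction: it
+      // computes D = A * B + C for the 16x8x32 shape, where %A_data holds the
+      // compressed 2:4-sparse A fragments, %meta the per-thread metadata, and
+      // %accum the (zeroed) accumulator.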
+      %d = nvgpu.mma.sp.sync(%A_data, %B_data, %accum)
+        metadata(%meta)
+        {mmaShape = [16, 8, 32]} : (vector<4x2xf16>, vector<4x2xf16>, vector<2x2xf16>) -> vector<2x2xf16>

+      //===----------------------------------------------------------------------===//
+      // Write back results to gpu global memory
+      //===----------------------------------------------------------------------===//

+      // The mma instruction gave us two 2xf16 vectors per thread. These values
+      // correspond to different positions in the 16x8xf16 result matrix. Each value belongs
+      // to one of the 8x4x(2xf16) halves. The halves are indexed as follows (as you might guess):
+      //   vector0: (tid) -> (tid / 4    , tid %4)
+      //   vector1: (tid) -> (tid / 4 + 8, tid %4)
+      %C_0 = vector.extract %d[0] : vector<2x2xf16>
+      %C_1 = vector.extract %d[1] : vector<2x2xf16>
+      vector.transfer_write %C_0, %argC[%quad_row, %quad_col] {in_bounds = [true]} : vector<2xf16>, memref<16x8xf16>
+      vector.transfer_write %C_1, %argC[%quad_row_plus_8, %quad_col] {in_bounds = [true]} : vector<2xf16>, memref<16x8xf16>

+      gpu.return
+    }
+  }

+  // Code that runs on the host.

+  //
+  // This test performs a matrix multiplication
+  //   C = A x B
+  // using NVidia 2:4 structured sparsity for A.
+  //
+  func.func @main() {
+    %f0 = arith.constant 0.0 : f16
+    %c0 = arith.constant 0 : index
+    %c1 = arith.constant 1 : index
+    %c2 = arith.constant 2 : index
+    %c8 = arith.constant 8 : index
+    %c16 = arith.constant 16 : index
+    %c32 = arith.constant 32 : index
+    %c64 = arith.constant 64 : index

+    // Matrices A, B, C (16x32, 32x8, 16x8).
+    %a = memref.alloc() : memref<16x16xf16> // 16x32 but 2:4, row-major
+    %b = memref.alloc() : memref<8x32xf16>  // regular dense column-major
+    %c = memref.alloc() : memref<16x8xf16>  // accumulator row-major

+    // Metadata for A.
+    %m = memref.alloc() : memref<16x2xi16>

+    //
+    // Setup matrix A.
+    //
+    scf.for %ai = %c0 to %c16 step %c1 {
+      scf.for %aj = %c0 to %c16 step %c1 {
+        %a0 = arith.addi %ai, %aj : index
+        %a1 = arith.addi %a0, %c1 : index
+        %a2 = arith.index_cast %a1 : index to i32
+        %a3 = arith.sitofp %a2 : i32 to f16
+        memref.store %a3, %a[%ai, %aj] : memref<16x16xf16>
+      }
+    }

+    //
+    // Setup metadata for matrix A.
+    //
+    // Here we assume that all 2:4 elements are in pos 0 and 2,
+    // viz. in matrix
+    //   | A 0 B 0 |
+    //   { 0 2 }
+    //
+    // Note that within each i16, we need little-endian
+    // storage of the indices, as follows:
+    //
+    //   10 00 10 00 10 00 10 00 = 0x8888
+    //
+    %bits = arith.constant 0x8888 : i16
+    scf.for %mi = %c0 to %c16 step %c1 {
+      memref.store %bits, %m[%mi, %c0] : memref<16x2xi16>
+      memref.store %bits, %m[%mi, %c1] : memref<16x2xi16>
+    }

+    //
+    // Setup matrix B.
+    //
+    scf.for %bi = %c0 to %c8 step %c1 {
+      scf.for %bj = %c0 to %c32 step %c1 {
+        %b0 = arith.subi %bi, %bj : index
+        %b1 = arith.index_cast %b0 : index to i32
+        %b2 = arith.sitofp %b1 : i32 to f16
+        memref.store %b2, %b[%bi, %bj] : memref<8x32xf16>
+      }
+    }

+    //
+    // Reset matrix C.
+    //
+    scf.for %ci = %c0 to %c16 step %c1 {
+      scf.for %cj = %c0 to %c8 step %c1 {
+        memref.store %f0, %c[%ci, %cj] : memref<16x8xf16>
+      }
+    }

+    //
+    // Sanity check on **compressed** input matrix A.
+    //
+    // Note that it really is a 16x32 matrix:
+    //   | 1 0 2 0 3 0 ...
+    //   | 2 0 3 0 4 0 ...
+    // etc.
+ // + // CHECK: ( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 ) + // CHECK-NEXT: ( 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 ) + // CHECK-NEXT: ( 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 ) + // CHECK-NEXT: ( 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 ) + // CHECK-NEXT: ( 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 ) + // CHECK-NEXT: ( 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 ) + // CHECK-NEXT: ( 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 ) + // CHECK-NEXT: ( 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 ) + // CHECK-NEXT: ( 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 ) + // CHECK-NEXT: ( 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 ) + // CHECK-NEXT: ( 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 ) + // CHECK-NEXT: ( 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 ) + // CHECK-NEXT: ( 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 ) + // CHECK-NEXT: ( 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 ) + // CHECK-NEXT: ( 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 ) + // CHECK-NEXT: ( 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31 ) + // + scf.for %pai = %c0 to %c16 step %c1 { + %pa0 = vector.transfer_read %a[%pai, %c0], %f0 : memref<16x16xf16>, vector<16xf16> + vector.print %pa0 : vector<16xf16> + } + + // + // Sanity check on input matrix 32x8 B. + // Note that this is really shown as B^T + // + // CHECK-NEXT: ( 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31 ) + // CHECK-NEXT: ( 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30 ) + // CHECK-NEXT: ( 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29 ) + // CHECK-NEXT: ( 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28 ) + // CHECK-NEXT: ( 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27 ) + // CHECK-NEXT: ( 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26 ) + // CHECK-NEXT: ( 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25 ) + // CHECK-NEXT: ( 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24 ) + // + // + scf.for %pbi = %c0 to %c8 step %c1 { + %pb0 = vector.transfer_read %b[%pbi, %c0], %f0 : memref<8x32xf16>, vector<32xf16> + vector.print %pb0 : vector<32xf16> + } + + // Maps the provided host buffer into the device address space. + // Writes from the host are guaranteed to be visible to device + // kernels that are launched afterwards. Writes from the device + // are guaranteed to be visible on the host after synchronizing + // with the device kernel completion. 
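+    // Each memref below is first cast to an unranked memref (memref<*xf16> /
+    // memref<*xi16>), since that is the operand form gpu.host_register takes.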
+    %cast_a = memref.cast %a : memref<16x16xf16> to memref<*xf16>
+    gpu.host_register %cast_a : memref<*xf16>
+    %cast_m = memref.cast %m : memref<16x2xi16> to memref<*xi16>
+    gpu.host_register %cast_m : memref<*xi16>
+    %cast_b = memref.cast %b : memref<8x32xf16> to memref<*xf16>
+    gpu.host_register %cast_b : memref<*xf16>
+    %cast_c = memref.cast %c : memref<16x8xf16> to memref<*xf16>
+    gpu.host_register %cast_c : memref<*xf16>

+    // Call the kernel, using a single warp of 32 threads.
+    %t1  = arith.constant 1 : index
+    %t32 = arith.constant 32 : index
+    gpu.launch_func
+        @kernels::@mma_sp_sync_f16_16832
+        blocks in (%t1, %t1, %t1) // gridSizeX,Y,Z
+        threads in (%t32, %t1, %t1) // blockSizeX,Y,Z
+        args(%a : memref<16x16xf16>,
+             %m : memref<16x2xi16>,
+             %b : memref<8x32xf16>,
+             %c : memref<16x8xf16>)

+    //
+    // Verify computed matrix C.
+    //
+    // CHECK-NEXT: ( -2720, -2584, -2448, -2312, -2176, -2040, -1904, -1768 )
+    // CHECK-NEXT: ( -2960, -2808, -2656, -2504, -2352, -2200, -2048, -1896 )
+    // CHECK-NEXT: ( -3200, -3032, -2864, -2696, -2528, -2360, -2192, -2024 )
+    // CHECK-NEXT: ( -3440, -3256, -3072, -2888, -2704, -2520, -2336, -2152 )
+    // CHECK-NEXT: ( -3680, -3480, -3280, -3080, -2880, -2680, -2480, -2280 )
+    // CHECK-NEXT: ( -3920, -3704, -3488, -3272, -3056, -2840, -2624, -2408 )
+    // CHECK-NEXT: ( -4160, -3928, -3696, -3464, -3232, -3000, -2768, -2536 )
+    // CHECK-NEXT: ( -4400, -4152, -3904, -3656, -3408, -3160, -2912, -2664 )
+    // CHECK-NEXT: ( -4640, -4376, -4112, -3848, -3584, -3320, -3056, -2792 )
+    // CHECK-NEXT: ( -4880, -4600, -4320, -4040, -3760, -3480, -3200, -2920 )
+    // CHECK-NEXT: ( -5120, -4824, -4528, -4232, -3936, -3640, -3344, -3048 )
+    // CHECK-NEXT: ( -5360, -5048, -4736, -4424, -4112, -3800, -3488, -3176 )
+    // CHECK-NEXT: ( -5600, -5272, -4944, -4616, -4288, -3960, -3632, -3304 )
+    // CHECK-NEXT: ( -5840, -5496, -5152, -4808, -4464, -4120, -3776, -3432 )
+    // CHECK-NEXT: ( -6080, -5720, -5360, -5000, -4640, -4280, -3920, -3560 )
+    // CHECK-NEXT: ( -6320, -5944, -5568, -5192, -4816, -4440, -4064, -3688 )
+    //
+    scf.for %pci = %c0 to %c16 step %c1 {
+      %pc0 = vector.transfer_read %c[%pci, %c0], %f0 : memref<16x8xf16>, vector<8xf16>
+      vector.print %pc0 : vector<8xf16>
+    }

+    return
+  }
+}

From 08b0977a1925cf0a2cf6f87fcbf1d656e873f7c5 Mon Sep 17 00:00:00 2001
From: TatWai Chong
Date: Tue, 21 Mar 2023 20:50:13 +0000
Subject: [PATCH 232/691] [mlir][tosa] Add checks that the operands of certain
 operations are constant.

Some uses of TOSA rely on particular operations having constant operands,
e.g. the paddings and pad_const operands of the pad op. Add a verification
pattern to the validation pass; the check is optionally enabled.
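For example, running the validation pass with the new opt-in flag added
in this patch:

  mlir-opt input.mlir --tosa-validate=strict-op-spec-alignment

rejects a tosa.pad whose padding operand is not produced by a constant
op, with the diagnostic "padding of pad is not constant". (The flag name
and diagnostic are the ones introduced below; "input.mlir" is just a
placeholder file name.)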
Change-Id: I1628c0840a27ab06ef91150eee56ad4f5ac9543d

Reviewed By: rsuderman

Differential Revision: https://reviews.llvm.org/D145412
---
 .../mlir/Dialect/Tosa/Transforms/Passes.td    |  8 ++-
 .../Tosa/Transforms/TosaValidation.cpp        | 59 ++++++++++++++++++-
 mlir/test/Dialect/Tosa/invalid.mlir           | 47 ++++++++++++++-
 3 files changed, 109 insertions(+), 5 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td
index 7fd2f9ba54f17..1c3bfbebb1ccc 100644
--- a/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Tosa/Transforms/Passes.td
@@ -84,8 +84,12 @@ def TosaValidation : Pass<"tosa-validate", "func::FuncOp"> {
 
   let options = [
     Option<"profileName", "profile", "std::string",
-           /*default=*/"\"undefined\"",
-           "Validation if ops match for given profile">];
+           /*default=*/"\"undefined\"",
+           "Validate if operations match for the given profile">,
+    Option<"StrictOperationSpecAlignment", "strict-op-spec-alignment", "bool",
+           /*default=*/"false",
+           "Verify that the properties of certain operations align with the spec requirements">,
+  ];
 }
 
 #endif // MLIR_DIALECT_TOSA_TRANSFORMS_PASSES
diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp
index 896f81e75daab..4cb727b00ca0c 100644
--- a/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp
+++ b/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp
@@ -35,17 +35,68 @@ using namespace mlir::tosa;
 
 namespace {
 
+static LogicalResult checkConstantOperandPad(Operation *op) {
+  if (auto pad_op = dyn_cast<tosa::PadOp>(op)) {
+    DenseElementsAttr paddings;
+    if (!matchPattern(pad_op.getPadding(), m_Constant(&paddings)))
+      return op->emitOpError("padding of pad is not constant");
+
+    DenseElementsAttr pad_const;
+    // Assume this op is zero-padding if pad_const is not present.
+    if (pad_op.getPadConst() &&
+        !matchPattern(pad_op.getPadConst(), m_Constant(&pad_const)))
+      return op->emitOpError("pad_const of pad is not constant");
+  }
+  return success();
+}
+
+static LogicalResult checkConstantOperandTranspose(Operation *op) {
+  if (auto transpose_op = dyn_cast<tosa::TransposeOp>(op)) {
+    DenseElementsAttr perms;
+    if (!matchPattern(transpose_op.getPerms(), m_Constant(&perms)))
+      return op->emitOpError("perms of transpose is not constant");
+  }
+  return success();
+}
+
+static LogicalResult checkConstantOperandFullyConnected(Operation *op) {
+  if (auto fc_op = dyn_cast<tosa::FullyConnectedOp>(op)) {
+    DenseElementsAttr weight;
+    if (!matchPattern(fc_op.getWeight(), m_Constant(&weight)))
+      return op->emitOpError("weight of fully_connected is not constant");
+
+    DenseElementsAttr bias;
+    if (!matchPattern(fc_op.getBias(), m_Constant(&bias)))
+      return op->emitOpError("bias of fully_connected is not constant");
+  }
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // TOSA Validation Pass.
 //===----------------------------------------------------------------------===//
 
 struct TosaValidation : public tosa::impl::TosaValidationBase<TosaValidation> {
 public:
-  explicit TosaValidation() = default;
+  explicit TosaValidation() { populateConstantOperandChecks(); }
+
+  void runOnOperation() override;
+
+  LogicalResult applyConstantOperandCheck(Operation *op) {
+    for (auto &checker : const_checkers) {
+      if (failed(checker(op)))
+        return failure();
+    }
+    return success();
+  }
 
 private:
-  void runOnOperation() override;
+  void populateConstantOperandChecks() {
+    const_checkers.emplace_back(checkConstantOperandPad);
+    const_checkers.emplace_back(checkConstantOperandTranspose);
+    const_checkers.emplace_back(checkConstantOperandFullyConnected);
+  }
 
+  SmallVector<std::function<LogicalResult(Operation *)>> const_checkers;
   std::optional profileType;
 };
 
@@ -62,6 +113,10 @@ void TosaValidation::runOnOperation() {
       return signalPassFailure();
     }
   }
+
+  // Some uses of TOSA rely on the constant operands of particular operations.
+  if (StrictOperationSpecAlignment && failed(applyConstantOperandCheck(op)))
+    signalPassFailure();
   });
 }
 } // namespace
diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir
index 9f9c6ca6ce641..bb7a3f5287c7f 100644
--- a/mlir/test/Dialect/Tosa/invalid.mlir
+++ b/mlir/test/Dialect/Tosa/invalid.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -split-input-file -verify-diagnostics
+// RUN: mlir-opt %s -split-input-file -verify-diagnostics --tosa-validate=strict-op-spec-alignment
 
 
 func.func @test_conv2d(%arg0: tensor<1x29x29x4xf32>, %arg1: tensor<16x3x3x4xi8>, %arg2: tensor<16xi8>) -> tensor<1x27x27x16xi8> {
@@ -43,3 +43,48 @@ func.func @test_concat(%arg0 : tensor<2x1xf32>, %arg1 : tensor<2x2xf32>) -> tensor<?x?xf32> {
   %0 = "tosa.concat"(%arg0, %arg1) {axis = 0 : i64} : (tensor<2x1xf32>, tensor<2x2xf32>) -> tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
 }
+
+// -----
+
+func.func @test_pad_non_const(%arg0: tensor<13x21x3xf32>, %arg1: tensor<3x2xi32>) -> tensor<13x21x3xf32> {
+  // expected-error@+1 {{'tosa.pad' op padding of pad is not constant}}
+  %0 = "tosa.pad"(%arg0, %arg1) : (tensor<13x21x3xf32>, tensor<3x2xi32>) -> tensor<13x21x3xf32>
+  return %0 : tensor<13x21x3xf32>
+}
+
+// -----
+
+func.func @test_pad_non_const(%arg0: tensor<13x21x3xi8>, %arg1: tensor<i8>) -> tensor<13x21x3xi8> {
+  %0 = "tosa.const"() {value = dense<[[0, 0], [0, 1], [0, 1]]> : tensor<3x2xi32>} : () -> tensor<3x2xi32>
+  // expected-error@+1 {{'tosa.pad' op pad_const of pad is not constant}}
+  %1 = "tosa.pad"(%arg0, %0, %arg1) : (tensor<13x21x3xi8>, tensor<3x2xi32>, tensor<i8>) -> tensor<13x21x3xi8>
+  return %1 : tensor<13x21x3xi8>
+}
+
+// -----
+
+func.func @test_transpose_non_const(%arg0: tensor<13x21x3xf32>, %arg1: tensor<3xi32>) -> tensor<3x13x21xf32> {
+  // expected-error@+1 {{'tosa.transpose' op perms of transpose is not constant}}
+  %0 = "tosa.transpose"(%arg0, %arg1) : (tensor<13x21x3xf32>, tensor<3xi32>) -> tensor<3x13x21xf32>
+  return %0 : tensor<3x13x21xf32>
+}
+
+// -----
+
+func.func @test_fully_connected_non_const(%arg0: tensor<13x21x3xf32>, %arg1: tensor<2x3xf32>) -> tensor<273x2xf32> {
+  %0 = "tosa.const"() {value = dense<0.000000e+00> : tensor<2xf32>} : () -> tensor<2xf32>
+  %1 = "tosa.reshape"(%arg0) {new_shape = array<i64: 273, 3>} : (tensor<13x21x3xf32>) -> tensor<273x3xf32>
+  // expected-error@+1 {{'tosa.fully_connected' op weight of fully_connected is not constant}}
+  %2 = "tosa.fully_connected"(%1, %arg1, %0) : (tensor<273x3xf32>, tensor<2x3xf32>, tensor<2xf32>) -> tensor<273x2xf32>
+  return %2 : tensor<273x2xf32>
+}
+
+// -----
+
+func.func
@test_fully_connected_non_const(%arg0: tensor<13x21x3xf32>, %arg1: tensor<2xf32>) -> tensor<273x2xf32> {
+  %0 = "tosa.const"() {value = dense<[[-0.613216758, -0.63714242, -0.73500061], [0.180762768, 0.773053169, -0.933686495]]> : tensor<2x3xf32>} : () -> tensor<2x3xf32>
+  %1 = "tosa.reshape"(%arg0) {new_shape = array<i64: 273, 3>} : (tensor<13x21x3xf32>) -> tensor<273x3xf32>
+  // expected-error@+1 {{'tosa.fully_connected' op bias of fully_connected is not constant}}
+  %2 = "tosa.fully_connected"(%1, %0, %arg1) : (tensor<273x3xf32>, tensor<2x3xf32>, tensor<2xf32>) -> tensor<273x2xf32>
+  return %2 : tensor<273x2xf32>
+}

From 51c8c1e1e5dbda57b645b43b201b843eabca3e5f Mon Sep 17 00:00:00 2001
From: yijia1212
Date: Tue, 21 Mar 2023 14:40:17 -0700
Subject: [PATCH 233/691] Fix dep error for transform dialect in bazel

Reviewed By: anlunx

Differential Revision: https://reviews.llvm.org/D146566
---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 99a8653d81478..b739ead77bf24 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -9318,7 +9318,7 @@ gentbl_cc_library(
     td_file = "include/mlir/Dialect/Transform/IR/TransformOps.td",
     deps = [
         ":CallInterfacesTdFiles",
-        ":TransformDialectTdFiles"
+        ":TransformDialectTdFiles",
     ],
 )
 
@@ -9345,7 +9345,7 @@ cc_library(
     srcs = glob(["lib/Dialect/Transform/IR/*.cpp"]),
     hdrs = glob(["include/mlir/Dialect/Transform/IR/*.h"]),
     deps = [
-        ":CallInterfaces",
+        ":CallOpInterfaces",
         ":ControlFlowInterfaces",
         ":IR",
         ":PDLDialect",

From a2c63d7f0b77ae92c97974d0630625d83a0745ce Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 21 Mar 2023 14:58:53 -0700
Subject: [PATCH 234/691] [RISCV] Remove 'rs1' field from RVInst16CI class. NFC

---
 llvm/lib/Target/RISCV/RISCVInstrFormatsC.td | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
index fa1c3e9164123..db93c1161580d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
@@ -53,7 +53,6 @@ class RVInst16CI<bits<3> funct3, bits<2> opcode, dag outs, dag ins,
     : RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCI> {
   bits<10> imm;
   bits<5> rd;
-  bits<5> rs1;
 
   let Inst{15-13} = funct3;
   let Inst{12} = imm{5};

From c327f9925428870e6288fa65bb709dcc3c0a0f31 Mon Sep 17 00:00:00 2001
From: Dave Lee
Date: Thu, 16 Mar 2023 19:19:05 -0700
Subject: [PATCH 235/691] [lldb] Refactor deduction of the instance variable's
 name (NFC)

Move responsibility of providing the instance variable name (`this`,
`self`) from `TypeSystem` to `Language`. `Language` is the natural place
for this, and the move also has downstream benefits. Some languages have
multiple `TypeSystem` implementations (e.g. Swift), and by placing this
logic in the `Language`, redundancy is avoided.

This change relies on the tests from D145348 and D146320.
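For reference, the override added to the C++ plugin (mirrored by the
ObjC and ObjC++ plugins, which return "self") is:

  ConstString GetInstanceVariableName() override { return ConstString("this"); }

Callers such as SymbolContext (updated below) now obtain the name from
the Language plugin rather than from the TypeSystem.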
Differential Revision: https://reviews.llvm.org/D146548 --- lldb/include/lldb/Symbol/CompilerDeclContext.h | 8 -------- lldb/include/lldb/Symbol/TypeSystem.h | 4 ---- lldb/include/lldb/Target/Language.h | 2 ++ .../Language/CPlusPlus/CPlusPlusLanguage.h | 2 ++ .../Plugins/Language/ObjC/ObjCLanguage.h | 2 ++ .../ObjCPlusPlus/ObjCPlusPlusLanguage.h | 2 ++ .../TypeSystem/Clang/TypeSystemClang.cpp | 16 ---------------- .../Plugins/TypeSystem/Clang/TypeSystemClang.h | 2 -- lldb/source/Symbol/CompilerDeclContext.cpp | 7 ------- lldb/source/Symbol/SymbolContext.cpp | 18 ++++++++++++------ 10 files changed, 20 insertions(+), 43 deletions(-) diff --git a/lldb/include/lldb/Symbol/CompilerDeclContext.h b/lldb/include/lldb/Symbol/CompilerDeclContext.h index 63e5f7b680e63..61a9c9c341bfe 100644 --- a/lldb/include/lldb/Symbol/CompilerDeclContext.h +++ b/lldb/include/lldb/Symbol/CompilerDeclContext.h @@ -69,14 +69,6 @@ class CompilerDeclContext { /// Determines the original language of the decl context. lldb::LanguageType GetLanguage(); - /// Determines the name of the instance variable for the this decl context. - /// - /// For C++ the name is "this", for Objective-C the name is "self". - /// - /// \return - /// Returns a string for the name of the instance variable. - ConstString GetInstanceVariableName(lldb::LanguageType language); - /// Check if the given other decl context is contained in the lookup /// of this decl context (for example because the other context is a nested /// inline namespace). diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index 0777d4d5ad6f3..a16f4af2be6d6 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -202,10 +202,6 @@ class TypeSystem : public PluginInterface, // TypeSystems can support more than one language virtual bool SupportsLanguage(lldb::LanguageType language) = 0; - /// The name of the variable used for explicitly accessing data scoped to the - /// current instance (or type). C++ uses "this", ObjC uses "self". 
- virtual ConstString GetInstanceVariableName(lldb::LanguageType language) = 0; - // Type Completion virtual bool GetCompleteType(lldb::opaque_compiler_type_t type) = 0; diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h index 8cc1e72e138a5..59ea17bcefb26 100644 --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -326,6 +326,8 @@ class Language : public PluginInterface { return ConstString(); } + virtual ConstString GetInstanceVariableName() { return {}; } + protected: // Classes that inherit from Language can see and modify these diff --git a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h index 809996497c11a..a3e78c39044aa 100644 --- a/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/CPlusPlus/CPlusPlusLanguage.h @@ -165,6 +165,8 @@ class CPlusPlusLanguage : public Language { ConstString FindBestAlternateFunctionMangledName( const Mangled mangled, const SymbolContext &sym_ctx) const override; + ConstString GetInstanceVariableName() override { return ConstString("this"); } + // PluginInterface protocol llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } }; diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h index b61348a3280ed..1344e97e469c6 100644 --- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h +++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h @@ -155,6 +155,8 @@ class ObjCLanguage : public Language { return false; } + ConstString GetInstanceVariableName() override { return ConstString("self"); } + // PluginInterface protocol llvm::StringRef GetPluginName() override { return GetPluginNameStatic(); } }; diff --git a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h index 20184fd709d5c..5fb256db46481 100644 --- a/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h +++ b/lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h @@ -40,6 +40,8 @@ class ObjCPlusPlusLanguage : public Language { static lldb_private::Language *CreateInstance(lldb::LanguageType language); + ConstString GetInstanceVariableName() override { return ConstString("self"); } + static llvm::StringRef GetPluginNameStatic() { return "objcplusplus"; } // PluginInterface protocol diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index a739494cffcc4..b661ec4453325 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -3727,22 +3727,6 @@ bool TypeSystemClang::SupportsLanguage(lldb::LanguageType language) { return TypeSystemClangSupportsLanguage(language); } -ConstString -TypeSystemClang::GetInstanceVariableName(lldb::LanguageType language) { - switch (language) { - case LanguageType::eLanguageTypeC_plus_plus: - case LanguageType::eLanguageTypeC_plus_plus_03: - case LanguageType::eLanguageTypeC_plus_plus_11: - case LanguageType::eLanguageTypeC_plus_plus_14: - return ConstString("this"); - case LanguageType::eLanguageTypeObjC: - case LanguageType::eLanguageTypeObjC_plus_plus: - return ConstString("self"); - default: - return {}; - } -} - std::optional TypeSystemClang::GetCXXClassName(const CompilerType &type) { if (!type) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h 
b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index baddf6253beb4..414b51911cf89 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -711,8 +711,6 @@ class TypeSystemClang : public TypeSystem { bool SupportsLanguage(lldb::LanguageType language) override; - ConstString GetInstanceVariableName(lldb::LanguageType language) override; - static std::optional GetCXXClassName(const CompilerType &type); // Type Completion diff --git a/lldb/source/Symbol/CompilerDeclContext.cpp b/lldb/source/Symbol/CompilerDeclContext.cpp index 36b9131055f83..a188e60251f7c 100644 --- a/lldb/source/Symbol/CompilerDeclContext.cpp +++ b/lldb/source/Symbol/CompilerDeclContext.cpp @@ -46,13 +46,6 @@ lldb::LanguageType CompilerDeclContext::GetLanguage() { return {}; } -ConstString -CompilerDeclContext::GetInstanceVariableName(lldb::LanguageType language) { - if (IsValid()) - return m_type_system->GetInstanceVariableName(language); - return {}; -} - bool CompilerDeclContext::IsContainedInLookup(CompilerDeclContext other) const { if (!IsValid()) return false; diff --git a/lldb/source/Symbol/SymbolContext.cpp b/lldb/source/Symbol/SymbolContext.cpp index 5d4fb1cec6969..0a00802f064b9 100644 --- a/lldb/source/Symbol/SymbolContext.cpp +++ b/lldb/source/Symbol/SymbolContext.cpp @@ -19,10 +19,12 @@ #include "lldb/Symbol/SymbolFile.h" #include "lldb/Symbol/SymbolVendor.h" #include "lldb/Symbol/Variable.h" +#include "lldb/Target/Language.h" #include "lldb/Target/Target.h" #include "lldb/Utility/LLDBLog.h" #include "lldb/Utility/Log.h" #include "lldb/Utility/StreamString.h" +#include "lldb/lldb-enumerations.h" using namespace lldb; using namespace lldb_private; @@ -540,13 +542,17 @@ Block *SymbolContext::GetFunctionBlock() { } ConstString SymbolContext::GetInstanceVariableName() { + LanguageType lang_type = eLanguageTypeUnknown; + if (Block *function_block = GetFunctionBlock()) - if (CompilerDeclContext decl_ctx = function_block->GetDeclContext()) { - auto language = decl_ctx.GetLanguage(); - if (language == eLanguageTypeUnknown) - language = GetLanguage(); - return decl_ctx.GetInstanceVariableName(language); - } + if (CompilerDeclContext decl_ctx = function_block->GetDeclContext()) + lang_type = decl_ctx.GetLanguage(); + + if (lang_type == eLanguageTypeUnknown) + lang_type = GetLanguage(); + + if (auto *lang = Language::FindPlugin(lang_type)) + return lang->GetInstanceVariableName(); return {}; } From c5f6339c126c2eec43658e0fba05f5f606349d6d Mon Sep 17 00:00:00 2001 From: Anlun Xu Date: Tue, 21 Mar 2023 15:23:13 -0700 Subject: [PATCH 236/691] Add missing dependency for TransformDialect Differential Revision: https://reviews.llvm.org/D146589 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index b739ead77bf24..4071d92641839 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -9318,6 +9318,7 @@ gentbl_cc_library( td_file = "include/mlir/Dialect/Transform/IR/TransformOps.td", deps = [ ":CallInterfacesTdFiles", + ":FunctionInterfacesTdFiles", ":TransformDialectTdFiles", ], ) @@ -9345,6 +9346,7 @@ cc_library( srcs = glob(["lib/Dialect/Transform/IR/*.cpp"]), hdrs = glob(["include/mlir/Dialect/Transform/IR/*.h"]), deps = [ + ":Analysis", ":CallOpInterfaces", ":ControlFlowInterfaces", ":IR", From 
2aa90da012596712a4166e96d2a40fc90598c7fb Mon Sep 17 00:00:00 2001 From: Artem Belevich Date: Mon, 20 Mar 2023 11:18:53 -0700 Subject: [PATCH 237/691] [CUDA] Update cached kernel handle when the function instance changes. Fixes clang crash caused by a stale function pointer. The bug has been present for a pretty long time, but we were lucky not to trigger it until D140663. Differential Revision: https://reviews.llvm.org/D146448 --- clang/lib/CodeGen/CGCUDANV.cpp | 19 +++++++++- .../bug-kerner-registration-reuse.cu | 38 +++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGenCUDA/bug-kerner-registration-reuse.cu diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp index bb887df3e4e04..1f429e4305790 100644 --- a/clang/lib/CodeGen/CGCUDANV.cpp +++ b/clang/lib/CodeGen/CGCUDANV.cpp @@ -1195,8 +1195,23 @@ llvm::Function *CGNVCUDARuntime::finalizeModule() { llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F, GlobalDecl GD) { auto Loc = KernelHandles.find(F->getName()); - if (Loc != KernelHandles.end()) - return Loc->second; + if (Loc != KernelHandles.end()) { + auto OldHandle = Loc->second; + if (KernelStubs[OldHandle] == F) + return OldHandle; + + // We've found the function name, but F itself has changed, so we need to + // update the references. + if (CGM.getLangOpts().HIP) { + // For HIP compilation the handle itself does not change, so we only need + // to update the Stub value. + KernelStubs[OldHandle] = F; + return OldHandle; + } + // For non-HIP compilation, erase the old Stub and fall-through to creating + // new entries. + KernelStubs.erase(OldHandle); + } if (!CGM.getLangOpts().HIP) { KernelHandles[F->getName()] = F; diff --git a/clang/test/CodeGenCUDA/bug-kerner-registration-reuse.cu b/clang/test/CodeGenCUDA/bug-kerner-registration-reuse.cu new file mode 100644 index 0000000000000..e1fef41e25079 --- /dev/null +++ b/clang/test/CodeGenCUDA/bug-kerner-registration-reuse.cu @@ -0,0 +1,38 @@ +// RUN: echo -n "GPU binary would be here." > %t +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s \ +// RUN: -target-sdk-version=11.0 -fcuda-include-gpubinary %t -o - \ +// RUN: | FileCheck %s --check-prefixes CUDA +// RUN: %clang_cc1 -triple x86_64-linux-gnu -emit-llvm %s -x hip \ +// RUN: -fcuda-include-gpubinary %t -o - \ +// RUN: | FileCheck %s --check-prefixes HIP + +#include "Inputs/cuda.h" + +template +struct S { T t; }; + +template + static __global__ void Kernel(S) {} + +// For some reason it takes three or more instantiations of Kernel to trigger a +// crash during CUDA compilation. +auto x = &Kernel; +auto y = &Kernel; +auto z = &Kernel; + +// This triggers HIP-specific code path. 
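+// The stale-pointer scenario exercised here: getKernelHandle() caches the
+// handle keyed by the stub's *name*, so when clang re-emits a kernel and a
+// new llvm::Function replaces the old one, the cached entry must be erased
+// and rebuilt (CUDA) or have its stub entry remapped (HIP); previously the
+// launch below could read through a pointer to the deleted function.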
+void func (){ + Kernel<<<1,1>>>({1}); +} + +// CUDA-LABEL: @__cuda_register_globals( +// CUDA: call i32 @__cudaRegisterFunction(ptr %0, ptr @_ZL21__device_stub__KernelIdEv1SIT_E +// CUDA: call i32 @__cudaRegisterFunction(ptr %0, ptr @_ZL21__device_stub__KernelIfEv1SIT_E +// CUDA: call i32 @__cudaRegisterFunction(ptr %0, ptr @_ZL21__device_stub__KernelIiEv1SIT_E +// CUDA: ret void + +// HIP-LABEL: @__hip_register_globals( +// HIP: call i32 @__hipRegisterFunction(ptr %0, ptr @_ZL6KernelIdEv1SIT_E +// HIP: call i32 @__hipRegisterFunction(ptr %0, ptr @_ZL6KernelIfEv1SIT_E +// HIP: call i32 @__hipRegisterFunction(ptr %0, ptr @_ZL6KernelIiEv1SIT_E +// HIP: ret void From a09283a55186095e57aa4e365db466f462c31482 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 21 Mar 2023 15:41:19 -0700 Subject: [PATCH 238/691] [NFC][tsan] Add __tsan_default_{options,suppressions} into interface header --- compiler-rt/include/sanitizer/tsan_interface.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/compiler-rt/include/sanitizer/tsan_interface.h b/compiler-rt/include/sanitizer/tsan_interface.h index 2782e61fb8c79..58f2513734ecb 100644 --- a/compiler-rt/include/sanitizer/tsan_interface.h +++ b/compiler-rt/include/sanitizer/tsan_interface.h @@ -172,6 +172,12 @@ int __tsan_on_finalize(int failed); // Release TSan internal memory in a best-effort manner. void __tsan_flush_memory(); +// User-provided default TSAN options. +const char* __tsan_default_options(void); + +// User-provided default TSAN suppressions. +const char* __tsan_default_suppressions(void); + #ifdef __cplusplus } // extern "C" #endif From 696fdece49e5ed94328497832d9ceed47993ef02 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Wed, 22 Mar 2023 06:32:01 +0800 Subject: [PATCH 239/691] [WebAssembly] Fix i64_i64_func_i64_i64_i32 type signature when multivalue feature is enabled Reviewed By: tlively Differential Revision: https://reviews.llvm.org/D146533 --- .../WebAssemblyRuntimeLibcallSignatures.cpp | 2 - .../CodeGen/WebAssembly/multivalue_libcall.ll | 45 +++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index 20891839686d7..d87355e7d39be 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -772,8 +772,6 @@ void llvm::getLibcallSignature(const WebAssemblySubtarget &Subtarget, if (Subtarget.hasMultivalue()) { Rets.push_back(wasm::ValType::I64); Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); - Rets.push_back(wasm::ValType::I64); } else { Params.push_back(PtrTy); } diff --git a/llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll b/llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll index 682eb912b87b9..47c5ae7b457dd 100644 --- a/llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll +++ b/llvm/test/CodeGen/WebAssembly/multivalue_libcall.ll @@ -96,3 +96,48 @@ define fp128 @multivalue_fsub(fp128 %a, fp128 %b) { %sub = fsub fp128 %a, %b ret fp128 %sub } + +define i128 @multivalue_lshr(i128 %a, i128 %b) { +; MULTIVALUE-LABEL: multivalue_lshr: +; MULTIVALUE: .functype multivalue_lshr (i64, i64, i64, i64) -> (i64, i64) +; MULTIVALUE-NEXT: # %bb.0: +; MULTIVALUE-NEXT: local.get 2 +; MULTIVALUE-NEXT: local.get 3 +; MULTIVALUE-NEXT: local.get 0 +; MULTIVALUE-NEXT: i32.wrap_i64 +; MULTIVALUE-NEXT: call __ashlti3 +; MULTIVALUE-NEXT: # fallthrough-return +; +; 
NO_MULTIVALUE-LABEL: multivalue_lshr: +; NO_MULTIVALUE: .functype multivalue_lshr (i32, i64, i64, i64, i64) -> () +; NO_MULTIVALUE-NEXT: .local i32 +; NO_MULTIVALUE-NEXT: # %bb.0: +; NO_MULTIVALUE-NEXT: global.get __stack_pointer +; NO_MULTIVALUE-NEXT: i32.const 16 +; NO_MULTIVALUE-NEXT: i32.sub +; NO_MULTIVALUE-NEXT: local.tee 5 +; NO_MULTIVALUE-NEXT: global.set __stack_pointer +; NO_MULTIVALUE-NEXT: local.get 5 +; NO_MULTIVALUE-NEXT: local.get 3 +; NO_MULTIVALUE-NEXT: local.get 4 +; NO_MULTIVALUE-NEXT: local.get 1 +; NO_MULTIVALUE-NEXT: i32.wrap_i64 +; NO_MULTIVALUE-NEXT: call __ashlti3 +; NO_MULTIVALUE-NEXT: local.get 0 +; NO_MULTIVALUE-NEXT: local.get 5 +; NO_MULTIVALUE-NEXT: i32.const 8 +; NO_MULTIVALUE-NEXT: i32.add +; NO_MULTIVALUE-NEXT: i64.load 0 +; NO_MULTIVALUE-NEXT: i64.store 8 +; NO_MULTIVALUE-NEXT: local.get 0 +; NO_MULTIVALUE-NEXT: local.get 5 +; NO_MULTIVALUE-NEXT: i64.load 0 +; NO_MULTIVALUE-NEXT: i64.store 0 +; NO_MULTIVALUE-NEXT: local.get 5 +; NO_MULTIVALUE-NEXT: i32.const 16 +; NO_MULTIVALUE-NEXT: i32.add +; NO_MULTIVALUE-NEXT: global.set __stack_pointer +; NO_MULTIVALUE-NEXT: # fallthrough-return + %tmp = shl i128 %b, %a + ret i128 %tmp +} From 087b5f3277f36225ad26cd265aace89150fe1f06 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 21 Mar 2023 16:20:30 -0700 Subject: [PATCH 240/691] [RISCV] Improve validation of opcode for .insn. The lower 2 bits of the opcode must be 0x3. If the lower 2 bits are 0-2, it's a compressed instruction. Merge 3 slightly different error messages into 1 to reduce code. The messages differed slightly depending on whether we parsed a string or an expression. The message gets a little more generic, but is no more generic than what binutils prints. --- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 25 ++++++++----------- llvm/test/MC/RISCV/insn-invalid.s | 5 ++-- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index e8edc96951229..9c6d54e62b16c 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -1459,15 +1459,13 @@ RISCVAsmParser::parseInsnDirectiveOpcode(OperandVector &Operands) { auto *CE = dyn_cast(Res); if (CE) { int64_t Imm = CE->getValue(); - if (isUInt<7>(Imm)) { + if (isUInt<7>(Imm) && (Imm & 3) == 3) { Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); return MatchOperand_Success; } } - Twine Msg = "immediate must be an integer in the range"; - Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]"); - return MatchOperand_ParseFail; + break; } case AsmToken::Identifier: { StringRef Identifier; @@ -1476,26 +1474,23 @@ RISCVAsmParser::parseInsnDirectiveOpcode(OperandVector &Operands) { auto Opcode = RISCVInsnOpcode::lookupRISCVOpcodeByName(Identifier); if (Opcode) { + assert(isUInt<7>(Opcode->Value) && (Opcode->Value & 0x3) == 3 && + "Unexpected opcode"); Res = MCConstantExpr::create(Opcode->Value, getContext()); E = SMLoc::getFromPointer(S.getPointer() + Identifier.size()); Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); return MatchOperand_Success; } - Twine Msg = "operand must be a valid opcode name or an " - "integer in the range"; - Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]"); - return MatchOperand_ParseFail; - } - case AsmToken::Percent: { - // Discard operand with modifier. 
- Twine Msg = "immediate must be an integer in the range"; - Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]"); - return MatchOperand_ParseFail; + break; } + case AsmToken::Percent: + break; } - return MatchOperand_NoMatch; + Error(S, "opcode must be in the range [0, 127] and the lower 2 bits must be " + "0x3"); + return MatchOperand_ParseFail; } OperandMatchResultTy diff --git a/llvm/test/MC/RISCV/insn-invalid.s b/llvm/test/MC/RISCV/insn-invalid.s index c9142747f07db..ab41f07ef567f 100644 --- a/llvm/test/MC/RISCV/insn-invalid.s +++ b/llvm/test/MC/RISCV/insn-invalid.s @@ -14,12 +14,13 @@ .insn q 0x13, 0, a0, a1, 13, 14 # CHECK: :[[@LINE]]:7: error: invalid instruction format # Invalid immediate -.insn i 0x99, 0, a0, 4(a1) # CHECK: :[[@LINE]]:10: error: immediate must be an integer in the range [0, 127] +.insn i 0x99, 0, a0, 4(a1) # CHECK: :[[@LINE]]:10: error: opcode must be in the range [0, 127] and the lower 2 bits must be 0x3 +.insn i 0, 0, a0, 4(a1) # CHECK: :[[@LINE]]:10: error: opcode must be in the range [0, 127] and the lower 2 bits must be 0x3 .insn r 0x33, 8, 0, a0, a1, a2 # CHECK: :[[@LINE]]:17: error: immediate must be an integer in the range [0, 7] .insn r4 0x43, 0, 4, fa0, fa1, fa2, fa3 # CHECK: :[[@LINE]]:21: error: immediate must be an integer in the range [0, 3] # Unrecognized opcode name -.insn r UNKNOWN, 0, a1, a2, a3 #CHECK: :[[@LINE]]:9: error: operand must be a valid opcode name or an integer in the range [0, 127] +.insn r UNKNOWN, 0, a1, a2, a3 #CHECK: :[[@LINE]]:9: error: opcode must be in the range [0, 127] and the lower 2 bits must be 0x3 # Make fake mnemonics we use to match these in the tablegened asm match table isn't exposed. .insn_i 0x13, 0, a0, a1, 13, 14 # CHECK: :[[@LINE]]:1: error: unknown directive From 13e9afd16d8aac49caf3abaa35bc97b5430331d3 Mon Sep 17 00:00:00 2001 From: wren romano <2998727+wrengr@users.noreply.github.com> Date: Tue, 21 Mar 2023 13:13:42 -0700 Subject: [PATCH 241/691] [mlir][sparse] Adding new `Merger::addLat` overload Reviewed By: aartbik Differential Revision: https://reviews.llvm.org/D146559 --- mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h | 1 + mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h index 6e39404bb28aa..991c920c17399 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -280,6 +280,7 @@ class Merger { /// Constructs a new iteration lattice point, and returns its identifier. LatPointId addLat(TensorId t, LoopId i, ExprId e); + LatPointId addLat(const BitVector &bits, ExprId e); /// Constructs a new (initially empty) set, and returns its identifier. 
LatSetId addSet(); diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 4a8c3cbfbe584..0691d2554f438 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -247,6 +247,13 @@ LatPointId Merger::addLat(TensorId t, LoopId i, ExprId e) { return p; } +LatPointId Merger::addLat(const BitVector &bits, ExprId e) { + assert(bits.size() == numLoops * numTensors); + const LatPointId p = latPoints.size(); + latPoints.emplace_back(bits, e); + return p; +} + LatSetId Merger::addSet() { const LatSetId s = latSets.size(); latSets.emplace_back(); @@ -322,8 +329,7 @@ LatSetId Merger::mapSet(TensorExp::Kind kind, LatSetId s0, Value v, const LatSetId s = addSet(); for (const LatPointId p : latSets[s0]) { const ExprId e = addExp(kind, latPoints[p].exp, v, op); - latPoints.emplace_back(latPoints[p].bits, e); - latSets[s].push_back(latPoints.size() - 1); + latSets[s].push_back(addLat(latPoints[p].bits, e)); } return s; } From 5499b026d218f694a8c9f148466da3259f6cc1fd Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Tue, 21 Mar 2023 11:20:31 -0700 Subject: [PATCH 242/691] [lldb][CMake] Enforce not linking against plugin libs in core libs Non-plugin lldb libraries should generally not be linking against lldb plugin libraries. Enforce this in CMake. Differential Revision: https://reviews.llvm.org/D146553 --- lldb/cmake/modules/AddLLDB.cmake | 12 +++++++++++- lldb/source/Breakpoint/CMakeLists.txt | 2 +- lldb/source/Commands/CMakeLists.txt | 2 +- lldb/source/Core/CMakeLists.txt | 1 + lldb/source/DataFormatters/CMakeLists.txt | 2 +- lldb/source/Expression/CMakeLists.txt | 1 + lldb/source/Host/CMakeLists.txt | 2 +- lldb/source/Host/macosx/objcxx/CMakeLists.txt | 2 +- lldb/source/Interpreter/CMakeLists.txt | 2 +- lldb/source/Symbol/CMakeLists.txt | 2 +- lldb/source/Target/CMakeLists.txt | 2 +- lldb/source/Version/CMakeLists.txt | 2 +- 12 files changed, 22 insertions(+), 10 deletions(-) diff --git a/lldb/cmake/modules/AddLLDB.cmake b/lldb/cmake/modules/AddLLDB.cmake index e8fa70a5a6848..f2d96dfd68e00 100644 --- a/lldb/cmake/modules/AddLLDB.cmake +++ b/lldb/cmake/modules/AddLLDB.cmake @@ -37,7 +37,7 @@ function(add_lldb_library name) # only supported parameters to this macro are the optional # MODULE;SHARED;STATIC library type and source files cmake_parse_arguments(PARAM - "MODULE;SHARED;STATIC;OBJECT;PLUGIN;FRAMEWORK;NO_INTERNAL_DEPENDENCIES" + "MODULE;SHARED;STATIC;OBJECT;PLUGIN;FRAMEWORK;NO_INTERNAL_DEPENDENCIES;NO_PLUGIN_DEPENDENCIES" "INSTALL_PREFIX;ENTITLEMENTS" "EXTRA_CXXFLAGS;DEPENDS;LINK_LIBS;LINK_COMPONENTS;CLANG_LIBS" ${ARGN}) @@ -54,6 +54,16 @@ function(add_lldb_library name) endforeach() endif() + if(PARAM_NO_PLUGIN_DEPENDENCIES) + foreach(link_lib ${PARAM_LINK_LIBS}) + if (link_lib MATCHES "^lldbPlugin") + message(FATAL_ERROR + "Library ${name} cannot depend on a plugin (Found ${link_lib} in " + "LINK_LIBS)") + endif() + endforeach() + endif() + if(PARAM_PLUGIN) set_property(GLOBAL APPEND PROPERTY LLDB_PLUGINS ${name}) endif() diff --git a/lldb/source/Breakpoint/CMakeLists.txt b/lldb/source/Breakpoint/CMakeLists.txt index 4862c2b364033..5c2802322ed52 100644 --- a/lldb/source/Breakpoint/CMakeLists.txt +++ b/lldb/source/Breakpoint/CMakeLists.txt @@ -1,4 +1,4 @@ -add_lldb_library(lldbBreakpoint +add_lldb_library(lldbBreakpoint NO_PLUGIN_DEPENDENCIES Breakpoint.cpp BreakpointID.cpp BreakpointIDList.cpp diff --git a/lldb/source/Commands/CMakeLists.txt b/lldb/source/Commands/CMakeLists.txt 
index dc1aebc30de13..6a36c5376d5c5 100644 --- a/lldb/source/Commands/CMakeLists.txt +++ b/lldb/source/Commands/CMakeLists.txt @@ -2,7 +2,7 @@ lldb_tablegen(CommandOptions.inc -gen-lldb-option-defs SOURCE Options.td TARGET LLDBOptionsGen) -add_lldb_library(lldbCommands +add_lldb_library(lldbCommands NO_PLUGIN_DEPENDENCIES CommandCompletions.cpp CommandObjectApropos.cpp CommandObjectBreakpoint.cpp diff --git a/lldb/source/Core/CMakeLists.txt b/lldb/source/Core/CMakeLists.txt index b46ed3510e527..f0220beae032a 100644 --- a/lldb/source/Core/CMakeLists.txt +++ b/lldb/source/Core/CMakeLists.txt @@ -19,6 +19,7 @@ if (LLDB_ENABLE_CURSES) endif() endif() +# TODO: Add property `NO_PLUGIN_DEPENDENCIES` to lldbCore add_lldb_library(lldbCore Address.cpp AddressRange.cpp diff --git a/lldb/source/DataFormatters/CMakeLists.txt b/lldb/source/DataFormatters/CMakeLists.txt index e727432da4f05..7f48a2785c73f 100644 --- a/lldb/source/DataFormatters/CMakeLists.txt +++ b/lldb/source/DataFormatters/CMakeLists.txt @@ -1,4 +1,4 @@ -add_lldb_library(lldbDataFormatters +add_lldb_library(lldbDataFormatters NO_PLUGIN_DEPENDENCIES CXXFunctionPointer.cpp DataVisualization.cpp DumpValueObjectOptions.cpp diff --git a/lldb/source/Expression/CMakeLists.txt b/lldb/source/Expression/CMakeLists.txt index 54414fb2a7c4f..7e4fd81f2afdc 100644 --- a/lldb/source/Expression/CMakeLists.txt +++ b/lldb/source/Expression/CMakeLists.txt @@ -1,3 +1,4 @@ +# TODO: Add property `NO_PLUGIN_DEPENDENCIES` to lldbExpression add_lldb_library(lldbExpression DiagnosticManager.cpp DWARFExpression.cpp diff --git a/lldb/source/Host/CMakeLists.txt b/lldb/source/Host/CMakeLists.txt index 4a5ceeb60b7b9..91f353e50b190 100644 --- a/lldb/source/Host/CMakeLists.txt +++ b/lldb/source/Host/CMakeLists.txt @@ -159,7 +159,7 @@ if (LLDB_ENABLE_LIBEDIT) endif() endif() -add_lldb_library(lldbHost +add_lldb_library(lldbHost NO_PLUGIN_DEPENDENCIES ${HOST_SOURCES} LINK_LIBS diff --git a/lldb/source/Host/macosx/objcxx/CMakeLists.txt b/lldb/source/Host/macosx/objcxx/CMakeLists.txt index 9b59273e02ad0..273999f24380e 100644 --- a/lldb/source/Host/macosx/objcxx/CMakeLists.txt +++ b/lldb/source/Host/macosx/objcxx/CMakeLists.txt @@ -2,7 +2,7 @@ remove_module_flags() include_directories(..) 
-add_lldb_library(lldbHostMacOSXObjCXX +add_lldb_library(lldbHostMacOSXObjCXX NO_PLUGIN_DEPENDENCIES Host.mm HostInfoMacOSX.mm HostThreadMacOSX.mm diff --git a/lldb/source/Interpreter/CMakeLists.txt b/lldb/source/Interpreter/CMakeLists.txt index c8c7a38904c35..ae79b82d7c3e2 100644 --- a/lldb/source/Interpreter/CMakeLists.txt +++ b/lldb/source/Interpreter/CMakeLists.txt @@ -6,7 +6,7 @@ lldb_tablegen(InterpreterPropertiesEnum.inc -gen-lldb-property-enum-defs SOURCE InterpreterProperties.td TARGET LLDBInterpreterPropertiesEnumGen) -add_lldb_library(lldbInterpreter +add_lldb_library(lldbInterpreter NO_PLUGIN_DEPENDENCIES CommandAlias.cpp CommandHistory.cpp CommandInterpreter.cpp diff --git a/lldb/source/Symbol/CMakeLists.txt b/lldb/source/Symbol/CMakeLists.txt index 0b2e6284bd418..cec49b8b2cb4b 100644 --- a/lldb/source/Symbol/CMakeLists.txt +++ b/lldb/source/Symbol/CMakeLists.txt @@ -6,7 +6,7 @@ if (CMAKE_SYSTEM_NAME MATCHES "Darwin") ) endif() -add_lldb_library(lldbSymbol +add_lldb_library(lldbSymbol NO_PLUGIN_DEPENDENCIES ArmUnwindInfo.cpp Block.cpp CompactUnwindInfo.cpp diff --git a/lldb/source/Target/CMakeLists.txt b/lldb/source/Target/CMakeLists.txt index 0cb3573916424..3823daf370b4f 100644 --- a/lldb/source/Target/CMakeLists.txt +++ b/lldb/source/Target/CMakeLists.txt @@ -6,7 +6,7 @@ lldb_tablegen(TargetPropertiesEnum.inc -gen-lldb-property-enum-defs SOURCE TargetProperties.td TARGET LLDBTargetPropertiesEnumGen) -add_lldb_library(lldbTarget +add_lldb_library(lldbTarget NO_PLUGIN_DEPENDENCIES ABI.cpp AssertFrameRecognizer.cpp DynamicRegisterInfo.cpp diff --git a/lldb/source/Version/CMakeLists.txt b/lldb/source/Version/CMakeLists.txt index 73367f2775bde..c1393b5dd6e6b 100644 --- a/lldb/source/Version/CMakeLists.txt +++ b/lldb/source/Version/CMakeLists.txt @@ -36,7 +36,7 @@ set_source_files_properties("${version_inc}" include_directories(${CMAKE_CURRENT_BINARY_DIR}) -add_lldb_library(lldbVersion +add_lldb_library(lldbVersion NO_PLUGIN_DEPENDENCIES Version.cpp ${vcs_version_inc} ${version_inc}) From e8ad2a051c1621032d15973877891c7296603d8b Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Tue, 21 Mar 2023 23:15:44 +0000 Subject: [PATCH 243/691] [amdgpu][nfc] Comment and extract two functions in LowerModuleLDS --- .../AMDGPU/AMDGPULowerModuleLDSPass.cpp | 167 +++++++++++------- 1 file changed, 102 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index d44e280640ee0..455d76b0cecde 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -218,6 +218,13 @@ class AMDGPULowerModuleLDS : public ModulePass { // llvm.donothing that takes a pointer to the instance and is lowered to a // no-op after LDS is allocated, but that is not presently necessary. + // This intrinsic is eliminated shortly before instruction selection. It + // does not suffice to indicate to ISel that a given global which is not + // immediately used by the kernel must still be allocated by it. An + // equivalent target specific intrinsic which lasts until immediately after + // codegen would suffice for that, but one would still need to ensure that + // the variables are allocated in the anticpated order. 
+ LLVMContext &Ctx = Func->getContext(); Builder.SetInsertPoint(Func->getEntryBlock().getFirstNonPHI()); @@ -241,7 +248,7 @@ class AMDGPULowerModuleLDS : public ModulePass { // This pass specialises LDS variables with respect to the kernel that // allocates them. - // This is semantically equivalent to: + // This is semantically equivalent to (the unimplemented as slow): // for (auto &F : M.functions()) // for (auto &BB : F) // for (auto &I : BB) @@ -469,28 +476,6 @@ class AMDGPULowerModuleLDS : public ModulePass { IRBuilder<> Builder(Ctx); Type *I32 = Type::getInt32Ty(Ctx); - // Accesses from a function use the amdgcn_lds_kernel_id intrinsic which - // lowers to a read from a live in register. Emit it once in the entry - // block to spare deduplicating it later. - - DenseMap tableKernelIndexCache; - auto getTableKernelIndex = [&](Function *F) -> Value * { - if (tableKernelIndexCache.count(F) == 0) { - LLVMContext &Ctx = M.getContext(); - FunctionType *FTy = FunctionType::get(Type::getInt32Ty(Ctx), {}); - Function *Decl = - Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_lds_kernel_id, {}); - - BasicBlock::iterator it = - F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca(); - Instruction &i = *it; - Builder.SetInsertPoint(&i); - - tableKernelIndexCache[F] = Builder.CreateCall(FTy, Decl, {}); - } - - return tableKernelIndexCache[F]; - }; for (size_t Index = 0; Index < ModuleScopeVariables.size(); Index++) { auto *GV = ModuleScopeVariables[Index]; @@ -500,7 +485,8 @@ class AMDGPULowerModuleLDS : public ModulePass { if (!I) continue; - Value *tableKernelIndex = getTableKernelIndex(I->getFunction()); + Value *tableKernelIndex = + getTableLookupKernelIndex(M, I->getFunction()); // So if the phi uses this value multiple times, what does this look // like? @@ -517,6 +503,7 @@ class AMDGPULowerModuleLDS : public ModulePass { ConstantInt::get(I32, Index), }; + Value *Address = Builder.CreateInBoundsGEP( LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName()); @@ -621,6 +608,78 @@ class AMDGPULowerModuleLDS : public ModulePass { MDNode::get(Ctx, {MinC, MaxC})); } + DenseMap tableKernelIndexCache; + Value *getTableLookupKernelIndex(Module &M, Function *F) { + // Accesses from a function use the amdgcn_lds_kernel_id intrinsic which + // lowers to a read from a live in register. Emit it once in the entry + // block to spare deduplicating it later. + if (tableKernelIndexCache.count(F) == 0) { + LLVMContext &Ctx = M.getContext(); + IRBuilder<> Builder(Ctx); + FunctionType *FTy = FunctionType::get(Type::getInt32Ty(Ctx), {}); + Function *Decl = + Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_lds_kernel_id, {}); + + BasicBlock::iterator it = + F->getEntryBlock().getFirstNonPHIOrDbgOrAlloca(); + Instruction &i = *it; + Builder.SetInsertPoint(&i); + + tableKernelIndexCache[F] = Builder.CreateCall(FTy, Decl, {}); + } + + return tableKernelIndexCache[F]; + } + + std::vector assignLDSKernelIDToEachKernel( + Module *M, DenseSet const &KernelsThatAllocateTableLDS) { + // Associate kernels in the set with an arbirary but reproducible order and + // annotate them with that order in metadata. This metadata is recognised by + // the backend and lowered to a SGPR which can be read from using + // amdgcn_lds_kernel_id. 
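+ // A kernel's position in the returned vector is thus the id that
+ // amdgcn_lds_kernel_id reports for it at runtime.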
+ + std::vector OrderedKernels; + + for (Function &Func : M->functions()) { + if (Func.isDeclaration()) + continue; + if (!isKernelLDS(&Func)) + continue; + + if (KernelsThatAllocateTableLDS.contains(&Func)) { + assert(Func.hasName()); // else fatal error earlier + OrderedKernels.push_back(&Func); + } + } + + // Put them in an arbitrary but reproducible order + llvm::sort(OrderedKernels.begin(), OrderedKernels.end(), + [](const Function *lhs, const Function *rhs) -> bool { + return lhs->getName() < rhs->getName(); + }); + + // Annotate the kernels with their order in this vector + LLVMContext &Ctx = M->getContext(); + IRBuilder<> Builder(Ctx); + + if (OrderedKernels.size() > UINT32_MAX) { + // 32 bit keeps it in one SGPR. > 2**32 kernels won't fit on the GPU + report_fatal_error("Unimplemented LDS lowering for > 2**32 kernels"); + } + + for (size_t i = 0; i < OrderedKernels.size(); i++) { + Metadata *AttrMDArgs[1] = { + ConstantAsMetadata::get(Builder.getInt32(i)), + }; + OrderedKernels[i]->setMetadata("llvm.amdgcn.lds.kernel.id", + MDNode::get(Ctx, AttrMDArgs)); + + } + + + return OrderedKernels; + } + bool runOnModule(Module &M) override { LLVMContext &Ctx = M.getContext(); CallGraph CG = CallGraph(M); @@ -644,7 +703,7 @@ class AMDGPULowerModuleLDS : public ModulePass { } } - // Partition variables into the different strategies + // Partition variables accessed indirectly into the different strategies DenseSet ModuleScopeVariables; DenseSet TableLookupVariables; DenseSet KernelAccessVariables; @@ -706,10 +765,12 @@ class AMDGPULowerModuleLDS : public ModulePass { } } + // All LDS variables accessed indirectly have now been partitioned into + // the distinct lowering strategies. assert(ModuleScopeVariables.size() + TableLookupVariables.size() + KernelAccessVariables.size() == LDSToKernelsThatNeedToAccessItIndirectly.size()); - } // Variables have now been partitioned into the three lowering strategies. + } // If the kernel accesses a variable that is going to be stored in the // module instance through a call then that kernel needs to allocate the @@ -787,9 +848,14 @@ class AMDGPULowerModuleLDS : public ModulePass { continue; DenseSet KernelUsedVariables; + // Allocating variables that are used directly in this struct to get + // alignment aware allocation and predictable frame size. for (auto &v : LDSUsesInfo.direct_access[&Func]) { KernelUsedVariables.insert(v); } + + // Allocating variables that are accessed indirectly so that a lookup of + // this struct instance can find them from nested functions. 
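+ // Direct and indirect uses may overlap; the set de-duplicates them.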
for (auto &v : LDSUsesInfo.indirect_access[&Func]) { KernelUsedVariables.insert(v); } @@ -803,7 +869,7 @@ class AMDGPULowerModuleLDS : public ModulePass { } if (KernelUsedVariables.empty()) { - // Either used no LDS, or all the LDS it used was also in module + // Either used no LDS, or the LDS it used was all in the module struct continue; } @@ -872,53 +938,25 @@ class AMDGPULowerModuleLDS : public ModulePass { DenseSet Vec; Vec.insert(GV); + // TODO: Looks like a latent bug, Replacement may not be marked + // UsedByKernel here replaceLDSVariablesWithStruct(M, Vec, Replacement, [](Use &U) { return isa(U.getUser()); }); } if (!KernelsThatAllocateTableLDS.empty()) { - // Collect the kernels that allocate table lookup LDS - std::vector OrderedKernels; - { - for (Function &Func : M.functions()) { - if (Func.isDeclaration()) - continue; - if (!isKernelLDS(&Func)) - continue; - - if (KernelsThatAllocateTableLDS.contains(&Func)) { - assert(Func.hasName()); // else fatal error earlier - OrderedKernels.push_back(&Func); - } - } - - // Put them in an arbitrary but reproducible order - llvm::sort(OrderedKernels.begin(), OrderedKernels.end(), - [](const Function *lhs, const Function *rhs) -> bool { - return lhs->getName() < rhs->getName(); - }); - - // Annotate the kernels with their order in this vector LLVMContext &Ctx = M.getContext(); IRBuilder<> Builder(Ctx); - if (OrderedKernels.size() > UINT32_MAX) { - // 32 bit keeps it in one SGPR. > 2**32 kernels won't fit on the GPU - report_fatal_error("Unimplemented LDS lowering for > 2**32 kernels"); - } - - for (size_t i = 0; i < OrderedKernels.size(); i++) { - Metadata *AttrMDArgs[1] = { - ConstantAsMetadata::get(Builder.getInt32(i)), - }; - OrderedKernels[i]->setMetadata("llvm.amdgcn.lds.kernel.id", - MDNode::get(Ctx, AttrMDArgs)); + // The ith element of this vector is kernel id i + std::vector OrderedKernels = + assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS); - markUsedByKernel(Builder, OrderedKernels[i], - KernelToReplacement[OrderedKernels[i]].SGV); - } - } + for (size_t i = 0; i < OrderedKernels.size(); i++) { + markUsedByKernel(Builder, OrderedKernels[i], + KernelToReplacement[OrderedKernels[i]].SGV); + } // The order must be consistent between lookup table and accesses to // lookup table @@ -938,7 +976,6 @@ class AMDGPULowerModuleLDS : public ModulePass { for (auto &GV : make_early_inc_range(M.globals())) if (AMDGPU::isLDSVariableToLower(GV)) { - // probably want to remove from used lists GV.removeDeadConstantUsers(); if (GV.use_empty()) From 2b21327fee50bf401e48d1592073d82da72a433f Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Tue, 21 Mar 2023 20:47:47 +0000 Subject: [PATCH 244/691] [mlir][sparse] fix crash when using pure constant index in indexing mapping (fixes #61530) To address https://github.com/llvm/llvm-project/issues/61530 Reviewed By: aartbik, wrengr Differential Revision: https://reviews.llvm.org/D146563 --- .../mlir/Dialect/SparseTensor/Utils/Merger.h | 5 ++- .../SparseTensor/Transforms/CodegenEnv.cpp | 10 ++--- .../SparseTensor/Transforms/CodegenEnv.h | 3 +- .../SparseTensor/Transforms/LoopEmitter.cpp | 12 ++++-- .../Transforms/Sparsification.cpp | 16 +++++++- .../lib/Dialect/SparseTensor/Utils/Merger.cpp | 6 +-- .../SparseTensor/constant_index_map.mlir | 41 +++++++++++++++++++ .../Dialect/SparseTensor/sparse_affine.mlir | 1 - .../Dialect/SparseTensor/MergerTest.cpp | 3 +- 9 files changed, 81 insertions(+), 16 deletions(-) create mode 100644 mlir/test/Dialect/SparseTensor/constant_index_map.mlir diff --git 
a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h index 991c920c17399..8b1e91ae8df56 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -254,6 +254,9 @@ class Merger { /// } /// /// to filter out coordinates that are not equal to the affine expression. + /// + /// The maxLvlRank specifies the max level rank of all inputs/output tensors. + /// It is used to pre-allocate sufficient memory for internal storage. // // TODO: we want to make the filter loop more efficient in the future, // e.g., by avoiding scanning the full list of stored coordinates (keeping @@ -264,7 +267,7 @@ class Merger { // gave the number of input tensors, instead of the current number of // input+output tensors. Merger(unsigned numInputOutputTensors, unsigned numNativeLoops, - unsigned numFilterLoops); + unsigned numFilterLoops, unsigned maxLvlRank); /// Constructs a new tensor expression, and returns its identifier. /// The type of the `e0` argument varies according to the value of the diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp index f326d5b950a31..974c86d1fab5a 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp @@ -51,12 +51,12 @@ static void sortArrayBasedOnOrder(std::vector &target, CodegenEnv::CodegenEnv(linalg::GenericOp linop, SparsificationOptions opts, unsigned numTensors, unsigned numLoops, - unsigned numFilterLoops) + unsigned numFilterLoops, unsigned maxRank) : linalgOp(linop), sparseOptions(opts), - latticeMerger(numTensors, numLoops, numFilterLoops), loopEmitter(), - topSort(), sparseOut(nullptr), outerParNest(-1u), insChain(), expValues(), - expFilled(), expAdded(), expCount(), redVal(), redExp(kInvalidId), - redCustom(kInvalidId), redValidLexInsert() {} + latticeMerger(numTensors, numLoops, numFilterLoops, maxRank), + loopEmitter(), topSort(), sparseOut(nullptr), outerParNest(-1u), + insChain(), expValues(), expFilled(), expAdded(), expCount(), redVal(), + redExp(kInvalidId), redCustom(kInvalidId), redValidLexInsert() {} LogicalResult CodegenEnv::initTensorExp() { // Builds the tensor expression for the Linalg operation in SSA form. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h index 8c6a7bd6433db..0041ad0a272cb 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h @@ -38,7 +38,8 @@ class CodegenEnv { /// passed around during sparsification for bookkeeping /// together with some consistency asserts. CodegenEnv(linalg::GenericOp linop, SparsificationOptions opts, - unsigned numTensors, unsigned numLoops, unsigned numFilterLoops); + unsigned numTensors, unsigned numLoops, unsigned numFilterLoops, + unsigned maxRank); // // General methods. 
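To make the sizing problem concrete: with the reported map (d0) -> (0, d0), a tensor<1x77xi1> operand has level rank 2 while the kernel has a single parallel loop, so sizing the per-tensor level tables by the loop count under-allocates them. The crux of the fix, reproduced from the Sparsification.cpp change below (the template argument elided there is presumably RankedTensorType), is to size by the maximum level rank over all operands instead:

  Level maxLvlRank = 0;
  for (auto operand : op.getOperands()) {
    if (auto rtp = operand.getType().dyn_cast<RankedTensorType>())
      maxLvlRank = std::max(maxLvlRank, SparseTensorType(rtp).getLvlRank());
  }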
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp index 459a1b38e03de..cae92c34e258d 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp @@ -288,12 +288,18 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput, coordinatesBuffers[tid].assign(lvlRank, Value()); sliceOffsets[tid].assign(lvlRank, Value()); sliceStrides[tid].assign(lvlRank, Value()); - dependentLvlMap[tid].assign(lvlRank, std::vector>()); - if (dimGetter) - for (Level l = 0; l < lvlRank; l++) + if (dimGetter) { + auto reassoc = collapseReassoc[tid]; + Level dstRank = reassoc ? reassoc.size() : lvlRank; + for (Level l = 0; l < dstRank; l++) { dependentLvlMap[tid][l] = dimGetter(tid, l); + // TODO: View-base collapse and dependent index reduction are not + // compatible right now. + assert(!reassoc || dependentLvlMap[tid][l].empty()); + } + } } // Construct the inverse of the `topSort` from the sparsifier. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index 63228531fcf0c..f760244d59d8b 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -1811,10 +1811,24 @@ struct GenericOpSparsifier : public OpRewritePattern { // possible, we can even intermix slice-based and filter-loop based codegen. bool idxReducBased = options.enableIndexReduction && numFilterLoops != 0; + // If we have indexing map like (d0) -> (0, d0), there might be more + // levels then loops because of the constant index, that means we can not + // use numLoops as the upper bound for ranks of all tensors. + // TODO: Constant indices are currently not support on sparse tensor, but + // are allowed in non-annotated dense tensor. Support it, it would be + // required for sparse tensor slice rank reducing too. + Level maxLvlRank = 0; + for (auto operand : op.getOperands()) { + if (auto rtp = operand.getType().dyn_cast()) { + maxLvlRank = std::max(maxLvlRank, SparseTensorType(rtp).getLvlRank()); + } + } + // If we uses slice based algorithm for affine index, we do not need filter // loop. CodegenEnv env(op, options, numTensors, numLoops, - /*numFilterLoops=*/idxReducBased ? 0 : numFilterLoops); + /*numFilterLoops=*/idxReducBased ? 0 : numFilterLoops, + maxLvlRank); // Detects sparse annotations and translates the per-level sparsity // information for all tensors to loop indices in the kernel. 
diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 0691d2554f438..9b39fd04d25ee 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -210,7 +210,7 @@ LatPoint::LatPoint(unsigned numTensors, unsigned numLoops, TensorId t, LoopId i, } Merger::Merger(unsigned numInputOutputTensors, unsigned numNativeLoops, - unsigned numFilterLoops) + unsigned numFilterLoops, unsigned maxLvlRank) : outTensor(numInputOutputTensors - 1), syntheticTensor(numInputOutputTensors), numTensors(numInputOutputTensors + 1), numNativeLoops(numNativeLoops), @@ -220,11 +220,11 @@ Merger::Merger(unsigned numInputOutputTensors, unsigned numNativeLoops, loopToLvl(numTensors, std::vector>(numLoops, std::nullopt)), lvlToLoop(numTensors, - std::vector>(numLoops, std::nullopt)), + std::vector>(maxLvlRank, std::nullopt)), loopToDependencies(numLoops, std::vector>( numTensors, std::nullopt)), levelToDependentIdx(numTensors, std::vector>( - numLoops, std::vector())), + maxLvlRank, std::vector())), loopBounds(numLoops, std::make_pair(numTensors, numLoops)) {} //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/SparseTensor/constant_index_map.mlir b/mlir/test/Dialect/SparseTensor/constant_index_map.mlir new file mode 100644 index 0000000000000..cbd48b06afaaa --- /dev/null +++ b/mlir/test/Dialect/SparseTensor/constant_index_map.mlir @@ -0,0 +1,41 @@ +// Reported by https://github.com/llvm/llvm-project/issues/61530 + +// RUN: mlir-opt %s -sparsification | FileCheck %s + +#map1 = affine_map<(d0) -> (0, d0)> +#map2 = affine_map<(d0) -> (d0)> + +#SpVec = #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }> + +// CHECK-LABEL: func.func @main( +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<1x77xi1>, +// CHECK-SAME: %[[VAL_1:.*1]]: tensor<1x77xi1>) -> tensor<77xi1, #{{.*}}> { +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 77 : index +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_5:.*]] = bufferization.alloc_tensor() : tensor<77xi1, #{{.*}}> +// CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<1x77xi1> +// CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<1x77xi1> +// CHECK: %[[VAL_8:.*]] = scf.for %[[VAL_9:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] iter_args(%[[VAL_10:.*]] = %[[VAL_5]]) -> (tensor<77xi1, #{{.*}}>) { +// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]], %[[VAL_9]]] : memref<1x77xi1> +// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_3]], %[[VAL_9]]] : memref<1x77xi1> +// CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i1 +// CHECK: %[[VAL_14:.*]] = sparse_tensor.insert %[[VAL_13]] into %[[VAL_10]]{{\[}}%[[VAL_9]]] : tensor<77xi1, #{{.*}}> +// CHECK: scf.yield %[[VAL_14]] : tensor<77xi1, #{{.*}}> +// CHECK: } +// CHECK: %[[VAL_15:.*]] = sparse_tensor.load %[[VAL_16:.*]] hasInserts : tensor<77xi1, #{{.*}}> +// CHECK: return %[[VAL_15]] : tensor<77xi1, #{{.*}}> +// CHECK: } +func.func @main(%arg0: tensor<1x77xi1>, %arg1: tensor<1x77xi1>) -> tensor<77xi1, #SpVec> { + %0 = bufferization.alloc_tensor() : tensor<77xi1, #SpVec> + %1 = linalg.generic { + indexing_maps = [#map1, #map1, #map2], + iterator_types = ["parallel"]} + ins(%arg0, %arg1 : tensor<1x77xi1>, tensor<1x77xi1>) + outs(%0 : tensor<77xi1, #SpVec>) { + ^bb0(%in: i1, %in_0: i1, %out: i1): + %2 = arith.addi %in, %in_0 
: i1 + linalg.yield %2 : i1 + } -> tensor<77xi1, #SpVec> + return %1 : tensor<77xi1, #SpVec> +} diff --git a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir index 97293348774ca..2cda2335923ce 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir @@ -496,4 +496,3 @@ func.func @mul_const_affine_dense_dim_2d(%arga: tensor<34x16xf64, #CSR>, } -> tensor<32x16xf64> return %0 : tensor<32x16xf64> } - diff --git a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp index 270b5836907e3..599e8abd52f30 100644 --- a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp +++ b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp @@ -128,7 +128,8 @@ class MergerTestBase : public ::testing::Test { protected: MergerTestBase(unsigned numTensors, unsigned numLoops) : numTensors(numTensors), numLoops(numLoops), - merger(numTensors, numLoops, /*numFilterLoops=*/0) {} + merger(numTensors, numLoops, /*numFilterLoops=*/0, + /*maxRank=*/numLoops) {} /// /// Expression construction helpers. From b9c4b95b11b51b68a8e00a0b2f1829db1c858f51 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Tue, 21 Mar 2023 17:40:35 -0700 Subject: [PATCH 245/691] [llvm] Use ConstantInt::{isZero,isOne} (NFC) --- llvm/lib/Analysis/TypeMetadataUtils.cpp | 2 +- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 4 ++-- llvm/lib/IR/IRBuilder.cpp | 4 +--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 2 +- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 2 +- llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 4 ++-- llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp | 2 +- llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 2 +- llvm/lib/Transforms/IPO/GlobalDCE.cpp | 5 ++--- llvm/lib/Transforms/IPO/LowerTypeTests.cpp | 4 ++-- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2 +- 11 files changed, 15 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Analysis/TypeMetadataUtils.cpp b/llvm/lib/Analysis/TypeMetadataUtils.cpp index ad300cb245094..19fea02630514 100644 --- a/llvm/lib/Analysis/TypeMetadataUtils.cpp +++ b/llvm/lib/Analysis/TypeMetadataUtils.cpp @@ -175,7 +175,7 @@ Constant *llvm::getPointerAtOffset(Constant *I, uint64_t Offset, Module &M, // Relative-pointer support starts here. if (auto *CI = dyn_cast(I)) { - if (Offset == 0 && CI->getZExtValue() == 0) { + if (Offset == 0 && CI->isZero()) { return I; } } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index fffbb862d390f..3df83251cb447 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -844,8 +844,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, // For conditional branch lowering, we might try to do something silly like // emit an G_ICMP to compare an existing G_ICMP i1 result with true. If so, // just re-use the existing condition vreg. 
- if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && - CI->getZExtValue() == 1 && CB.PredInfo.Pred == CmpInst::ICMP_EQ) { + if (MRI->getType(CondLHS).getSizeInBits() == 1 && CI && CI->isOne() && + CB.PredInfo.Pred == CmpInst::ICMP_EQ) { Cond = CondLHS; } else { Register CondRHS = getOrCreateVReg(*CB.CmpRHS); diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 3ebd40a862191..f3d5a6099cd60 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -102,9 +102,7 @@ Value *IRBuilderBase::CreateVScale(Constant *Scaling, const Twine &Name) { Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::vscale, {Scaling->getType()}); CallInst *CI = CreateCall(TheFn, {}, {}, Name); - return cast(Scaling)->getSExtValue() == 1 - ? CI - : CreateMul(CI, Scaling); + return cast(Scaling)->isOne() ? CI : CreateMul(CI, Scaling); } Value *IRBuilderBase::CreateElementCount(Type *DstType, ElementCount EC) { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 0d3857743cb33..0612304fb8fa3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13953,7 +13953,7 @@ bool AArch64TargetLowering::shouldSinkOperands( ConstantInt *ElementConstant = dyn_cast(Insert->getOperand(2)); // Check that the insertelement is inserting into element 0 - if (!ElementConstant || ElementConstant->getZExtValue() != 0) + if (!ElementConstant || !ElementConstant->isZero()) continue; unsigned Opcode = OperandInstr->getOpcode(); diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index ce12b59aa4ed2..5273beedcb83a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -2240,7 +2240,7 @@ bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { case TargetOpcode::G_CONSTANT: { bool IsZero = false; if (I.getOperand(1).isCImm()) - IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0; + IsZero = I.getOperand(1).getCImm()->isZero(); else if (I.getOperand(1).isImm()) IsZero = I.getOperand(1).getImm() == 0; diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 5ed8f900fb88f..d8c02e5b73921 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -762,7 +762,7 @@ void ARMAsmPrinter::emitAttributes() { auto *PACValue = mdconst::extract_or_null( SourceModule->getModuleFlag("sign-return-address")); - if (PACValue && PACValue->getZExtValue() == 1) { + if (PACValue && PACValue->isOne()) { // If "+pacbti" is used as an architecture extension, // Tag_PAC_extension is emitted in // ARMTargetStreamer::emitTargetAttributes(). @@ -775,7 +775,7 @@ void ARMAsmPrinter::emitAttributes() { auto *BTIValue = mdconst::extract_or_null( SourceModule->getModuleFlag("branch-target-enforcement")); - if (BTIValue && BTIValue->getZExtValue() == 1) { + if (BTIValue && BTIValue->isOne()) { // If "+pacbti" is used as an architecture extension, // Tag_BTI_extension is emitted in // ARMTargetStreamer::emitTargetAttributes(). 
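The two spellings are interchangeable whenever the constant is known to fit in 64 bits; isZero()/isOne() additionally avoid getZExtValue()'s precondition that the value fits in a uint64_t, besides reading better. A minimal stand-alone illustration (not part of the patch):

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/LLVMContext.h"
  #include <cassert>

  int main() {
    llvm::LLVMContext Ctx;
    llvm::APInt Big = llvm::APInt::getOneBitSet(128, 100); // 2^100
    auto *C = llvm::ConstantInt::get(Ctx, Big);
    assert(!C->isZero() && !C->isOne()); // works at any bit width
    // C->getZExtValue();                // would assert: value exceeds 64 bits
    return 0;
  }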
diff --git a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp index a7887712c29a4..d3d12664228be 100644 --- a/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp +++ b/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp @@ -664,7 +664,7 @@ Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) { continue; if (auto *T = dyn_cast(IncV)) - if (T->getZExtValue() == 1) + if (T->isOne()) return PN; } return nullptr; diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index b2db77a531107..d4173ac010ca0 100644 --- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -429,7 +429,7 @@ bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( if (MDNode *UnrollCountMD = GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) { if (mdconst::extract(UnrollCountMD->getOperand(1)) - ->getZExtValue() == 1) + ->isOne()) return true; } } diff --git a/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/llvm/lib/Transforms/IPO/GlobalDCE.cpp index 4aa5ae8f6a03e..e26f8663b0ced 100644 --- a/llvm/lib/Transforms/IPO/GlobalDCE.cpp +++ b/llvm/lib/Transforms/IPO/GlobalDCE.cpp @@ -125,8 +125,7 @@ void GlobalDCEPass::ScanVTables(Module &M) { auto *LTOPostLinkMD = cast_or_null(M.getModuleFlag("LTOPostLink")); bool LTOPostLink = - LTOPostLinkMD && - (cast(LTOPostLinkMD->getValue())->getZExtValue() != 0); + LTOPostLinkMD && !cast(LTOPostLinkMD->getValue())->isZero(); for (GlobalVariable &GV : M.globals()) { Types.clear(); @@ -230,7 +229,7 @@ void GlobalDCEPass::AddVirtualFunctionDependencies(Module &M) { // Don't attempt VFE in that case. auto *Val = mdconst::dyn_extract_or_null( M.getModuleFlag("Virtual Function Elim")); - if (!Val || Val->getZExtValue() == 0) + if (!Val || Val->isZero()) return; ScanVTables(M); diff --git a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp index 8380afaaaf982..1280c421195d3 100644 --- a/llvm/lib/Transforms/IPO/LowerTypeTests.cpp +++ b/llvm/lib/Transforms/IPO/LowerTypeTests.cpp @@ -244,7 +244,7 @@ bool lowertypetests::isJumpTableCanonical(Function *F) { return false; auto *CI = mdconst::extract_or_null( F->getParent()->getModuleFlag("CFI Canonical Jump Tables")); - if (!CI || CI->getZExtValue() != 0) + if (!CI || !CI->isZero()) return true; return F->hasFnAttribute("cfi-canonical-jump-table"); } @@ -1242,7 +1242,7 @@ void LowerTypeTestsModule::createJumpTableEntry( bool Endbr = false; if (const auto *MD = mdconst::extract_or_null( Dest->getParent()->getModuleFlag("cf-protection-branch"))) - Endbr = MD->getZExtValue() != 0; + Endbr = !MD->isZero(); if (Endbr) AsmOS << (JumpTableArch == Triple::x86 ? "endbr32\n" : "endbr64\n"); AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n"; diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index bf525a7dcc48f..1a3c692e628eb 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -3324,7 +3324,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared { auto Remark = [&](OptimizationRemark OR) { return OR << "Replaced globalized variable with " << ore::NV("SharedMemory", AllocSize->getZExtValue()) - << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ") + << (AllocSize->isOne() ? 
" byte " : " bytes ") << "of shared memory."; }; A.emitRemark(CB, "OMP111", Remark); From d70e7ea0d1e556037ef5b6874734cf51396b033b Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Wed, 22 Mar 2023 01:23:19 +0000 Subject: [PATCH 246/691] [amdgpu][nfc] Extract more functions in LowerModuleLDS, mark more methods static --- .../AMDGPU/AMDGPULowerModuleLDSPass.cpp | 328 ++++++++++-------- 1 file changed, 183 insertions(+), 145 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 455d76b0cecde..274597cb5607e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -631,7 +631,7 @@ class AMDGPULowerModuleLDS : public ModulePass { return tableKernelIndexCache[F]; } - std::vector<Function *> assignLDSKernelIDToEachKernel( + static std::vector<Function *> assignLDSKernelIDToEachKernel( Module *M, DenseSet<Function *> const &KernelsThatAllocateTableLDS) { // Associate kernels in the set with an arbitrary but reproducible order and // annotate them with that order in metadata. This metadata is recognised by @@ -680,168 +680,157 @@ class AMDGPULowerModuleLDS : public ModulePass { return OrderedKernels; } - bool runOnModule(Module &M) override { - LLVMContext &Ctx = M.getContext(); - CallGraph CG = CallGraph(M); - bool Changed = superAlignLDSGlobals(M); + static void partitionVariablesIntoIndirectStrategies( + Module &M, LDSUsesInfoTy const &LDSUsesInfo, + VariableFunctionMap &LDSToKernelsThatNeedToAccessItIndirectly, + DenseSet<GlobalVariable *> &ModuleScopeVariables, + DenseSet<GlobalVariable *> &TableLookupVariables, + DenseSet<GlobalVariable *> &KernelAccessVariables) { + + GlobalVariable *HybridModuleRoot = + LoweringKindLoc != LoweringKind::hybrid + ? nullptr + : chooseBestVariableForModuleStrategy( + M.getDataLayout(), LDSToKernelsThatNeedToAccessItIndirectly); + + DenseSet<Function *> const EmptySet; + DenseSet<Function *> const &HybridModuleRootKernels = + HybridModuleRoot + ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot] + : EmptySet; + + for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) { + // Each iteration of this loop assigns exactly one global variable to + // exactly one of the implementation strategies.
- Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M); + GlobalVariable *GV = K.first; + assert(AMDGPU::isLDSVariableToLower(*GV)); + assert(K.second.size() != 0); - Changed = true; // todo: narrow this down + switch (LoweringKindLoc) { + case LoweringKind::module: + ModuleScopeVariables.insert(GV); + break; - // For each kernel, what variables does it access directly or through - // callees - LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M); - - // For each variable accessed through callees, which kernels access it - VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly; - for (auto &K : LDSUsesInfo.indirect_access) { - Function *F = K.first; - assert(isKernelLDS(F)); - for (GlobalVariable *GV : K.second) { - LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F); - } - } + case LoweringKind::table: + TableLookupVariables.insert(GV); + break; - // Partition variables accessed indirectly into the different strategies - DenseSet<GlobalVariable *> ModuleScopeVariables; - DenseSet<GlobalVariable *> TableLookupVariables; - DenseSet<GlobalVariable *> KernelAccessVariables; + case LoweringKind::kernel: + if (K.second.size() == 1) { + KernelAccessVariables.insert(GV); + } else { + report_fatal_error( + "cannot lower LDS '" + GV->getName() + + "' to kernel access as it is reachable from multiple kernels"); + } + break; - { - GlobalVariable *HybridModuleRoot = - LoweringKindLoc != LoweringKind::hybrid - ? nullptr - : chooseBestVariableForModuleStrategy( - M.getDataLayout(), - LDSToKernelsThatNeedToAccessItIndirectly); - - DenseSet<Function *> const EmptySet; - DenseSet<Function *> const &HybridModuleRootKernels = - HybridModuleRoot - ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot] - : EmptySet; - - for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) { - // Each iteration of this loop assigns exactly one global variable to - // exactly one of the implementation strategies. - - GlobalVariable *GV = K.first; - assert(AMDGPU::isLDSVariableToLower(*GV)); - assert(K.second.size() != 0); - - switch (LoweringKindLoc) { - case LoweringKind::module: + case LoweringKind::hybrid: { + if (GV == HybridModuleRoot) { + assert(K.second.size() != 1); ModuleScopeVariables.insert(GV); - break; - - case LoweringKind::table: + } else if (K.second.size() == 1) { + KernelAccessVariables.insert(GV); + } else if (set_is_subset(K.second, HybridModuleRootKernels)) { + ModuleScopeVariables.insert(GV); + } else { TableLookupVariables.insert(GV); - break; - - case LoweringKind::kernel: - if (K.second.size() == 1) { - KernelAccessVariables.insert(GV); - } else { - report_fatal_error( - "cannot lower LDS '" + GV->getName() + - "' to kernel access as it is reachable from multiple kernels"); - } - break; - - case LoweringKind::hybrid: { - if (GV == HybridModuleRoot) { - assert(K.second.size() != 1); - ModuleScopeVariables.insert(GV); - } else if (K.second.size() == 1) { - KernelAccessVariables.insert(GV); - } else if (set_is_subset(K.second, HybridModuleRootKernels)) { - ModuleScopeVariables.insert(GV); - } else { - TableLookupVariables.insert(GV); - } - break; - } } + break; } + } + } - // All LDS variables accessed indirectly have now been partitioned into - // the distinct lowering strategies.
+ assert(ModuleScopeVariables.size() + TableLookupVariables.size() + + KernelAccessVariables.size() == + LDSToKernelsThatNeedToAccessItIndirectly.size()); + } + + static GlobalVariable *lowerModuleScopeStructVariables( + Module &M, DenseSet<GlobalVariable *> const &ModuleScopeVariables, + DenseSet<Function *> const &KernelsThatAllocateModuleLDS) { + // Create a struct to hold the ModuleScopeVariables + // Replace all uses of those variables from non-kernel functions with the + // new struct instance. Replace only the uses from kernel functions that will + // allocate this instance. That is a space optimisation - kernels that use a + // subset of the module scope struct and do not need to allocate it for + // indirect calls will only allocate the subset they use (they do so as part + // of the per-kernel lowering). + if (ModuleScopeVariables.empty()) { + return nullptr; } - // If the kernel accesses a variable that is going to be stored in the - // module instance through a call then that kernel needs to allocate the - // module instance - DenseSet<Function *> KernelsThatAllocateModuleLDS = - kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo, - ModuleScopeVariables); - DenseSet<Function *> KernelsThatAllocateTableLDS = - kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo, - TableLookupVariables); + LLVMContext &Ctx = M.getContext(); - GlobalVariable *MaybeModuleScopeStruct = nullptr; - if (!ModuleScopeVariables.empty()) { - LDSVariableReplacement ModuleScopeReplacement = - createLDSVariableReplacement(M, "llvm.amdgcn.module.lds", - ModuleScopeVariables); - MaybeModuleScopeStruct = ModuleScopeReplacement.SGV; - appendToCompilerUsed(M, - {static_cast<GlobalValue *>( - ConstantExpr::getPointerBitCastOrAddrSpaceCast( - cast<Constant>(ModuleScopeReplacement.SGV), - Type::getInt8PtrTy(Ctx)))}); - - // module.lds will be allocated at zero in any kernel that allocates it - recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0); - - // historic - removeLocalVarsFromUsedLists(M, ModuleScopeVariables); - - // Replace all uses of module scope variable from non-kernel functions - replaceLDSVariablesWithStruct( - M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) { - Instruction *I = dyn_cast<Instruction>(U.getUser()); - if (!I) { - return false; - } - Function *F = I->getFunction(); - return !isKernelLDS(F); - }); + LDSVariableReplacement ModuleScopeReplacement = + createLDSVariableReplacement(M, "llvm.amdgcn.module.lds", + ModuleScopeVariables); - // Replace uses of module scope variable from kernel functions that - // allocate the module scope variable, otherwise leave them unchanged - // Record on each kernel whether the module scope global is used by it + appendToCompilerUsed(M, {static_cast<GlobalValue *>( + ConstantExpr::getPointerBitCastOrAddrSpaceCast( + cast<Constant>(ModuleScopeReplacement.SGV), + Type::getInt8PtrTy(Ctx)))}); - LLVMContext &Ctx = M.getContext(); - IRBuilder<> Builder(Ctx); + // module.lds will be allocated at zero in any kernel that allocates it + recordLDSAbsoluteAddress(&M, ModuleScopeReplacement.SGV, 0); - for (Function &Func : M.functions()) { - if (Func.isDeclaration() || !isKernelLDS(&Func)) - continue; + // historic + removeLocalVarsFromUsedLists(M, ModuleScopeVariables); + + // Replace all uses of module scope variable from non-kernel functions + replaceLDSVariablesWithStruct( + M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) { + Instruction *I = dyn_cast<Instruction>(U.getUser()); + if (!I) { + return false; + } + Function *F = I->getFunction(); + return !isKernelLDS(F); + }); - if (KernelsThatAllocateModuleLDS.contains(&Func)) { - 
replaceLDSVariablesWithStruct( - M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) { - Instruction *I = dyn_cast<Instruction>(U.getUser()); - if (!I) { - return false; - } - Function *F = I->getFunction(); - return F == &Func; - }); + // Replace uses of module scope variable from kernel functions that + // allocate the module scope variable, otherwise leave them unchanged + // Record on each kernel whether the module scope global is used by it - markUsedByKernel(Builder, &Func, ModuleScopeReplacement.SGV); + IRBuilder<> Builder(Ctx); - } else { - Func.addFnAttr("amdgpu-elide-module-lds"); - } + for (Function &Func : M.functions()) { + if (Func.isDeclaration() || !isKernelLDS(&Func)) + continue; + + if (KernelsThatAllocateModuleLDS.contains(&Func)) { + replaceLDSVariablesWithStruct( + M, ModuleScopeVariables, ModuleScopeReplacement, [&](Use &U) { + Instruction *I = dyn_cast<Instruction>(U.getUser()); + if (!I) { + return false; + } + Function *F = I->getFunction(); + return F == &Func; + }); + + markUsedByKernel(Builder, &Func, ModuleScopeReplacement.SGV); + + } else { + Func.addFnAttr("amdgpu-elide-module-lds"); } } - // Create a struct for each kernel for the non-module-scope variables + return ModuleScopeReplacement.SGV; + } + + static DenseMap<Function *, LDSVariableReplacement> + lowerKernelScopeStructVariables( + Module &M, LDSUsesInfoTy &LDSUsesInfo, + DenseSet<GlobalVariable *> const &ModuleScopeVariables, + DenseSet<Function *> const &KernelsThatAllocateModuleLDS, + GlobalVariable *MaybeModuleScopeStruct) { + + // Create a struct for each kernel for the non-module-scope variables. + DenseMap<Function *, LDSVariableReplacement> KernelToReplacement; for (Function &Func : M.functions()) { if (Func.isDeclaration() || !isKernelLDS(&Func)) @@ -927,6 +916,55 @@ class AMDGPULowerModuleLDS : public ModulePass { return I && I->getFunction() == &Func; }); } + return KernelToReplacement; + } + + bool runOnModule(Module &M) override { + CallGraph CG = CallGraph(M); + bool Changed = superAlignLDSGlobals(M); + + Changed |= eliminateConstantExprUsesOfLDSFromAllInstructions(M); + + Changed = true; // todo: narrow this down + + // For each kernel, what variables does it access directly or through + // callees + LDSUsesInfoTy LDSUsesInfo = getTransitiveUsesOfLDS(CG, M); + + // For each variable accessed through callees, which kernels access it + VariableFunctionMap LDSToKernelsThatNeedToAccessItIndirectly; + for (auto &K : LDSUsesInfo.indirect_access) { + Function *F = K.first; + assert(isKernelLDS(F)); + for (GlobalVariable *GV : K.second) { + LDSToKernelsThatNeedToAccessItIndirectly[GV].insert(F); + } + } + + DenseSet<GlobalVariable *> ModuleScopeVariables; + DenseSet<GlobalVariable *> TableLookupVariables; + DenseSet<GlobalVariable *> KernelAccessVariables; + partitionVariablesIntoIndirectStrategies( + M, LDSUsesInfo, LDSToKernelsThatNeedToAccessItIndirectly, + ModuleScopeVariables, TableLookupVariables, KernelAccessVariables); + + // If the kernel accesses a variable that is going to be stored in the + // module instance through a call then that kernel needs to allocate the + // module instance + DenseSet<Function *> KernelsThatAllocateModuleLDS = + kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo, + ModuleScopeVariables); + DenseSet<Function *> KernelsThatAllocateTableLDS = + kernelsThatIndirectlyAccessAnyOfPassedVariables(M, LDSUsesInfo, + TableLookupVariables); + + GlobalVariable *MaybeModuleScopeStruct = lowerModuleScopeStructVariables( + M, ModuleScopeVariables, KernelsThatAllocateModuleLDS); + + DenseMap<Function *, LDSVariableReplacement> KernelToReplacement = + lowerKernelScopeStructVariables(M, LDSUsesInfo, ModuleScopeVariables, + KernelsThatAllocateModuleLDS, + MaybeModuleScopeStruct); // Lower zero 
cost accesses to the kernel instances just created for (auto &GV : KernelAccessVariables) { @@ -1132,7 +1170,7 @@ class AMDGPULowerModuleLDS : public ModulePass { } template <typename PredicateTy> - void replaceLDSVariablesWithStruct( + static void replaceLDSVariablesWithStruct( Module &M, DenseSet<GlobalVariable *> const &LDSVarsToTransformArg, LDSVariableReplacement Replacement, PredicateTy Predicate) { LLVMContext &Ctx = M.getContext(); @@ -1190,9 +1228,9 @@ class AMDGPULowerModuleLDS : public ModulePass { } } - void refineUsesAlignmentAndAA(Value *Ptr, Align A, const DataLayout &DL, - MDNode *AliasScope, MDNode *NoAlias, - unsigned MaxDepth = 5) { + static void refineUsesAlignmentAndAA(Value *Ptr, Align A, + const DataLayout &DL, MDNode *AliasScope, + MDNode *NoAlias, unsigned MaxDepth = 5) { if (!MaxDepth || (A == 1 && !AliasScope)) return; From c49e56a2954fe77742487394279bf670213525b5 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 21 Mar 2023 18:34:40 -0700 Subject: [PATCH 247/691] [SCEV] Add coverage for a missing flag inference case --- .../max-backedge-taken-count-guard-info.ll | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index 3f900eeecd34a..e1acec162d3c8 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -1685,3 +1685,39 @@ loop: exit: ret i32 0 } + +; TODO: The pointer induction variable can be implied No Self Wrap. +define void @gep_addrec_nw(ptr %a) { +; CHECK-LABEL: 'gep_addrec_nw' +; CHECK-NEXT: Classifying expressions for: @gep_addrec_nw +; CHECK-NEXT: %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %a, %entry ] +; CHECK-NEXT: --> {%a,+,4}<%for.body> U: full-set S: full-set Exits: (1512 + %a) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ] +; CHECK-NEXT: --> {379,+,-1}<%for.body> U: [1,380) S: [1,380) Exits: 1 LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: %lsr.iv.next = add nsw i64 %lsr.iv, -1 +; CHECK-NEXT: --> {378,+,-1}<%for.body> U: [0,379) S: [0,379) Exits: 0 LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4 +; CHECK-NEXT: --> {(4 + %a),+,4}<%for.body> U: full-set S: full-set Exits: (1516 + %a) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: Determining loop execution counts for: @gep_addrec_nw +; CHECK-NEXT: Loop %for.body: backedge-taken count is 378 +; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 378 +; CHECK-NEXT: Loop %for.body: symbolic max backedge-taken count is 378 +; CHECK-NEXT: Loop %for.body: Predicated backedge-taken count is 378 +; CHECK-NEXT: Predicates: +; CHECK: Loop %for.body: Trip multiple is 379 +; +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %a, %entry ] + %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ] + store i32 1, ptr %lsr.iv1, align 4 + %lsr.iv.next = add nsw i64 %lsr.iv, -1 + %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4 + %exitcond.not = icmp eq i64 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} From 78e6818049c77c90225847758ac2a317230c0197 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Wed, 22 Mar 2023 01:49:41 +0000 Subject: [PATCH 248/691] 
[amdgpu][nfc] clang-format AMDGPULowerModuleLDS for easier merging --- .../AMDGPU/AMDGPULowerModuleLDSPass.cpp | 102 +++++++++--------- 1 file changed, 50 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 274597cb5607e..ee9679ef9e518 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -315,11 +315,11 @@ class AMDGPULowerModuleLDS : public ModulePass { DenseSet<GlobalVariable *> VariablesReachableThroughFunctionPointer; for (Function &F : M.functions()) { if (!isKernelLDS(&F)) - if (F.hasAddressTaken(nullptr, - /* IgnoreCallbackUses */ false, - /* IgnoreAssumeLikeCalls */ false, - /* IgnoreLLVMUsed */ true, - /* IgnoreArcAttachedCall */ false)) { + if (F.hasAddressTaken(nullptr, + /* IgnoreCallbackUses */ false, + /* IgnoreAssumeLikeCalls */ false, + /* IgnoreLLVMUsed */ true, + /* IgnoreArcAttachedCall */ false)) { set_union(VariablesReachableThroughFunctionPointer, direct_map_function[&F]); } @@ -476,7 +476,6 @@ class AMDGPULowerModuleLDS : public ModulePass { IRBuilder<> Builder(Ctx); Type *I32 = Type::getInt32Ty(Ctx); - for (size_t Index = 0; Index < ModuleScopeVariables.size(); Index++) { auto *GV = ModuleScopeVariables[Index]; @@ -503,7 +502,6 @@ class AMDGPULowerModuleLDS : public ModulePass { ConstantInt::get(I32, Index), }; - Value *Address = Builder.CreateInBoundsGEP( LookupTable->getValueType(), LookupTable, GEPIdx, GV->getName()); @@ -523,7 +521,8 @@ class AMDGPULowerModuleLDS : public ModulePass { DenseSet<Function *> KernelSet; - if (VariableSet.empty()) return KernelSet; + if (VariableSet.empty()) + return KernelSet; for (Function &Func : M.functions()) { if (Func.isDeclaration() || !isKernelLDS(&Func)) @@ -586,8 +585,9 @@ class AMDGPULowerModuleLDS : public ModulePass { // strategy continue; } - CandidateTy Candidate(GV, K.second.size(), - DL.getTypeAllocSize(GV->getValueType()).getFixedValue()); + CandidateTy Candidate( + GV, K.second.size(), + DL.getTypeAllocSize(GV->getValueType()).getFixedValue()); if (MostUsed < Candidate) MostUsed = Candidate; } @@ -638,44 +638,42 @@ class AMDGPULowerModuleLDS : public ModulePass { // the backend and lowered to a SGPR which can be read from using // amdgcn_lds_kernel_id. - std::vector<Function *> OrderedKernels; - - for (Function &Func : M->functions()) { - if (Func.isDeclaration()) - continue; - if (!isKernelLDS(&Func)) - continue; - - if (KernelsThatAllocateTableLDS.contains(&Func)) { - assert(Func.hasName()); // else fatal error earlier - OrderedKernels.push_back(&Func); - } - } + std::vector<Function *> OrderedKernels; - // Put them in an arbitrary but reproducible order - llvm::sort(OrderedKernels.begin(), OrderedKernels.end(), - [](const Function *lhs, const Function *rhs) -> bool { - return lhs->getName() < rhs->getName(); - }); + for (Function &Func : M->functions()) { + if (Func.isDeclaration()) + continue; + if (!isKernelLDS(&Func)) + continue; - // Annotate the kernels with their order in this vector - LLVMContext &Ctx = M->getContext(); - IRBuilder<> Builder(Ctx); + if (KernelsThatAllocateTableLDS.contains(&Func)) { + assert(Func.hasName()); // else fatal error earlier + OrderedKernels.push_back(&Func); + } + } - if (OrderedKernels.size() > UINT32_MAX) { - // 32 bit keeps it in one SGPR. 
> 2**32 kernels won't fit on the GPU - report_fatal_error("Unimplemented LDS lowering for > 2**32 kernels"); - } + // Put them in an arbitrary but reproducible order + llvm::sort(OrderedKernels.begin(), OrderedKernels.end(), + [](const Function *lhs, const Function *rhs) -> bool { + return lhs->getName() < rhs->getName(); + }); - for (size_t i = 0; i < OrderedKernels.size(); i++) { - Metadata *AttrMDArgs[1] = { - ConstantAsMetadata::get(Builder.getInt32(i)), - }; - OrderedKernels[i]->setMetadata("llvm.amdgcn.lds.kernel.id", - MDNode::get(Ctx, AttrMDArgs)); + // Annotate the kernels with their order in this vector + LLVMContext &Ctx = M->getContext(); + IRBuilder<> Builder(Ctx); - } + if (OrderedKernels.size() > UINT32_MAX) { + // 32 bit keeps it in one SGPR. > 2**32 kernels won't fit on the GPU + report_fatal_error("Unimplemented LDS lowering for > 2**32 kernels"); + } + for (size_t i = 0; i < OrderedKernels.size(); i++) { + Metadata *AttrMDArgs[1] = { + ConstantAsMetadata::get(Builder.getInt32(i)), + }; + OrderedKernels[i]->setMetadata("llvm.amdgcn.lds.kernel.id", + MDNode::get(Ctx, AttrMDArgs)); + } return OrderedKernels; } @@ -979,22 +977,22 @@ class AMDGPULowerModuleLDS : public ModulePass { // TODO: Looks like a latent bug, Replacement may not be marked // UsedByKernel here replaceLDSVariablesWithStruct(M, Vec, Replacement, [](Use &U) { - return isa<Instruction>(U.getUser()); + return isa<Instruction>(U.getUser()); }); } if (!KernelsThatAllocateTableLDS.empty()) { - LLVMContext &Ctx = M.getContext(); - IRBuilder<> Builder(Ctx); + LLVMContext &Ctx = M.getContext(); + IRBuilder<> Builder(Ctx); - // The ith element of this vector is kernel id i - std::vector<Function *> OrderedKernels = - assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS); + // The ith element of this vector is kernel id i + std::vector<Function *> OrderedKernels = + assignLDSKernelIDToEachKernel(&M, KernelsThatAllocateTableLDS); - for (size_t i = 0; i < OrderedKernels.size(); i++) { - markUsedByKernel(Builder, OrderedKernels[i], - KernelToReplacement[OrderedKernels[i]].SGV); - } + for (size_t i = 0; i < OrderedKernels.size(); i++) { + markUsedByKernel(Builder, OrderedKernels[i], + KernelToReplacement[OrderedKernels[i]].SGV); + } // The order must be consistent between lookup table and accesses to // lookup table From 8e43c22d303835bf90a64f6732df4dfc028ac688 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 21 Mar 2023 18:52:05 -0700 Subject: [PATCH 249/691] [RISCV] Use LBU for extloadi8. The Zcb extension has c.lbu, but not c.lb. This patch makes us prefer LBU over LB if we have a choice, which will enable more compression opportunities.
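For illustration, a minimal sketch of the kind of load this affects (an assumed example; the function name and IR are illustrative, not taken from this patch's tests). A plain i8 load whose result is only anyext-promoted legalizes to extloadi8, where the upper bits are don't-care, so LB and LBU are both correct:

  ; assumed example, not from the patch's test updates
  define i8 @load_byte(ptr %p) {
    %v = load i8, ptr %p ; becomes an anyext (extloadi8) load during legalization
    ret i8 %v            ; upper bits of the returned register are unspecified
  }

Such loads previously selected "lb a0, 0(a0)" and now select "lbu a0, 0(a0)"; only the unsigned form has a 16-bit encoding (c.lbu) in Zcb.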
Reviewed By: asb Differential Revision: https://reviews.llvm.org/D146270 --- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 2 +- llvm/test/CodeGen/RISCV/atomic-rmw.ll | 80 ++++---- llvm/test/CodeGen/RISCV/atomic-signext.ll | 16 +- .../CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll | 8 +- llvm/test/CodeGen/RISCV/forced-atomics.ll | 4 +- .../CodeGen/RISCV/hoist-global-addr-base.ll | 4 +- .../RISCV/local-stack-slot-allocation.ll | 16 +- llvm/test/CodeGen/RISCV/mem.ll | 6 +- llvm/test/CodeGen/RISCV/mem64.ll | 6 +- llvm/test/CodeGen/RISCV/memcpy-inline.ll | 4 +- llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll | 4 +- llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll | 4 +- .../fixed-vector-strided-load-store-asm.ll | 4 +- .../RISCV/rvv/fixed-vectors-extract-i1.ll | 8 +- .../RISCV/rvv/fixed-vectors-masked-gather.ll | 172 +++++++++--------- .../RISCV/rvv/fixed-vectors-unaligned.ll | 8 +- .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 12 +- .../CodeGen/RISCV/unaligned-load-store.ll | 48 ++--- .../CodeGen/RISCV/urem-seteq-illegal-types.ll | 6 +- 19 files changed, 199 insertions(+), 213 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 6ce39999b1a08..ab8a8a4cc9935 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1625,7 +1625,7 @@ multiclass LdPat<PatFrag LoadOp, RVInst Inst, ValueType vt = XLenVT> { } defm : LdPat<sextloadi8, LB>; -defm : LdPat<extloadi8, LB>; +defm : LdPat<extloadi8, LBU>; // Prefer unsigned due to no c.lb in Zcb. defm : LdPat<sextloadi16, LH>; defm : LdPat<extloadi16, LH>; defm : LdPat<load, LW, i32>, Requires<[IsRV32]>; diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 424d28a87bb26..0027d0a24f072 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -2024,7 +2024,7 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB35_4 ; RV32I-NEXT: .LBB35_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2095,7 +2095,7 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB35_4 ; RV64I-NEXT: .LBB35_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2170,7 +2170,7 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB36_4 ; RV32I-NEXT: .LBB36_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: 
Depth=1 @@ -2387,7 +2387,7 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB37_4 ; RV64I-NEXT: .LBB37_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2462,7 +2462,7 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB38_4 ; RV32I-NEXT: .LBB38_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2533,7 +2533,7 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a4, 2 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB38_4 ; RV64I-NEXT: .LBB38_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2608,7 +2608,7 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB39_4 ; RV32I-NEXT: .LBB39_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2679,7 +2679,7 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB39_4 ; RV64I-NEXT: .LBB39_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2754,7 +2754,7 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB40_4 ; RV32I-NEXT: .LBB40_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2825,7 +2825,7 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB40_4 ; RV64I-NEXT: .LBB40_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2900,7 +2900,7 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB41_4 ; RV32I-NEXT: .LBB41_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -2971,7 +2971,7 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a4, 2 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB41_4 ; RV64I-NEXT: .LBB41_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3046,7 +3046,7 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; 
RV32I-NEXT: bnez a0, .LBB42_4 ; RV32I-NEXT: .LBB42_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3117,7 +3117,7 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB42_4 ; RV64I-NEXT: .LBB42_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3192,7 +3192,7 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB43_4 ; RV32I-NEXT: .LBB43_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3263,7 +3263,7 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a4, 2 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB43_4 ; RV64I-NEXT: .LBB43_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3338,7 +3338,7 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB44_4 ; RV32I-NEXT: .LBB44_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3409,7 +3409,7 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB44_4 ; RV64I-NEXT: .LBB44_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3483,7 +3483,7 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB45_4 ; RV32I-NEXT: .LBB45_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3547,7 +3547,7 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB45_4 ; RV64I-NEXT: .LBB45_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3615,7 +3615,7 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB46_4 ; RV32I-NEXT: .LBB46_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3679,7 +3679,7 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a4, 2 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB46_4 ; RV64I-NEXT: .LBB46_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3747,7 +3747,7 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 
; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB47_4 ; RV32I-NEXT: .LBB47_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3811,7 +3811,7 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB47_4 ; RV64I-NEXT: .LBB47_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3879,7 +3879,7 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB48_4 ; RV32I-NEXT: .LBB48_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -3943,7 +3943,7 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a4, 2 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB48_4 ; RV64I-NEXT: .LBB48_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4011,7 +4011,7 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB49_4 ; RV32I-NEXT: .LBB49_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4075,7 +4075,7 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB49_4 ; RV64I-NEXT: .LBB49_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4143,7 +4143,7 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB50_4 ; RV32I-NEXT: .LBB50_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4207,7 +4207,7 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB50_4 ; RV64I-NEXT: .LBB50_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4275,7 +4275,7 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB51_4 ; RV32I-NEXT: .LBB51_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4339,7 +4339,7 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a4, 2 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB51_4 ; RV64I-NEXT: .LBB51_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4407,7 
+4407,7 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB52_4 ; RV32I-NEXT: .LBB52_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4471,7 +4471,7 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB52_4 ; RV64I-NEXT: .LBB52_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4539,7 +4539,7 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB53_4 ; RV32I-NEXT: .LBB53_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4603,7 +4603,7 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a4, 2 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB53_4 ; RV64I-NEXT: .LBB53_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4671,7 +4671,7 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB54_4 ; RV32I-NEXT: .LBB54_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -4735,7 +4735,7 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB54_4 ; RV64I-NEXT: .LBB54_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index 9b3351ebe1ffd..48fce21490592 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -596,7 +596,7 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB10_4 ; RV32I-NEXT: .LBB10_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -670,7 +670,7 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB10_4 ; RV64I-NEXT: .LBB10_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -748,7 +748,7 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB11_4 ; RV32I-NEXT: .LBB11_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ 
-822,7 +822,7 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB11_4 ; RV64I-NEXT: .LBB11_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -899,7 +899,7 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB12_4 ; RV32I-NEXT: .LBB12_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -966,7 +966,7 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB12_4 ; RV64I-NEXT: .LBB12_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1037,7 +1037,7 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB13_4 ; RV32I-NEXT: .LBB13_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1104,7 +1104,7 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB13_4 ; RV64I-NEXT: .LBB13_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index 7c871089bca03..f1528e94c473c 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -41,7 +41,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 3(sp) +; RV32I-NEXT: lbu a3, 3(sp) ; RV32I-NEXT: beqz a0, .LBB0_1 ; RV32I-NEXT: # %bb.2: # %atomicrmw.end ; RV32I-NEXT: mv a0, a3 @@ -117,7 +117,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 7(sp) +; RV64I-NEXT: lbu a3, 7(sp) ; RV64I-NEXT: beqz a0, .LBB0_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end ; RV64I-NEXT: mv a0, a3 @@ -670,7 +670,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1@plt -; RV32I-NEXT: lb a3, 15(sp) +; RV32I-NEXT: lbu a3, 15(sp) ; RV32I-NEXT: bnez a0, .LBB4_4 ; RV32I-NEXT: .LBB4_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 @@ -766,7 +766,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1@plt -; RV64I-NEXT: lb a3, 15(sp) +; RV64I-NEXT: lbu a3, 15(sp) ; RV64I-NEXT: bnez a0, .LBB4_4 ; RV64I-NEXT: .LBB4_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 diff --git 
a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index 7c92a74dc9517..d15bc2b53ce81 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -137,7 +137,7 @@ define i8 @cmpxchg8(ptr %p) nounwind { ; RV32-NO-ATOMIC-NEXT: li a3, 5 ; RV32-NO-ATOMIC-NEXT: li a4, 5 ; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_1@plt -; RV32-NO-ATOMIC-NEXT: lb a0, 11(sp) +; RV32-NO-ATOMIC-NEXT: lbu a0, 11(sp) ; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16 ; RV32-NO-ATOMIC-NEXT: ret @@ -163,7 +163,7 @@ define i8 @cmpxchg8(ptr %p) nounwind { ; RV64-NO-ATOMIC-NEXT: li a3, 5 ; RV64-NO-ATOMIC-NEXT: li a4, 5 ; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_1@plt -; RV64-NO-ATOMIC-NEXT: lb a0, 7(sp) +; RV64-NO-ATOMIC-NEXT: lbu a0, 7(sp) ; RV64-NO-ATOMIC-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: addi sp, sp, 16 ; RV64-NO-ATOMIC-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll index 5375189197d96..90dad26a1833f 100644 --- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll +++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll @@ -382,7 +382,7 @@ define dso_local void @rmw_addi_addi() nounwind { ; RV32-LABEL: rmw_addi_addi: ; RV32: # %bb.0: # %entry ; RV32-NEXT: lui a0, %hi(bar+3211) -; RV32-NEXT: lb a1, %lo(bar+3211)(a0) +; RV32-NEXT: lbu a1, %lo(bar+3211)(a0) ; RV32-NEXT: addi a1, a1, 10 ; RV32-NEXT: sb a1, %lo(bar+3211)(a0) ; RV32-NEXT: ret @@ -390,7 +390,7 @@ define dso_local void @rmw_addi_addi() nounwind { ; RV64-LABEL: rmw_addi_addi: ; RV64: # %bb.0: # %entry ; RV64-NEXT: lui a0, %hi(bar+3211) -; RV64-NEXT: lb a1, %lo(bar+3211)(a0) +; RV64-NEXT: lbu a1, %lo(bar+3211)(a0) ; RV64-NEXT: addiw a1, a1, 10 ; RV64-NEXT: sb a1, %lo(bar+3211)(a0) ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll b/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll index 186b8287d87c9..ef64eeb9b1869 100644 --- a/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll +++ b/llvm/test/CodeGen/RISCV/local-stack-slot-allocation.ll @@ -15,8 +15,8 @@ define void @use_frame_base_reg() { ; RV32I-NEXT: lui a0, 24 ; RV32I-NEXT: addi a0, a0, 1704 ; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: lb a1, 4(a0) -; RV32I-NEXT: lb a0, 0(a0) +; RV32I-NEXT: lbu a1, 4(a0) +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: lui a0, 24 ; RV32I-NEXT: addi a0, a0, 1712 ; RV32I-NEXT: add sp, sp, a0 @@ -31,8 +31,8 @@ define void @use_frame_base_reg() { ; RV64I-NEXT: lui a0, 24 ; RV64I-NEXT: addiw a0, a0, 1704 ; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: lb a1, 4(a0) -; RV64I-NEXT: lb a0, 0(a0) +; RV64I-NEXT: lbu a1, 4(a0) +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: lui a0, 24 ; RV64I-NEXT: addiw a0, a0, 1712 ; RV64I-NEXT: add sp, sp, a0 @@ -57,10 +57,10 @@ define void @load_with_offset() { ; RV32I-NEXT: .cfi_def_cfa_offset 100608 ; RV32I-NEXT: lui a0, 25 ; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: lb a0, -292(a0) +; RV32I-NEXT: lbu a0, -292(a0) ; RV32I-NEXT: lui a0, 24 ; RV32I-NEXT: add a0, sp, a0 -; RV32I-NEXT: lb a0, 1704(a0) +; RV32I-NEXT: lbu a0, 1704(a0) ; RV32I-NEXT: lui a0, 25 ; RV32I-NEXT: addi a0, a0, -1792 ; RV32I-NEXT: add sp, sp, a0 @@ -74,10 +74,10 @@ define void @load_with_offset() { ; RV64I-NEXT: .cfi_def_cfa_offset 100608 ; RV64I-NEXT: lui a0, 25 ; RV64I-NEXT: add a0, sp, a0 -; RV64I-NEXT: lb a0, -292(a0) +; RV64I-NEXT: lbu a0, -292(a0) ; RV64I-NEXT: lui a0, 24 ; RV64I-NEXT: add 
a0, sp, a0 -; RV64I-NEXT: lb a0, 1704(a0) +; RV64I-NEXT: lbu a0, 1704(a0) ; RV64I-NEXT: lui a0, 25 ; RV64I-NEXT: addiw a0, a0, -1792 ; RV64I-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll index 700680eec46c5..74874c1ca74b3 100644 --- a/llvm/test/CodeGen/RISCV/mem.ll +++ b/llvm/test/CodeGen/RISCV/mem.ll @@ -8,7 +8,7 @@ define dso_local i32 @lb(ptr %a) nounwind { ; RV32I-LABEL: lb: ; RV32I: # %bb.0: ; RV32I-NEXT: lb a1, 1(a0) -; RV32I-NEXT: lb a0, 0(a0) +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret %1 = getelementptr i8, ptr %a, i32 1 @@ -123,7 +123,7 @@ define dso_local i32 @load_sext_zext_anyext_i1(ptr %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lbu a1, 1(a0) ; RV32I-NEXT: lbu a2, 2(a0) -; RV32I-NEXT: lb a0, 0(a0) +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: sub a0, a2, a1 ; RV32I-NEXT: ret ; sextload i1 @@ -145,7 +145,7 @@ define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lbu a1, 1(a0) ; RV32I-NEXT: lbu a2, 2(a0) -; RV32I-NEXT: lb a0, 0(a0) +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: sub a0, a2, a1 ; RV32I-NEXT: ret ; sextload i1 diff --git a/llvm/test/CodeGen/RISCV/mem64.ll b/llvm/test/CodeGen/RISCV/mem64.ll index ab775481accc3..903c5b223b69c 100644 --- a/llvm/test/CodeGen/RISCV/mem64.ll +++ b/llvm/test/CodeGen/RISCV/mem64.ll @@ -8,7 +8,7 @@ define dso_local i64 @lb(ptr %a) nounwind { ; RV64I-LABEL: lb: ; RV64I: # %bb.0: ; RV64I-NEXT: lb a1, 1(a0) -; RV64I-NEXT: lb a0, 0(a0) +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret %1 = getelementptr i8, ptr %a, i32 1 @@ -168,7 +168,7 @@ define dso_local i64 @load_sext_zext_anyext_i1(ptr %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: lbu a1, 1(a0) ; RV64I-NEXT: lbu a2, 2(a0) -; RV64I-NEXT: lb a0, 0(a0) +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: sub a0, a2, a1 ; RV64I-NEXT: ret ; sextload i1 @@ -190,7 +190,7 @@ define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: lbu a1, 1(a0) ; RV64I-NEXT: lbu a2, 2(a0) -; RV64I-NEXT: lb a0, 0(a0) +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: sub a0, a2, a1 ; RV64I-NEXT: ret ; sextload i1 diff --git a/llvm/test/CodeGen/RISCV/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/memcpy-inline.ll index 05cb2a83e1e70..4b1b01ce0151c 100644 --- a/llvm/test/CodeGen/RISCV/memcpy-inline.ll +++ b/llvm/test/CodeGen/RISCV/memcpy-inline.ll @@ -28,7 +28,7 @@ define i32 @t0() { ; RV32-NEXT: lui a2, %hi(dst) ; RV32-NEXT: sw a1, %lo(dst)(a2) ; RV32-NEXT: addi a0, a0, %lo(src) -; RV32-NEXT: lb a1, 10(a0) +; RV32-NEXT: lbu a1, 10(a0) ; RV32-NEXT: lh a3, 8(a0) ; RV32-NEXT: lw a0, 4(a0) ; RV32-NEXT: addi a2, a2, %lo(dst) @@ -44,7 +44,7 @@ define i32 @t0() { ; RV64-NEXT: ld a1, %lo(src)(a0) ; RV64-NEXT: lui a2, %hi(dst) ; RV64-NEXT: addi a0, a0, %lo(src) -; RV64-NEXT: lb a3, 10(a0) +; RV64-NEXT: lbu a3, 10(a0) ; RV64-NEXT: lh a0, 8(a0) ; RV64-NEXT: sd a1, %lo(dst)(a2) ; RV64-NEXT: addi a1, a2, %lo(dst) diff --git a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll index 961cb23714fb2..4a9d8b08a4b2f 100644 --- a/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-shift-sext.ll @@ -177,11 +177,11 @@ define i8 @test13(ptr %0, i64 %1) { ; RV64I-NEXT: li a2, 1 ; RV64I-NEXT: subw a2, a2, a1 ; RV64I-NEXT: add a2, a0, a2 -; RV64I-NEXT: lb a2, 0(a2) +; RV64I-NEXT: lbu a2, 0(a2) ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: subw a3, a3, a1 ; RV64I-NEXT: add a0, a0, a3 -; RV64I-NEXT: lb a0, 0(a0) +; 
RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: ret %3 = mul i64 %1, -4294967296 diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index 81fdef107d4db..63fcc6ad9e426 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -162,7 +162,7 @@ define i1 @extractelt_nxv128i1(<vscale x 128 x i1>* %x, i64 %idx) nounwind { ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vmerge.vim v8, v16, 1, v0 ; RV32-NEXT: vs8r.v v8, (a2) -; RV32-NEXT: lb a0, 0(a1) +; RV32-NEXT: lbu a0, 0(a1) ; RV32-NEXT: addi sp, s0, -80 ; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload @@ -202,7 +202,7 @@ define i1 @extractelt_nxv128i1(<vscale x 128 x i1>* %x, i64 %idx) nounwind { ; RV64-NEXT: vmv1r.v v0, v8 ; RV64-NEXT: vmerge.vim v8, v16, 1, v0 ; RV64-NEXT: vs8r.v v8, (a2) -; RV64-NEXT: lb a0, 0(a1) +; RV64-NEXT: lbu a0, 0(a1) ; RV64-NEXT: addi sp, s0, -80 ; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll index 34002c4015b53..311491fa6018c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-asm.ll @@ -845,8 +845,8 @@ define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, pt ; CHECK-NEXT: add a1, a1, a4 ; CHECK-NEXT: .LBB13_6: # %bb35 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lb a3, 0(a1) -; CHECK-NEXT: lb a4, 0(a0) +; CHECK-NEXT: lbu a3, 0(a1) +; CHECK-NEXT: lbu a4, 0(a0) ; CHECK-NEXT: add a3, a4, a3 ; CHECK-NEXT: sb a3, 0(a0) ; CHECK-NEXT: addiw a2, a2, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll index ded6df1a77ef9..47add40335931 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll @@ -343,7 +343,7 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: vmerge.vim v8, v16, 1, v0 ; RV32-NEXT: addi a0, sp, 128 ; RV32-NEXT: vse8.v v8, (a0) -; RV32-NEXT: lb a0, 0(a1) +; RV32-NEXT: lbu a0, 0(a1) ; RV32-NEXT: addi sp, s0, -384 ; RV32-NEXT: lw ra, 380(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 376(sp) # 4-byte Folded Reload @@ -374,7 +374,7 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: vmerge.vim v8, v16, 1, v0 ; RV64-NEXT: addi a0, sp, 128 ; RV64-NEXT: vse8.v v8, (a0) -; RV64-NEXT: lb a0, 0(a1) +; RV64-NEXT: lbu a0, 0(a1) ; RV64-NEXT: addi sp, s0, -384 ; RV64-NEXT: ld ra, 376(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 368(sp) # 8-byte Folded Reload @@ -405,7 +405,7 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: vmerge.vim v8, v16, 1, v0 ; RV32ZBS-NEXT: addi a0, sp, 128 ; RV32ZBS-NEXT: vse8.v v8, (a0) -; RV32ZBS-NEXT: lb a0, 0(a1) +; RV32ZBS-NEXT: lbu a0, 0(a1) ; RV32ZBS-NEXT: addi sp, s0, -384 ; RV32ZBS-NEXT: lw ra, 380(sp) # 4-byte Folded Reload ; RV32ZBS-NEXT: lw s0, 376(sp) # 4-byte Folded Reload @@ -436,7 +436,7 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: vmerge.vim v8, v16, 1, v0 ; RV64ZBS-NEXT: addi a0, sp, 128 ; RV64ZBS-NEXT: vse8.v v8, (a0) -; RV64ZBS-NEXT: lb a0, 0(a1) +; RV64ZBS-NEXT: lbu a0, 0(a1) ; RV64ZBS-NEXT: addi sp, s0, -384 ; RV64ZBS-NEXT: ld ra, 376(sp) # 8-byte Folded 
Reload ; RV64ZBS-NEXT: ld s0, 368(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index b531df25cad7b..5941e662fec27 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -82,13 +82,13 @@ define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) ; RV64ZVE32F-NEXT: .LBB1_2: # %else2 ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB1_3: # %cond.load -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB1_2 ; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1 -; RV64ZVE32F-NEXT: lb a0, 0(a1) +; RV64ZVE32F-NEXT: lbu a0, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 @@ -129,14 +129,14 @@ define <2 x i16> @mgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: beqz a3, .LBB2_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB2_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB2_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 -; RV64ZVE32F-NEXT: lb a0, 0(a1) +; RV64ZVE32F-NEXT: lbu a0, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 @@ -182,14 +182,14 @@ define <2 x i16> @mgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: beqz a3, .LBB3_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB3_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB3_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 -; RV64ZVE32F-NEXT: lb a0, 0(a1) +; RV64ZVE32F-NEXT: lbu a0, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 @@ -235,14 +235,14 @@ define <2 x i32> @mgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: beqz a3, .LBB4_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB4_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB4_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 -; RV64ZVE32F-NEXT: lb a0, 0(a1) +; RV64ZVE32F-NEXT: lbu a0, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 @@ -288,14 +288,14 @@ define <2 x i32> @mgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: beqz a3, .LBB5_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: 
.LBB5_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB5_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 -; RV64ZVE32F-NEXT: lb a0, 0(a1) +; RV64ZVE32F-NEXT: lbu a0, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 @@ -349,14 +349,14 @@ define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: beqz a3, .LBB6_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB6_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB6_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 -; RV64ZVE32F-NEXT: lb a0, 0(a1) +; RV64ZVE32F-NEXT: lbu a0, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 @@ -411,14 +411,14 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: beqz a3, .LBB7_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB7_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB7_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 -; RV64ZVE32F-NEXT: lb a0, 0(a1) +; RV64ZVE32F-NEXT: lbu a0, 0(a1) ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 @@ -471,14 +471,14 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB8_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB8_2 ; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma @@ -487,7 +487,7 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) ; RV64ZVE32F-NEXT: beqz a2, .LBB8_3 ; RV64ZVE32F-NEXT: .LBB8_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma @@ -496,7 +496,7 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) ; RV64ZVE32F-NEXT: beqz a1, .LBB8_4 ; RV64ZVE32F-NEXT: .LBB8_8: # %cond.load7 ; RV64ZVE32F-NEXT: ld a0, 24(a0) -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 @@ -539,14 +539,14 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) { ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB9_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: 
vsetivli zero, 4, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB9_2 ; RV64ZVE32F-NEXT: .LBB9_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma @@ -555,7 +555,7 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) { ; RV64ZVE32F-NEXT: beqz a2, .LBB9_3 ; RV64ZVE32F-NEXT: .LBB9_7: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf4, tu, ma @@ -564,7 +564,7 @@ define <4 x i8> @mgather_truemask_v4i8(<4 x ptr> %ptrs, <4 x i8> %passthru) { ; RV64ZVE32F-NEXT: beqz a1, .LBB9_4 ; RV64ZVE32F-NEXT: .LBB9_8: # %cond.load7 ; RV64ZVE32F-NEXT: ld a0, 24(a0) -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 3 @@ -641,14 +641,14 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_2 ; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -657,7 +657,7 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: beqz a2, .LBB11_3 ; RV64ZVE32F-NEXT: .LBB11_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma @@ -666,7 +666,7 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: beqz a2, .LBB11_4 ; RV64ZVE32F-NEXT: .LBB11_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma @@ -675,7 +675,7 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: beqz a2, .LBB11_5 ; RV64ZVE32F-NEXT: .LBB11_13: # %cond.load10 ; RV64ZVE32F-NEXT: ld a2, 32(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma @@ -684,7 +684,7 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: beqz a2, .LBB11_6 ; RV64ZVE32F-NEXT: .LBB11_14: # %cond.load13 ; RV64ZVE32F-NEXT: ld a2, 40(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; 
RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma @@ -693,7 +693,7 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: beqz a2, .LBB11_7 ; RV64ZVE32F-NEXT: .LBB11_15: # %cond.load16 ; RV64ZVE32F-NEXT: ld a2, 48(a0) -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma @@ -702,7 +702,7 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: beqz a1, .LBB11_8 ; RV64ZVE32F-NEXT: .LBB11_16: # %cond.load19 ; RV64ZVE32F-NEXT: ld a0, 56(a0) -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 7 @@ -739,7 +739,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB12_2: # %else @@ -750,7 +750,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 @@ -762,7 +762,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 2 @@ -782,7 +782,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 5 @@ -802,7 +802,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 3 @@ -812,7 +812,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, mf2, tu, ma @@ -823,7 +823,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: .LBB12_15: # %cond.load16 ; 
RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 6 @@ -834,7 +834,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 7 @@ -12329,7 +12329,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB97_2: # %else @@ -12340,7 +12340,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 1 @@ -12352,7 +12352,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 2 @@ -12372,7 +12372,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 5 @@ -12397,7 +12397,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 9 @@ -12409,7 +12409,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 10 @@ -12429,7 +12429,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma ; RV64ZVE32F-NEXT: 
vslideup.vi v9, v10, 13 @@ -12441,7 +12441,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: # %bb.22: # %cond.load40 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 14 @@ -12454,7 +12454,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 15 @@ -12466,7 +12466,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 3 @@ -12476,7 +12476,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m1, tu, ma @@ -12487,7 +12487,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: .LBB97_28: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 6 @@ -12498,7 +12498,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 7 @@ -12508,7 +12508,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m1, tu, ma @@ -12521,7 +12521,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v10, 11 @@ -12531,7 +12531,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; 
RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m1, tu, ma @@ -12589,7 +12589,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 @@ -12601,7 +12601,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -12615,7 +12615,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: # %bb.5: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12637,7 +12637,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v12, v13, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12664,7 +12664,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12678,7 +12678,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12694,7 +12694,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12708,7 +12708,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12722,7 +12722,7 @@ define <32 x i8> 
@mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v9, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12747,7 +12747,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -12761,7 +12761,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: # %bb.29: # %cond.load52 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12783,7 +12783,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v12, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -12810,7 +12810,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -12824,7 +12824,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: # %bb.40: # %cond.load76 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -12846,7 +12846,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -12860,7 +12860,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: # %bb.46: # %cond.load88 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -12875,7 +12875,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a1, v8 ; RV64ZVE32F-NEXT: add a0, a0, a1 -; RV64ZVE32F-NEXT: lb a0, 0(a0) +; RV64ZVE32F-NEXT: lbu a0, 0(a0) ; RV64ZVE32F-NEXT: li a1, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, 
a0 @@ -12889,7 +12889,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12901,7 +12901,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12913,7 +12913,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB98_52: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12926,7 +12926,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12938,7 +12938,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 @@ -12950,7 +12950,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB98_55: # %cond.load40 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -12963,7 +12963,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -12975,7 +12975,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -12989,7 +12989,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; 
RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -13001,7 +13001,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -13013,7 +13013,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB98_60: # %cond.load64 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -13026,7 +13026,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -13038,7 +13038,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -13052,7 +13052,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 @@ -13064,7 +13064,7 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 0, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: lb a2, 0(a2) +; RV64ZVE32F-NEXT: lbu a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 ; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll index 90b2dd9f03830..863544e5273ad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -62,7 +62,7 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> % ; RV32-NEXT: .LBB4_3: # %cond.load ; RV32-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV32-NEXT: vmv.x.s a1, v8 -; RV32-NEXT: lb a2, 1(a1) +; RV32-NEXT: lbu a2, 1(a1) ; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: slli a2, a2, 8 ; RV32-NEXT: or a1, a2, a1 @@ -74,7 +74,7 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> % ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: lb a1, 1(a0) +; RV32-NEXT: lbu a1, 1(a0) ; RV32-NEXT: lbu a0, 0(a0) ; RV32-NEXT: slli a1, a1, 8 ; RV32-NEXT: or a0, a1, a0 @@ -99,7 +99,7 @@ define <2 x i16> 
@mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> % ; RV64-NEXT: .LBB4_3: # %cond.load ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: lb a2, 1(a1) +; RV64-NEXT: lbu a2, 1(a1) ; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: slli a2, a2, 8 ; RV64-NEXT: or a1, a2, a1 @@ -111,7 +111,7 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> % ; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 1 ; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: lb a1, 1(a0) +; RV64-NEXT: lbu a1, 1(a0) ; RV64-NEXT: lbu a0, 0(a0) ; RV64-NEXT: slli a1, a1, 8 ; RV64-NEXT: or a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index 91efe2c223a07..fc7f1f588369f 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -310,7 +310,7 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lb a0, 12(a0) +; RV32-NEXT: lbu a0, 12(a0) ; RV32-NEXT: lw a1, 8(s0) ; RV32-NEXT: slli a2, a0, 30 ; RV32-NEXT: lw a3, 4(s0) @@ -389,7 +389,7 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: mv s0, a0 -; RV64-NEXT: lb a0, 12(a0) +; RV64-NEXT: lbu a0, 12(a0) ; RV64-NEXT: lwu a1, 8(s0) ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: ld a2, 0(s0) @@ -460,7 +460,7 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32M-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32M-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32M-NEXT: mv s0, a0 -; RV32M-NEXT: lb a0, 12(a0) +; RV32M-NEXT: lbu a0, 12(a0) ; RV32M-NEXT: lw a1, 8(s0) ; RV32M-NEXT: slli a2, a0, 30 ; RV32M-NEXT: lw a3, 4(s0) @@ -535,7 +535,7 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64M-NEXT: ld a1, 0(a0) ; RV64M-NEXT: lwu a2, 8(a0) ; RV64M-NEXT: srli a3, a1, 2 -; RV64M-NEXT: lb a4, 12(a0) +; RV64M-NEXT: lbu a4, 12(a0) ; RV64M-NEXT: slli a5, a2, 62 ; RV64M-NEXT: or a3, a5, a3 ; RV64M-NEXT: srai a3, a3, 31 @@ -610,7 +610,7 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32MV-NEXT: mv s2, a0 ; RV32MV-NEXT: lw a0, 8(a0) ; RV32MV-NEXT: lw a1, 4(s2) -; RV32MV-NEXT: lb a2, 12(s2) +; RV32MV-NEXT: lbu a2, 12(s2) ; RV32MV-NEXT: slli a3, a0, 31 ; RV32MV-NEXT: srli a4, a1, 1 ; RV32MV-NEXT: or s3, a4, a3 @@ -712,7 +712,7 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64MV-NEXT: sd s0, 48(sp) # 8-byte Folded Spill ; RV64MV-NEXT: addi s0, sp, 64 ; RV64MV-NEXT: andi sp, sp, -32 -; RV64MV-NEXT: lb a1, 12(a0) +; RV64MV-NEXT: lbu a1, 12(a0) ; RV64MV-NEXT: lwu a2, 8(a0) ; RV64MV-NEXT: slli a1, a1, 32 ; RV64MV-NEXT: ld a3, 0(a0) diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index d46e6c680aeff..083a06c0b3558 100644 --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -13,7 +13,7 @@ define i8 @load_i8(ptr %p) { ; ALL-LABEL: load_i8: ; ALL: # %bb.0: -; ALL-NEXT: lb a0, 0(a0) +; ALL-NEXT: lbu a0, 0(a0) ; ALL-NEXT: ret %res = load i8, ptr %p, align 1 ret i8 %res @@ -22,7 +22,7 @@ define i8 @load_i8(ptr %p) { define i16 @load_i16(ptr %p) { ; NOMISALIGN-LABEL: load_i16: ; NOMISALIGN: # %bb.0: -; NOMISALIGN-NEXT: lb a1, 1(a0) +; NOMISALIGN-NEXT: lbu a1, 1(a0) ; NOMISALIGN-NEXT: lbu a0, 0(a0) ; NOMISALIGN-NEXT: 
slli a1, a1, 8 ; NOMISALIGN-NEXT: or a0, a1, a0 @@ -41,7 +41,7 @@ define i24 @load_i24(ptr %p) { ; NOMISALIGN: # %bb.0: ; NOMISALIGN-NEXT: lbu a1, 1(a0) ; NOMISALIGN-NEXT: lbu a2, 0(a0) -; NOMISALIGN-NEXT: lb a0, 2(a0) +; NOMISALIGN-NEXT: lbu a0, 2(a0) ; NOMISALIGN-NEXT: slli a1, a1, 8 ; NOMISALIGN-NEXT: or a1, a1, a2 ; NOMISALIGN-NEXT: slli a0, a0, 16 @@ -50,7 +50,7 @@ define i24 @load_i24(ptr %p) { ; ; MISALIGN-LABEL: load_i24: ; MISALIGN: # %bb.0: -; MISALIGN-NEXT: lb a1, 2(a0) +; MISALIGN-NEXT: lbu a1, 2(a0) ; MISALIGN-NEXT: lhu a0, 0(a0) ; MISALIGN-NEXT: slli a1, a1, 16 ; MISALIGN-NEXT: or a0, a0, a1 @@ -60,33 +60,19 @@ define i24 @load_i24(ptr %p) { } define i32 @load_i32(ptr %p) { -; RV32I-LABEL: load_i32: -; RV32I: # %bb.0: -; RV32I-NEXT: lbu a1, 1(a0) -; RV32I-NEXT: lbu a2, 0(a0) -; RV32I-NEXT: lbu a3, 2(a0) -; RV32I-NEXT: lbu a0, 3(a0) -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: slli a3, a3, 16 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: ret -; -; RV64I-LABEL: load_i32: -; RV64I: # %bb.0: -; RV64I-NEXT: lbu a1, 1(a0) -; RV64I-NEXT: lbu a2, 0(a0) -; RV64I-NEXT: lbu a3, 2(a0) -; RV64I-NEXT: lb a0, 3(a0) -; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: slli a3, a3, 16 -; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: ret +; NOMISALIGN-LABEL: load_i32: +; NOMISALIGN: # %bb.0: +; NOMISALIGN-NEXT: lbu a1, 1(a0) +; NOMISALIGN-NEXT: lbu a2, 0(a0) +; NOMISALIGN-NEXT: lbu a3, 2(a0) +; NOMISALIGN-NEXT: lbu a0, 3(a0) +; NOMISALIGN-NEXT: slli a1, a1, 8 +; NOMISALIGN-NEXT: or a1, a1, a2 +; NOMISALIGN-NEXT: slli a3, a3, 16 +; NOMISALIGN-NEXT: slli a0, a0, 24 +; NOMISALIGN-NEXT: or a0, a0, a3 +; NOMISALIGN-NEXT: or a0, a0, a1 +; NOMISALIGN-NEXT: ret ; ; MISALIGN-LABEL: load_i32: ; MISALIGN: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index 36d064ea3d505..c3cb8abd82019 100644 --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -330,7 +330,7 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lb a0, 4(a0) +; RV32-NEXT: lbu a0, 4(a0) ; RV32-NEXT: lw a1, 0(s0) ; RV32-NEXT: slli a0, a0, 10 ; RV32-NEXT: srli s1, a1, 22 @@ -437,7 +437,7 @@ define void @test_urem_vec(ptr %X) nounwind { ; ; RV32M-LABEL: test_urem_vec: ; RV32M: # %bb.0: -; RV32M-NEXT: lb a1, 4(a0) +; RV32M-NEXT: lbu a1, 4(a0) ; RV32M-NEXT: lw a2, 0(a0) ; RV32M-NEXT: slli a1, a1, 10 ; RV32M-NEXT: srli a3, a2, 22 @@ -528,7 +528,7 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32MV-NEXT: slli a2, a1, 10 ; RV32MV-NEXT: srli a2, a2, 21 ; RV32MV-NEXT: sh a2, 10(sp) -; RV32MV-NEXT: lb a2, 4(a0) +; RV32MV-NEXT: lbu a2, 4(a0) ; RV32MV-NEXT: slli a2, a2, 10 ; RV32MV-NEXT: srli a1, a1, 22 ; RV32MV-NEXT: or a1, a1, a2 From ebcc6dba5f0815877322256095b400b31adac5f4 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 21 Mar 2023 21:00:46 -0500 Subject: [PATCH 250/691] [libc] Don't install the GPU startup code for now Summary: This startup code is only intended to be used internally, we shouldn't export it under a conflicting name. In the future we may package this in an exportable format. 
--- libc/startup/gpu/CMakeLists.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/libc/startup/gpu/CMakeLists.txt b/libc/startup/gpu/CMakeLists.txt index 33afe1ef3f836..7bd5f9e568a1e 100644 --- a/libc/startup/gpu/CMakeLists.txt +++ b/libc/startup/gpu/CMakeLists.txt @@ -56,8 +56,4 @@ set(startup_components crt1) foreach(target IN LISTS startup_components) set(fq_target_name libc.startup.gpu.${target}) add_dependencies(libc-startup ${fq_target_name}) - install(FILES $ - DESTINATION ${CMAKE_INSTALL_LIBDIR} - RENAME $ - COMPONENT libc) endforeach() From de9edf4afecc1c2caf3b552f9241008ad2bd40a8 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 21 Mar 2023 15:48:11 -0700 Subject: [PATCH 251/691] [OpenMP] Avoid zero size copies to the device This unblocks one of the XFAIL tests for AMD, though we need to work around the missing printf still. Differential Revision: https://reviews.llvm.org/D146592 --- openmp/libomptarget/src/device.cpp | 3 +- openmp/libomptarget/src/omptarget.cpp | 2 +- .../test/mapping/data_member_ref.cpp | 34 ++++++++++++------- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index d670bad1342bc..1f5d5a23371db 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -321,7 +321,8 @@ TargetPointerResultTy DeviceTy::getTargetPointer( // If the target pointer is valid, and we need to transfer data, issue the // data transfer. - if (TargetPointer && !IsHostPtr && HasFlagTo && (IsNew || HasFlagAlways)) { + if (TargetPointer && !IsHostPtr && HasFlagTo && (IsNew || HasFlagAlways) && + Size != 0) { // Lock the entry before releasing the mapping table lock such that another // thread that could issue data movement will get the right result. std::lock_guard LG(*Entry); diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 392a9f79bd760..2158b948bc9ec 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -1028,7 +1028,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // Move data back to the host const bool HasAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM; - if (HasFrom && (HasAlways || IsLast) && !IsHostPtr) { + if (HasFrom && (HasAlways || IsLast) && !IsHostPtr && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); diff --git a/openmp/libomptarget/test/mapping/data_member_ref.cpp b/openmp/libomptarget/test/mapping/data_member_ref.cpp index 6b52a04e34f1d..fdb8abcaa6506 100644 --- a/openmp/libomptarget/test/mapping/data_member_ref.cpp +++ b/openmp/libomptarget/test/mapping/data_member_ref.cpp @@ -1,8 +1,5 @@ // RUN: %libomptarget-compilexx-run-and-check-generic -// Wrong results on amdgpu -// XFAIL: amdgcn-amd-amdhsa - #include struct View { @@ -26,42 +23,55 @@ int main() { int Data = 123456; V1.Data = &Data; Foo Baz(V1); + int D1, D2; // CHECK: Host 123456. printf("Host %d.\n", Bar.VRef.Data); -#pragma omp target map(Bar.VRef) +#pragma omp target map(Bar.VRef) map(from : D1, D2) { // CHECK: Device 123456. - printf("Device %d.\n", Bar.VRef.Data); + D1 = Bar.VRef.Data; + printf("Device %d.\n", D1); V.Data = 654321; // CHECK: Device 654321. 
- printf("Device %d.\n", Bar.VRef.Data); + D2 = Bar.VRef.Data; + printf("Device %d.\n", D2); } + printf("Device %d.\n", D1); + printf("Device %d.\n", D2); // CHECK: Host 654321 654321. printf("Host %d %d.\n", Bar.VRef.Data, V.Data); V.Data = 123456; // CHECK: Host 123456. printf("Host %d.\n", Bar.VRef.Data); -#pragma omp target map(Bar) map(Bar.VRef) +#pragma omp target map(Bar) map(Bar.VRef) map(from : D1, D2) { // CHECK: Device 123456. - printf("Device %d.\n", Bar.VRef.Data); + D1 = Bar.VRef.Data; + printf("Device %d.\n", D1); V.Data = 654321; // CHECK: Device 654321. - printf("Device %d.\n", Bar.VRef.Data); + D2 = Bar.VRef.Data; + printf("Device %d.\n", D2); } + printf("Device %d.\n", D1); + printf("Device %d.\n", D2); // CHECK: Host 654321 654321. printf("Host %d %d.\n", Bar.VRef.Data, V.Data); // CHECK: Host 123456. printf("Host %d.\n", *Baz.VRef.Data); -#pragma omp target map(*Baz.VRef.Data) +#pragma omp target map(*Baz.VRef.Data) map(from : D1, D2) { // CHECK: Device 123456. - printf("Device %d.\n", *Baz.VRef.Data); + D1 = *Baz.VRef.Data; + printf("Device %d.\n", D1); *V1.Data = 654321; // CHECK: Device 654321. - printf("Device %d.\n", *Baz.VRef.Data); + D2 = *Baz.VRef.Data; + printf("Device %d.\n", D2); } + printf("Device %d.\n", D1); + printf("Device %d.\n", D2); // CHECK: Host 654321 654321 654321. printf("Host %d %d %d.\n", *Baz.VRef.Data, *V1.Data, Data); return 0; From 0153ab6dbc33ae27b033e0dcf2c46e6de13f8521 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 21 Mar 2023 17:43:26 -0700 Subject: [PATCH 252/691] [OpenMP] Remove restriction on the thread count for parallel regions Differential Revision: https://reviews.llvm.org/D112194 --- openmp/libomptarget/DeviceRTL/src/Parallelism.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp index 1fec2e37b2ca5..d2fee11236302 100644 --- a/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp +++ b/openmp/libomptarget/DeviceRTL/src/Parallelism.cpp @@ -54,7 +54,11 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause) { if (NThreadsICV != 0 && NThreadsICV < NumThreads) NumThreads = NThreadsICV; - // Round down to a multiple of WARPSIZE since it is legal to do so in OpenMP. + // SPMD mode allows any number of threads, for generic mode we round down to a + // multiple of WARPSIZE since it is legal to do so in OpenMP. + if (mapping::isSPMDMode()) + return NumThreads; + if (NumThreads < mapping::getWarpSize()) NumThreads = 1; else From f2c385934b0cb5fee14e62528497f76a9a534d77 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 21 Mar 2023 13:40:36 -0700 Subject: [PATCH 253/691] [OpenMP] Remove shadow pointer map and introduce consistent locking The shadow pointer map was problematic as we scanned an entire list if an entry had shadow pointers. The new scheme stores the shadow pointers inside the entries. This allows easy access without any search. It also helps us, but also makes it necessary, to define a consistent locking scheme. The implicit locking of entries via TargetPointerResultTy makes this pretty effortless, however one has to: - Lock HDTTMap before locking an entry. - Do not lock HDTTMap while holding an entry lock. - Hold the entry lock to read or modify an entry. The changes to submitData and retrieveData have been made to ensure 2 when the LIBOMPTARGET_INFO flag is used. Most everything else happens by itself as TargetPointerResultTy acts as a lock_guard for the entry. 
It is a little complicated when we deal with multiple entries, especially as they can be equal. However, one can still follow the rules with reasonable effort. LookupResult are now finally also locking the entry before it is inspected. This is good even if we haven't run into a problem yet. Differential Revision: https://reviews.llvm.org/D123446 --- openmp/libomptarget/include/device.h | 197 +++++++++++----- openmp/libomptarget/src/api.cpp | 18 +- openmp/libomptarget/src/device.cpp | 313 ++++++++++++++------------ openmp/libomptarget/src/omptarget.cpp | 305 +++++++++++-------------- 4 files changed, 435 insertions(+), 398 deletions(-) diff --git a/openmp/libomptarget/include/device.h b/openmp/libomptarget/include/device.h index f1fbc287d017f..695f864b5ec66 100644 --- a/openmp/libomptarget/include/device.h +++ b/openmp/libomptarget/include/device.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -26,6 +27,7 @@ #include "ExclusiveAccess.h" #include "omptarget.h" #include "rtl.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" // Forward declarations. @@ -43,6 +45,22 @@ enum kmp_target_offload_kind { }; typedef enum kmp_target_offload_kind kmp_target_offload_kind_t; +/// Information about shadow pointers. +struct ShadowPtrInfoTy { + void **HstPtrAddr = nullptr; + void *HstPtrVal = nullptr; + void **TgtPtrAddr = nullptr; + void *TgtPtrVal = nullptr; + + bool operator==(const ShadowPtrInfoTy &Other) const { + return HstPtrAddr == Other.HstPtrAddr; + } +}; + +inline bool operator<(const ShadowPtrInfoTy &lhs, const ShadowPtrInfoTy &rhs) { + return lhs.HstPtrAddr < rhs.HstPtrAddr; +} + /// Map between host data and target data. struct HostDataToTargetTy { const uintptr_t HstPtrBase; // host info. @@ -60,8 +78,7 @@ struct HostDataToTargetTy { struct StatesTy { StatesTy(uint64_t DRC, uint64_t HRC) - : DynRefCount(DRC), HoldRefCount(HRC), - MayContainAttachedPointers(false) {} + : DynRefCount(DRC), HoldRefCount(HRC) {} /// The dynamic reference count is the standard reference count as of OpenMP /// 4.5. The hold reference count is an OpenMP extension for the sake of /// OpenACC support. @@ -80,17 +97,10 @@ struct HostDataToTargetTy { uint64_t DynRefCount; uint64_t HoldRefCount; - /// Boolean flag to remember if any subpart of the mapped region might be - /// an attached pointer. - bool MayContainAttachedPointers; - - /// This mutex will be locked when data movement is issued. For targets that - /// doesn't support async data movement, this mutex can guarantee that after - /// it is released, memory region on the target is update to date. For - /// targets that support async data movement, this can guarantee that data - /// movement has been issued. This mutex *must* be locked right before - /// releasing the mapping table lock. - std::mutex UpdateMtx; + /// A map of shadow pointers associated with this entry, the keys are host + /// pointer addresses to identify stale entries. + llvm::SmallSet ShadowPtrInfos; + /// Pointer to the event corresponding to the data update of this map. /// Note: At present this event is created when the first data transfer from /// host to device is issued, and only being used for H2D. 
It is not used @@ -222,16 +232,41 @@ struct HostDataToTargetTy { return ThisRefCount == 1; } - void setMayContainAttachedPointers() const { - States->MayContainAttachedPointers = true; + /// Add the shadow pointer info \p ShadowPtrInfo to this entry but only if the + /// the target ptr value was not already present in the existing set of shadow + /// pointers. Return true if something was added. + bool addShadowPointer(const ShadowPtrInfoTy &ShadowPtrInfo) const { + auto Pair = States->ShadowPtrInfos.insert(ShadowPtrInfo); + if (Pair.second) + return true; + // Check for a stale entry, if found, replace the old one. + if ((*Pair.first).TgtPtrVal == ShadowPtrInfo.TgtPtrVal) + return false; + States->ShadowPtrInfos.erase(ShadowPtrInfo); + return addShadowPointer(ShadowPtrInfo); } - bool getMayContainAttachedPointers() const { - return States->MayContainAttachedPointers; + + /// Apply \p CB to all shadow pointers of this entry. Returns OFFLOAD_FAIL if + /// \p CB returned OFFLOAD_FAIL for any of them, otherwise this returns + /// OFFLOAD_SUCCESS. The entry is locked for this operation. + template int foreachShadowPointerInfo(CBTy CB) const { + for (auto &It : States->ShadowPtrInfos) + if (CB(It) == OFFLOAD_FAIL) + return OFFLOAD_FAIL; + return OFFLOAD_SUCCESS; } - void lock() const { States->UpdateMtx.lock(); } + /// Lock this entry for exclusive access. Ensure to get exclusive access to + /// HDTTMap first! + void lock() const { Mtx.lock(); } + + /// Unlock this entry to allow other threads inspecting it. + void unlock() const { Mtx.unlock(); } - void unlock() const { States->UpdateMtx.unlock(); } +private: + // Mutex that needs to be held before the entry is inspected or modified. The + // HDTTMap mutex needs to be held before trying to lock any HDTT Entry. + mutable std::mutex Mtx; }; /// Wrapper around the HostDataToTargetTy to be used in the HDTT map. In @@ -243,6 +278,7 @@ struct HostDataToTargetMapKeyTy { uintptr_t KeyValue; HostDataToTargetMapKeyTy(void *Key) : KeyValue(uintptr_t(Key)) {} + HostDataToTargetMapKeyTy(uintptr_t Key) : KeyValue(Key) {} HostDataToTargetMapKeyTy(HostDataToTargetTy *HDTT) : KeyValue(HDTT->HstPtrBegin), HDTT(HDTT) {} HostDataToTargetTy *HDTT; @@ -260,49 +296,89 @@ inline bool operator<(const HostDataToTargetMapKeyTy &LHS, return LHS.KeyValue < RHS.KeyValue; } -struct LookupResult { - struct { - unsigned IsContained : 1; - unsigned ExtendsBefore : 1; - unsigned ExtendsAfter : 1; - } Flags; - - /// The corresponding map table entry which is stable. - HostDataToTargetTy *Entry = nullptr; - - LookupResult() : Flags({0, 0, 0}), Entry() {} -}; - /// This struct will be returned by \p DeviceTy::getTargetPointer which provides -/// more data than just a target pointer. +/// more data than just a target pointer. A TargetPointerResultTy that has a non +/// null Entry owns the entry. As long as the TargetPointerResultTy (TPR) exists +/// the entry is locked. To give up ownership without destroying the TPR use the +/// reset() function. struct TargetPointerResultTy { - struct { + struct FlagTy { /// If the map table entry is just created unsigned IsNewEntry : 1; /// If the pointer is actually a host pointer (when unified memory enabled) unsigned IsHostPointer : 1; /// If the pointer is present in the mapping table. unsigned IsPresent : 1; - } Flags = {0, 0, 0}; + /// Flag indicating that this was the last user of the entry and the ref + /// count is now 0. 
+ unsigned IsLast : 1; + } Flags = {0, 0, 0, 0}; + + TargetPointerResultTy(const TargetPointerResultTy &) = delete; + TargetPointerResultTy &operator=(const TargetPointerResultTy &TPR) = delete; + TargetPointerResultTy() {} + + TargetPointerResultTy(FlagTy Flags, HostDataToTargetTy *Entry, + void *TargetPointer) + : Flags(Flags), TargetPointer(TargetPointer), Entry(Entry) { + if (Entry) + Entry->lock(); + } + + TargetPointerResultTy(TargetPointerResultTy &&TPR) + : Flags(TPR.Flags), TargetPointer(TPR.TargetPointer), Entry(TPR.Entry) { + TPR.Entry = nullptr; + } + + TargetPointerResultTy &operator=(TargetPointerResultTy &&TPR) { + if (&TPR != this) { + std::swap(Flags, TPR.Flags); + std::swap(Entry, TPR.Entry); + std::swap(TargetPointer, TPR.TargetPointer); + } + return *this; + } + + ~TargetPointerResultTy() { + if (Entry) + Entry->unlock(); + } bool isPresent() const { return Flags.IsPresent; } bool isHostPointer() const { return Flags.IsHostPointer; } - /// The corresponding map table entry which is stable. - HostDataToTargetTy *Entry = nullptr; - /// The corresponding target pointer void *TargetPointer = nullptr; + + HostDataToTargetTy *getEntry() const { return Entry; } + void setEntry(HostDataToTargetTy *HDTTT, + HostDataToTargetTy *OwnedTPR = nullptr) { + if (Entry) + Entry->unlock(); + Entry = HDTTT; + if (Entry && Entry != OwnedTPR) + Entry->lock(); + } + + void reset() { *this = TargetPointerResultTy(); } + +private: + /// The corresponding map table entry which is stable. + HostDataToTargetTy *Entry = nullptr; }; -/// Map for shadow pointers -struct ShadowPtrValTy { - void *HstPtrVal; - void *TgtPtrAddr; - void *TgtPtrVal; +struct LookupResult { + struct { + unsigned IsContained : 1; + unsigned ExtendsBefore : 1; + unsigned ExtendsAfter : 1; + } Flags; + + LookupResult() : Flags({0, 0, 0}), TPR() {} + + TargetPointerResultTy TPR; }; -typedef std::map ShadowPtrListTy; /// struct PendingCtorDtorListsTy { @@ -336,9 +412,7 @@ struct DeviceTy { PendingCtorsDtorsPerLibrary PendingCtorsDtors; - ShadowPtrListTy ShadowPtrMap; - - std::mutex PendingGlobalsMtx, ShadowMtx; + std::mutex PendingGlobalsMtx; DeviceTy(RTLInfoTy *RTL); // DeviceTy is not copyable @@ -353,7 +427,8 @@ struct DeviceTy { /// Lookup the mapping of \p HstPtrBegin in \p HDTTMap. The accessor ensures /// exclusive access to the HDTT map. LookupResult lookupMapping(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, - int64_t Size); + int64_t Size, + HostDataToTargetTy *OwnedTPR = nullptr); /// Get the target pointer based on host pointer begin and base. If the /// mapping already exists, the target pointer will be returned directly. In @@ -365,12 +440,13 @@ struct DeviceTy { /// - Data allocation failed; /// - The user tried to do an illegal mapping; /// - Data transfer issue fails. - TargetPointerResultTy - getTargetPointer(void *HstPtrBegin, void *HstPtrBase, int64_t Size, - map_var_info_t HstPtrName, bool HasFlagTo, - bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, - bool HasCloseModifier, bool HasPresentModifier, - bool HasHoldModifier, AsyncInfoTy &AsyncInfo); + TargetPointerResultTy getTargetPointer( + HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase, + int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, + bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, + bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier, + AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr, + bool ReleaseHDTTMap = true); /// Return the target pointer for \p HstPtrBegin in \p HDTTMap. 
The accessor /// ensures exclusive access to the HDTT map. @@ -388,10 +464,9 @@ struct DeviceTy { /// - \p FromDataEnd tracks the number of threads referencing the entry at /// targetDataEnd for delayed deletion purpose. [[nodiscard]] TargetPointerResultTy - getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, - bool UpdateRefCount, bool UseHoldRefCount, bool &IsHostPtr, - bool MustContain = false, bool ForceDelete = false, - bool FromDataEnd = false); + getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool UpdateRefCount, + bool UseHoldRefCount, bool MustContain = false, + bool ForceDelete = false, bool FromDataEnd = false); /// Remove the \p Entry from the data map. Expect the entry's total reference /// count to be zero and the caller thread to be the last one using it. \p @@ -436,10 +511,12 @@ struct DeviceTy { // synchronous. // Copy data from host to device int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, - AsyncInfoTy &AsyncInfo); + AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry = nullptr); // Copy data from device back to host int32_t retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size, - AsyncInfoTy &AsyncInfo); + AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry = nullptr); // Copy data from current device to destination device directly int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, int64_t Size, AsyncInfoTy &AsyncInfo); diff --git a/openmp/libomptarget/src/api.cpp b/openmp/libomptarget/src/api.cpp index 47c9d5e2f1eff..2dfe075b8e746 100644 --- a/openmp/libomptarget/src/api.cpp +++ b/openmp/libomptarget/src/api.cpp @@ -116,16 +116,13 @@ EXTERN int omp_target_is_present(const void *Ptr, int DeviceNum) { } DeviceTy &Device = *PM->Devices[DeviceNum]; - bool IsLast; // not used - bool IsHostPtr; // omp_target_is_present tests whether a host pointer refers to storage that // is mapped to a given device. However, due to the lack of the storage size, // only check 1 byte. Cannot set size 0 which checks whether the pointer (zero // lengh array) is mapped instead of the referred storage. 
- TargetPointerResultTy TPR = - Device.getTgtPtrBegin(const_cast(Ptr), 1, IsLast, - /*UpdateRefCount=*/false, - /*UseHoldRefCount=*/false, IsHostPtr); + TargetPointerResultTy TPR = Device.getTgtPtrBegin(const_cast(Ptr), 1, + /*UpdateRefCount=*/false, + /*UseHoldRefCount=*/false); int Rc = TPR.isPresent(); DP("Call to omp_target_is_present returns %d\n", Rc); return Rc; @@ -360,13 +357,10 @@ EXTERN void *omp_get_mapped_ptr(const void *Ptr, int DeviceNum) { return nullptr; } - bool IsLast = false; - bool IsHostPtr = false; auto &Device = *PM->Devices[DeviceNum]; - TargetPointerResultTy TPR = - Device.getTgtPtrBegin(const_cast(Ptr), 1, IsLast, - /*UpdateRefCount=*/false, - /*UseHoldRefCount=*/false, IsHostPtr); + TargetPointerResultTy TPR = Device.getTgtPtrBegin(const_cast(Ptr), 1, + /*UpdateRefCount=*/false, + /*UseHoldRefCount=*/false); if (!TPR.isPresent()) { DP("Ptr " DPxMOD "is not present on device %d, returning nullptr.\n", DPxPTR(Ptr), DeviceNum); diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp index 1f5d5a23371db..09c8e808db463 100644 --- a/openmp/libomptarget/src/device.cpp +++ b/openmp/libomptarget/src/device.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -51,8 +52,7 @@ int HostDataToTargetTy::addEventIfNecessary(DeviceTy &Device, DeviceTy::DeviceTy(RTLInfoTy *RTL) : DeviceID(-1), RTL(RTL), RTLDeviceID(-1), IsInit(false), InitFlag(), - HasPendingGlobals(false), PendingCtorsDtors(), ShadowPtrMap(), - PendingGlobalsMtx(), ShadowMtx() {} + HasPendingGlobals(false), PendingCtorsDtors(), PendingGlobalsMtx() {} DeviceTy::~DeviceTy() { if (DeviceID == -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE)) @@ -69,6 +69,7 @@ int DeviceTy::associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size) { auto It = HDTTMap->find(HstPtrBegin); if (It != HDTTMap->end()) { HostDataToTargetTy &HDTT = *It->HDTT; + std::lock_guard LG(HDTT); // Mapping already exists bool IsValid = HDTT.HstPtrEnd == (uintptr_t)HstPtrBegin + Size && HDTT.TgtPtrBegin == (uintptr_t)TgtPtrBegin; @@ -109,39 +110,41 @@ int DeviceTy::disassociatePtr(void *HstPtrBegin) { HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); auto It = HDTTMap->find(HstPtrBegin); - if (It != HDTTMap->end()) { - HostDataToTargetTy &HDTT = *It->HDTT; - // Mapping exists - if (HDTT.getHoldRefCount()) { - // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657: - // "It is an error to call acc_unmap_data if the structured reference - // count for the pointer is not zero." - REPORT("Trying to disassociate a pointer with a non-zero hold reference " - "count\n"); - } else if (HDTT.isDynRefCountInf()) { - DP("Association found, removing it\n"); - void *Event = HDTT.getEvent(); - delete &HDTT; - if (Event) - destroyEvent(Event); - HDTTMap->erase(It); - - // Notify the plugin about the unmapped memory. - return notifyDataUnmapped(HstPtrBegin); - } else { - REPORT("Trying to disassociate a pointer which was not mapped via " - "omp_target_associate_ptr\n"); - } - } else { + if (It == HDTTMap->end()) { REPORT("Association not found\n"); + return OFFLOAD_FAIL; + } + // Mapping exists + HostDataToTargetTy &HDTT = *It->HDTT; + std::lock_guard LG(HDTT); + + if (HDTT.getHoldRefCount()) { + // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657: + // "It is an error to call acc_unmap_data if the structured reference + // count for the pointer is not zero." 
+ REPORT("Trying to disassociate a pointer with a non-zero hold reference " + "count\n"); + return OFFLOAD_FAIL; + } + + if (HDTT.isDynRefCountInf()) { + DP("Association found, removing it\n"); + void *Event = HDTT.getEvent(); + delete &HDTT; + if (Event) + destroyEvent(Event); + HDTTMap->erase(It); + return OFFLOAD_SUCCESS; } - // Mapping not found + REPORT("Trying to disassociate a pointer which was not mapped via " + "omp_target_associate_ptr\n"); return OFFLOAD_FAIL; } LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap, - void *HstPtrBegin, int64_t Size) { + void *HstPtrBegin, int64_t Size, + HostDataToTargetTy *OwnedTPR) { uintptr_t HP = (uintptr_t)HstPtrBegin; LookupResult LR; @@ -159,42 +162,43 @@ LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap, // upper_bound satisfies // std::prev(upper)->HDTT.HstPtrBegin <= hp < upper->HDTT.HstPtrBegin if (Upper != HDTTMap->begin()) { - LR.Entry = std::prev(Upper)->HDTT; - auto &HT = *LR.Entry; + LR.TPR.setEntry(std::prev(Upper)->HDTT, OwnedTPR); // the left side of extended address range is satisified. - // hp >= HT.HstPtrBegin || hp >= HT.HstPtrBase - LR.Flags.IsContained = HP < HT.HstPtrEnd || HP < HT.HstPtrBase; + // hp >= LR.TPR.getEntry()->HstPtrBegin || hp >= + // LR.TPR.getEntry()->HstPtrBase + LR.Flags.IsContained = HP < LR.TPR.getEntry()->HstPtrEnd || + HP < LR.TPR.getEntry()->HstPtrBase; } if (!LR.Flags.IsContained && Upper != HDTTMap->end()) { - LR.Entry = Upper->HDTT; - auto &HT = *LR.Entry; + LR.TPR.setEntry(Upper->HDTT, OwnedTPR); // the right side of extended address range is satisified. - // hp < HT.HstPtrEnd || hp < HT.HstPtrBase - LR.Flags.IsContained = HP >= HT.HstPtrBase; + // hp < LR.TPR.getEntry()->HstPtrEnd || hp < LR.TPR.getEntry()->HstPtrBase + LR.Flags.IsContained = HP >= LR.TPR.getEntry()->HstPtrBase; } } else { // check the left bin if (Upper != HDTTMap->begin()) { - LR.Entry = std::prev(Upper)->HDTT; - auto &HT = *LR.Entry; + LR.TPR.setEntry(std::prev(Upper)->HDTT, OwnedTPR); // Is it contained? - LR.Flags.IsContained = HP >= HT.HstPtrBegin && HP < HT.HstPtrEnd && - (HP + Size) <= HT.HstPtrEnd; + LR.Flags.IsContained = HP >= LR.TPR.getEntry()->HstPtrBegin && + HP < LR.TPR.getEntry()->HstPtrEnd && + (HP + Size) <= LR.TPR.getEntry()->HstPtrEnd; // Does it extend beyond the mapped region? - LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd; + LR.Flags.ExtendsAfter = HP < LR.TPR.getEntry()->HstPtrEnd && + (HP + Size) > LR.TPR.getEntry()->HstPtrEnd; } // check the right bin if (!(LR.Flags.IsContained || LR.Flags.ExtendsAfter) && Upper != HDTTMap->end()) { - LR.Entry = Upper->HDTT; - auto &HT = *LR.Entry; + LR.TPR.setEntry(Upper->HDTT, OwnedTPR); // Does it extend into an already mapped region? - LR.Flags.ExtendsBefore = - HP < HT.HstPtrBegin && (HP + Size) > HT.HstPtrBegin; + LR.Flags.ExtendsBefore = HP < LR.TPR.getEntry()->HstPtrBegin && + (HP + Size) > LR.TPR.getEntry()->HstPtrBegin; // Does it extend beyond the mapped region? 
- LR.Flags.ExtendsAfter = HP < HT.HstPtrEnd && (HP + Size) > HT.HstPtrEnd; + LR.Flags.ExtendsAfter = HP < LR.TPR.getEntry()->HstPtrEnd && + (HP + Size) > LR.TPR.getEntry()->HstPtrEnd; } if (LR.Flags.ExtendsBefore) { @@ -211,19 +215,19 @@ LookupResult DeviceTy::lookupMapping(HDTTMapAccessorTy &HDTTMap, } TargetPointerResultTy DeviceTy::getTargetPointer( - void *HstPtrBegin, void *HstPtrBase, int64_t Size, - map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways, + HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase, + int64_t Size, map_var_info_t HstPtrName, bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount, bool HasCloseModifier, - bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo) { - HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); + bool HasPresentModifier, bool HasHoldModifier, AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *OwnedTPR, bool ReleaseHDTTMap) { - void *TargetPointer = nullptr; - bool IsHostPtr = false; - bool IsPresent = true; - bool IsNew = false; + LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size, OwnedTPR); + LR.TPR.Flags.IsPresent = true; - LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); - auto *Entry = LR.Entry; + // Release the mapping table lock only after the entry is locked by + // attaching it to TPR. Once TPR is destroyed it will release the lock + // on entry. If it is returned the lock will move to the returned object. + // If LR.Entry is already owned/locked we avoid trying to lock it again. // Check if the pointer is contained. // If a variable is mapped to the device manually by the user - which would @@ -231,38 +235,38 @@ TargetPointerResultTy DeviceTy::getTargetPointer( // device address is returned even under unified memory conditions. if (LR.Flags.IsContained || ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && IsImplicit)) { - auto &HT = *LR.Entry; const char *RefCountAction; if (UpdateRefCount) { // After this, reference count >= 1. If the reference count was 0 but the // entry was still there we can reuse the data on the device and avoid a // new submission. - HT.incRefCount(HasHoldModifier); + LR.TPR.getEntry()->incRefCount(HasHoldModifier); RefCountAction = " (incremented)"; } else { // It might have been allocated with the parent, but it's still new. - IsNew = HT.getTotalRefCount() == 1; + LR.TPR.Flags.IsNewEntry = LR.TPR.getEntry()->getTotalRefCount() == 1; RefCountAction = " (update suppressed)"; } const char *DynRefCountAction = HasHoldModifier ? "" : RefCountAction; const char *HoldRefCountAction = HasHoldModifier ? RefCountAction : ""; - uintptr_t Ptr = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); + uintptr_t Ptr = LR.TPR.getEntry()->TgtPtrBegin + + ((uintptr_t)HstPtrBegin - LR.TPR.getEntry()->HstPtrBegin); INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID, "Mapping exists%s with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s, Name=%s\n", (IsImplicit ? " (implicit)" : ""), DPxPTR(HstPtrBegin), DPxPTR(Ptr), - Size, HT.dynRefCountToStr().c_str(), DynRefCountAction, - HT.holdRefCountToStr().c_str(), HoldRefCountAction, + Size, LR.TPR.getEntry()->dynRefCountToStr().c_str(), DynRefCountAction, + LR.TPR.getEntry()->holdRefCountToStr().c_str(), HoldRefCountAction, (HstPtrName) ? 
getNameFromMapping(HstPtrName).c_str() : "unknown"); - TargetPointer = (void *)Ptr; + LR.TPR.TargetPointer = (void *)Ptr; } else if ((LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) && !IsImplicit) { // Explicit extension of mapped data - not allowed. MESSAGE("explicit extension not allowed: host address specified is " DPxMOD " (%" PRId64 " bytes), but device allocation maps to host at " DPxMOD " (%" PRId64 " bytes)", - DPxPTR(HstPtrBegin), Size, DPxPTR(Entry->HstPtrBegin), - Entry->HstPtrEnd - Entry->HstPtrBegin); + DPxPTR(HstPtrBegin), Size, DPxPTR(LR.TPR.getEntry()->HstPtrBegin), + LR.TPR.getEntry()->HstPtrEnd - LR.TPR.getEntry()->HstPtrBegin); if (HasPresentModifier) MESSAGE("device mapping required by 'present' map type modifier does not " "exist for host address " DPxMOD " (%" PRId64 " bytes)", @@ -279,9 +283,9 @@ TargetPointerResultTy DeviceTy::getTargetPointer( DP("Return HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared " "memory\n", DPxPTR((uintptr_t)HstPtrBegin), Size); - IsPresent = false; - IsHostPtr = true; - TargetPointer = HstPtrBegin; + LR.TPR.Flags.IsPresent = false; + LR.TPR.Flags.IsHostPointer = true; + LR.TPR.TargetPointer = HstPtrBegin; } } else if (HasPresentModifier) { DP("Mapping required by 'present' map type modifier does not exist for " @@ -292,22 +296,25 @@ TargetPointerResultTy DeviceTy::getTargetPointer( DPxPTR(HstPtrBegin), Size); } else if (Size) { // If it is not contained and Size > 0, we should create a new entry for it. - IsNew = true; + LR.TPR.Flags.IsNewEntry = true; uintptr_t Ptr = (uintptr_t)allocData(Size, HstPtrBegin); - Entry = HDTTMap - ->emplace(new HostDataToTargetTy( - (uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin, - (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier, - HstPtrName)) - .first->HDTT; + // Release the mapping table lock only after the entry is locked by + // attaching it to TPR. + LR.TPR.setEntry(HDTTMap + ->emplace(new HostDataToTargetTy( + (uintptr_t)HstPtrBase, (uintptr_t)HstPtrBegin, + (uintptr_t)HstPtrBegin + Size, Ptr, HasHoldModifier, + HstPtrName)) + .first->HDTT); INFO(OMP_INFOTYPE_MAPPING_CHANGED, DeviceID, "Creating new map entry with HstPtrBase=" DPxMOD ", HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", Size=%ld, " "DynRefCount=%s, HoldRefCount=%s, Name=%s\n", DPxPTR(HstPtrBase), DPxPTR(HstPtrBegin), DPxPTR(Ptr), Size, - Entry->dynRefCountToStr().c_str(), Entry->holdRefCountToStr().c_str(), + LR.TPR.getEntry()->dynRefCountToStr().c_str(), + LR.TPR.getEntry()->holdRefCountToStr().c_str(), (HstPtrName) ? getNameFromMapping(HstPtrName).c_str() : "unknown"); - TargetPointer = (void *)Ptr; + LR.TPR.TargetPointer = (void *)Ptr; // Notify the plugin about the new mapping. if (notifyDataMapped(HstPtrBegin, Size)) @@ -316,42 +323,41 @@ TargetPointerResultTy DeviceTy::getTargetPointer( nullptr /* TargetPointer */}; } else { // This entry is not present and we did not create a new entry for it. - IsPresent = false; + LR.TPR.Flags.IsPresent = false; } - // If the target pointer is valid, and we need to transfer data, issue the - // data transfer. - if (TargetPointer && !IsHostPtr && HasFlagTo && (IsNew || HasFlagAlways) && - Size != 0) { - // Lock the entry before releasing the mapping table lock such that another - // thread that could issue data movement will get the right result. - std::lock_guard LG(*Entry); - // Release the mapping table lock right after the entry is locked. + // All mapping table modifications have been made. If the user requested it we + // give up the lock. 
+ if (ReleaseHDTTMap) HDTTMap.destroy(); + // If the target pointer is valid, and we need to transfer data, issue the + // data transfer. + if (LR.TPR.TargetPointer && !LR.TPR.Flags.IsHostPointer && HasFlagTo && + (LR.TPR.Flags.IsNewEntry || HasFlagAlways) && Size != 0) { DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", Size, - DPxPTR(HstPtrBegin), DPxPTR(TargetPointer)); + DPxPTR(HstPtrBegin), DPxPTR(LR.TPR.TargetPointer)); - int Ret = submitData(TargetPointer, HstPtrBegin, Size, AsyncInfo); + int Ret = submitData(LR.TPR.TargetPointer, HstPtrBegin, Size, AsyncInfo, + LR.TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data to device failed.\n"); // We will also return nullptr if the data movement fails because that // pointer points to a corrupted memory region so it doesn't make any // sense to continue to use it. - TargetPointer = nullptr; - } else if (Entry->addEventIfNecessary(*this, AsyncInfo) != OFFLOAD_SUCCESS) + LR.TPR.TargetPointer = nullptr; + } else if (LR.TPR.getEntry()->addEventIfNecessary(*this, AsyncInfo) != + OFFLOAD_SUCCESS) return {{false /* IsNewEntry */, false /* IsHostPointer */}, nullptr /* Entry */, nullptr /* TargetPointer */}; } else { - // Release the mapping table lock directly. - HDTTMap.destroy(); // If not a host pointer and no present modifier, we need to wait for the // event if it exists. // Note: Entry might be nullptr because of zero length array section. - if (Entry && !IsHostPtr && !HasPresentModifier) { - std::lock_guard LG(*Entry); - void *Event = Entry->getEvent(); + if (LR.TPR.getEntry() && !LR.TPR.Flags.IsHostPointer && + !HasPresentModifier) { + void *Event = LR.TPR.getEntry()->getEvent(); if (Event) { int Ret = waitEvent(Event, AsyncInfo); if (Ret != OFFLOAD_SUCCESS) { @@ -366,31 +372,28 @@ TargetPointerResultTy DeviceTy::getTargetPointer( } } - return {{IsNew, IsHostPtr, IsPresent}, Entry, TargetPointer}; + return std::move(LR.TPR); } TargetPointerResultTy -DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, - bool UpdateRefCount, bool UseHoldRefCount, - bool &IsHostPtr, bool MustContain, bool ForceDelete, - bool FromDataEnd) { +DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool UpdateRefCount, + bool UseHoldRefCount, bool MustContain, + bool ForceDelete, bool FromDataEnd) { HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); - void *TargetPointer = NULL; - bool IsNew = false; - bool IsPresent = true; - IsHostPtr = false; - IsLast = false; LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); + LR.TPR.Flags.IsPresent = true; + if (LR.Flags.IsContained || (!MustContain && (LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter))) { - auto &HT = *LR.Entry; - IsLast = HT.decShouldRemove(UseHoldRefCount, ForceDelete); + LR.TPR.Flags.IsLast = + LR.TPR.getEntry()->decShouldRemove(UseHoldRefCount, ForceDelete); if (ForceDelete) { - HT.resetRefCount(UseHoldRefCount); - assert(IsLast == HT.decShouldRemove(UseHoldRefCount) && + LR.TPR.getEntry()->resetRefCount(UseHoldRefCount); + assert(LR.TPR.Flags.IsLast == + LR.TPR.getEntry()->decShouldRemove(UseHoldRefCount) && "expected correct IsLast prediction for reset"); } @@ -400,32 +403,34 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, // for it. Thus, we must track every query on targetDataEnds to ensure only // the last thread that holds a reference to an entry actually deletes it. 
if (FromDataEnd) - HT.incDataEndThreadCount(); + LR.TPR.getEntry()->incDataEndThreadCount(); const char *RefCountAction; if (!UpdateRefCount) { RefCountAction = " (update suppressed)"; - } else if (IsLast) { - HT.decRefCount(UseHoldRefCount); - assert(HT.getTotalRefCount() == 0 && + } else if (LR.TPR.Flags.IsLast) { + LR.TPR.getEntry()->decRefCount(UseHoldRefCount); + assert(LR.TPR.getEntry()->getTotalRefCount() == 0 && "Expected zero reference count when deletion is scheduled"); if (ForceDelete) RefCountAction = " (reset, delayed deletion)"; else RefCountAction = " (decremented, delayed deletion)"; } else { - HT.decRefCount(UseHoldRefCount); + LR.TPR.getEntry()->decRefCount(UseHoldRefCount); RefCountAction = " (decremented)"; } const char *DynRefCountAction = UseHoldRefCount ? "" : RefCountAction; const char *HoldRefCountAction = UseHoldRefCount ? RefCountAction : ""; - uintptr_t TP = HT.TgtPtrBegin + ((uintptr_t)HstPtrBegin - HT.HstPtrBegin); + uintptr_t TP = LR.TPR.getEntry()->TgtPtrBegin + + ((uintptr_t)HstPtrBegin - LR.TPR.getEntry()->HstPtrBegin); INFO(OMP_INFOTYPE_MAPPING_EXISTS, DeviceID, "Mapping exists with HstPtrBegin=" DPxMOD ", TgtPtrBegin=" DPxMOD ", " "Size=%" PRId64 ", DynRefCount=%s%s, HoldRefCount=%s%s\n", - DPxPTR(HstPtrBegin), DPxPTR(TP), Size, HT.dynRefCountToStr().c_str(), - DynRefCountAction, HT.holdRefCountToStr().c_str(), HoldRefCountAction); - TargetPointer = (void *)TP; + DPxPTR(HstPtrBegin), DPxPTR(TP), Size, + LR.TPR.getEntry()->dynRefCountToStr().c_str(), DynRefCountAction, + LR.TPR.getEntry()->holdRefCountToStr().c_str(), HoldRefCountAction); + LR.TPR.TargetPointer = (void *)TP; } else if (PM->RTLs.RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY) { // If the value isn't found in the mapping and unified shared memory // is on then it means we have stumbled upon a value which we need to @@ -433,18 +438,18 @@ DeviceTy::getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, DP("Get HstPtrBegin " DPxMOD " Size=%" PRId64 " for unified shared " "memory\n", DPxPTR((uintptr_t)HstPtrBegin), Size); - IsPresent = false; - IsHostPtr = true; - TargetPointer = HstPtrBegin; + LR.TPR.Flags.IsPresent = false; + LR.TPR.Flags.IsHostPointer = true; + LR.TPR.TargetPointer = HstPtrBegin; } else { // OpenMP Specification v5.2: if a matching list item is not found, the // pointer retains its original value as per firstprivate semantics. - IsPresent = false; - IsHostPtr = false; - TargetPointer = HstPtrBegin; + LR.TPR.Flags.IsPresent = false; + LR.TPR.Flags.IsHostPointer = false; + LR.TPR.TargetPointer = HstPtrBegin; } - return {{IsNew, IsHostPtr, IsPresent}, LR.Entry, TargetPointer}; + return std::move(LR.TPR); } // Return the target pointer begin (where the data will be moved). 
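// The entry ownership used by the returns above, reduced to a minimal
// standalone sketch (hypothetical names; the real TargetPointerResultTy
// also carries the flag bits and the target pointer):
//
//   #include <mutex>
//   #include <utility>
//
//   struct EntryTy {
//     std::mutex Mtx;
//     void lock() { Mtx.lock(); }
//     void unlock() { Mtx.unlock(); }
//   };
//
//   struct ResultTy {
//     EntryTy *Entry = nullptr;
//     ResultTy() = default;
//     explicit ResultTy(EntryTy *E) : Entry(E) {
//       if (Entry)
//         Entry->lock(); // acquire while the map lock is still held
//     }
//     ResultTy(const ResultTy &) = delete; // move-only: one owner at a time
//     ResultTy(ResultTy &&O) : Entry(O.Entry) { O.Entry = nullptr; }
//     ResultTy &operator=(ResultTy &&O) {
//       if (this != &O)
//         std::swap(Entry, O.Entry); // our old entry is released when O dies
//       return *this;
//     }
//     ~ResultTy() {
//       if (Entry)
//         Entry->unlock(); // release on destruction
//     }
//   };
//
// Handing std::move(LR.TPR) to the caller therefore keeps the entry locked
// across the release of the mapping table lock, with no separate re-lock.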
@@ -453,8 +458,8 @@ void *DeviceTy::getTgtPtrBegin(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, uintptr_t HP = (uintptr_t)HstPtrBegin; LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); if (LR.Flags.IsContained || LR.Flags.ExtendsBefore || LR.Flags.ExtendsAfter) { - auto &HT = *LR.Entry; - uintptr_t TP = HT.TgtPtrBegin + (HP - HT.HstPtrBegin); + uintptr_t TP = + LR.TPR.getEntry()->TgtPtrBegin + (HP - LR.TPR.getEntry()->HstPtrBegin); return (void *)TP; } @@ -550,20 +555,32 @@ int32_t DeviceTy::deleteData(void *TgtPtrBegin, int32_t Kind) { return RTL->data_delete(RTLDeviceID, TgtPtrBegin, Kind); } +static void printCopyInfo(int DeviceId, bool H2D, void *SrcPtrBegin, + void *DstPtrBegin, int64_t Size, + HostDataToTargetTy *HT) { + + INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceId, + "Copying data from %s to %s, %sPtr=" DPxMOD ", %sPtr=" DPxMOD + ", Size=%" PRId64 ", Name=%s\n", + H2D ? "host" : "device", H2D ? "device" : "host", H2D ? "Hst" : "Tgt", + DPxPTR(SrcPtrBegin), H2D ? "Tgt" : "Hst", DPxPTR(DstPtrBegin), Size, + (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str() + : "unknown"); +} + // Submit data to device int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, - AsyncInfoTy &AsyncInfo) { + AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry) { if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) { - HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); - LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); - auto *HT = &*LR.Entry; - - INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID, - "Copying data from host to device, HstPtr=" DPxMOD ", TgtPtr=" DPxMOD - ", Size=%" PRId64 ", Name=%s\n", - DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin), Size, - (HT && HT->HstPtrName) ? getNameFromMapping(HT->HstPtrName).c_str() - : "unknown"); + HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(Entry); + LookupResult LR; + if (!Entry) { + LR = lookupMapping(HDTTMap, HstPtrBegin, Size); + Entry = LR.TPR.getEntry(); + } + printCopyInfo(DeviceID, /* H2D */ true, HstPtrBegin, TgtPtrBegin, Size, + Entry); } if (!AsyncInfo || !RTL->data_submit_async || !RTL->synchronize) @@ -574,17 +591,17 @@ int32_t DeviceTy::submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, // Retrieve data from device int32_t DeviceTy::retrieveData(void *HstPtrBegin, void *TgtPtrBegin, - int64_t Size, AsyncInfoTy &AsyncInfo) { + int64_t Size, AsyncInfoTy &AsyncInfo, + HostDataToTargetTy *Entry) { if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER) { - HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(); - LookupResult LR = lookupMapping(HDTTMap, HstPtrBegin, Size); - auto *HT = &*LR.Entry; - INFO(OMP_INFOTYPE_DATA_TRANSFER, DeviceID, - "Copying data from device to host, TgtPtr=" DPxMOD ", HstPtr=" DPxMOD - ", Size=%" PRId64 ", Name=%s\n", - DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin), Size, - (HT && HT->HstPtrName) ? 
getNameFromMapping(HT->HstPtrName).c_str() - : "unknown"); + HDTTMapAccessorTy HDTTMap = HostDataToTargetMap.getExclusiveAccessor(Entry); + LookupResult LR; + if (!Entry) { + LR = lookupMapping(HDTTMap, HstPtrBegin, Size); + Entry = LR.TPR.getEntry(); + } + printCopyInfo(DeviceID, /* H2D */ false, TgtPtrBegin, HstPtrBegin, Size, + Entry); } if (!RTL->data_retrieve_async || !RTL->synchronize) diff --git a/openmp/libomptarget/src/omptarget.cpp b/openmp/libomptarget/src/omptarget.cpp index 2158b948bc9ec..69af6ac24a41d 100644 --- a/openmp/libomptarget/src/omptarget.cpp +++ b/openmp/libomptarget/src/omptarget.cpp @@ -633,6 +633,9 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // may be considered a hack, we could revise the scheme in the future. bool UpdateRef = !(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) && !(FromMapper && I == 0); + + DeviceTy::HDTTMapAccessorTy HDTTMap = + Device.HostDataToTargetMap.getExclusiveAccessor(); if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) { DP("Has a pointer entry: \n"); // Base is address of pointer. @@ -649,9 +652,11 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // PTR_AND_OBJ entry is handled below, and so the allocation might fail // when HasPresentModifier. PointerTpr = Device.getTargetPointer( - HstPtrBase, HstPtrBase, sizeof(void *), /*HstPtrName=*/nullptr, + HDTTMap, HstPtrBase, HstPtrBase, sizeof(void *), + /*HstPtrName=*/nullptr, /*HasFlagTo=*/false, /*HasFlagAlways=*/false, IsImplicit, UpdateRef, - HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo); + HasCloseModifier, HasPresentModifier, HasHoldModifier, AsyncInfo, + /* OwnedTPR */ nullptr, /* ReleaseHDTTMap */ false); PointerTgtPtrBegin = PointerTpr.TargetPointer; IsHostPtr = PointerTpr.Flags.IsHostPointer; if (!PointerTgtPtrBegin) { @@ -675,10 +680,11 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, const bool HasFlagTo = ArgTypes[I] & OMP_TGT_MAPTYPE_TO; const bool HasFlagAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; + // Note that HDTTMap will be released in getTargetPointer. auto TPR = Device.getTargetPointer( - HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo, HasFlagAlways, - IsImplicit, UpdateRef, HasCloseModifier, HasPresentModifier, - HasHoldModifier, AsyncInfo); + HDTTMap, HstPtrBegin, HstPtrBase, DataSize, HstPtrName, HasFlagTo, + HasFlagAlways, IsImplicit, UpdateRef, HasCloseModifier, + HasPresentModifier, HasHoldModifier, AsyncInfo, PointerTpr.getEntry()); void *TgtPtrBegin = TPR.TargetPointer; IsHostPtr = TPR.Flags.IsHostPointer; // If data_size==0, then the argument could be a zero-length pointer to @@ -701,55 +707,30 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, } if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ && !IsHostPtr) { - // Check whether we need to update the pointer on the device - bool UpdateDevPtr = false; uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; void *ExpectedTgtPtrBase = (void *)((uint64_t)TgtPtrBegin - Delta); - Device.ShadowMtx.lock(); - auto Entry = Device.ShadowPtrMap.find(PointerHstPtrBegin); - // If this pointer is not in the map we need to insert it. If the map - // contains a stale entry, we need to update it (e.g. if the pointee was - // deallocated and later on is reallocated at another device address). The - // latter scenario is the subject of LIT test env/base_ptr_ref_count.c. An - // entry is removed from ShadowPtrMap only when the PTR of a PTR_AND_OBJ - // pair is deallocated, not when the OBJ is deallocated. 
In - // env/base_ptr_ref_count.c the PTR is a global "declare target" pointer, - // so it stays in the map for the lifetime of the application. When the - // OBJ is deallocated and later on allocated again (at a different device - // address), ShadowPtrMap still contains an entry for Pointer_HstPtrBegin - // which is stale, pointing to the old ExpectedTgtPtrBase of the OBJ. - if (Entry == Device.ShadowPtrMap.end() || - Entry->second.TgtPtrVal != ExpectedTgtPtrBase) { - // create or update shadow pointers for this entry - Device.ShadowPtrMap[PointerHstPtrBegin] = { - HstPtrBase, PointerTgtPtrBegin, ExpectedTgtPtrBase}; - PointerTpr.Entry->setMayContainAttachedPointers(); - UpdateDevPtr = true; - } - - if (UpdateDevPtr) { - std::lock_guard LG(*PointerTpr.Entry); - Device.ShadowMtx.unlock(); - + if (PointerTpr.getEntry()->addShadowPointer(ShadowPtrInfoTy{ + (void **)PointerHstPtrBegin, HstPtrBase, + (void **)PointerTgtPtrBegin, ExpectedTgtPtrBase})) { DP("Update pointer (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); void *&TgtPtrBase = AsyncInfo.getVoidPtrLocation(); TgtPtrBase = ExpectedTgtPtrBase; - int Ret = Device.submitData(PointerTgtPtrBegin, &TgtPtrBase, - sizeof(void *), AsyncInfo); + int Ret = + Device.submitData(PointerTgtPtrBegin, &TgtPtrBase, sizeof(void *), + AsyncInfo, PointerTpr.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL; } - if (PointerTpr.Entry->addEventIfNecessary(Device, AsyncInfo) != + if (PointerTpr.getEntry()->addEventIfNecessary(Device, AsyncInfo) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; - } else - Device.ShadowMtx.unlock(); + } } } @@ -777,51 +758,11 @@ struct PostProcessingInfo { TargetPointerResultTy TPR; PostProcessingInfo(void *HstPtr, int64_t Size, int64_t ArgType, - int32_t ArgIndex, TargetPointerResultTy TPR) + int32_t ArgIndex, TargetPointerResultTy &&TPR) : HstPtrBegin(HstPtr), DataSize(Size), ArgType(ArgType), - ArgIndex(ArgIndex), TPR(TPR) {} + ArgIndex(ArgIndex), TPR(std::move(TPR)) {} }; -/// Apply \p CB to the shadow map pointer entries in the range \p Begin, to -/// \p Begin + \p Size. \p CB is called with a locked shadow pointer map and the -/// passed iterator can be updated. If the callback returns OFFLOAD_FAIL the -/// rest of the map is not checked anymore. -template -static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin, - uintptr_t Size, - const TargetPointerResultTy &TPR) { - // If we have an object that is too small to hold a pointer subobject, no need - // to do any checking. - if (Size < sizeof(void *)) - return; - - // If the map entry for the object was never marked as containing attached - // pointers, no need to do any checking. - if (!TPR.Entry || !TPR.Entry->getMayContainAttachedPointers()) - return; - - uintptr_t LB = (uintptr_t)Begin; - uintptr_t UB = LB + Size; - // Now we are looking into the shadow map so we need to lock it. - std::lock_guard LG(Device.ShadowMtx); - for (ShadowPtrListTy::iterator Itr = Device.ShadowPtrMap.begin(); - Itr != Device.ShadowPtrMap.end();) { - uintptr_t ShadowHstPtrAddr = (uintptr_t)Itr->first; - - // An STL map is sorted on its keys; use this property - // to quickly determine when to break out of the loop. 
- if (ShadowHstPtrAddr < LB) { - ++Itr; - continue; - } - if (ShadowHstPtrAddr >= UB) - break; - - if (CB(Itr) == OFFLOAD_FAIL) - break; - } -} - } // namespace /// Applies the necessary post-processing procedures to entries listed in \p @@ -831,7 +772,7 @@ static void applyToShadowMapEntries(DeviceTy &Device, CBTy CB, void *Begin, /// according to the successfulness of the operations. [[nodiscard]] static int postProcessingTargetDataEnd(DeviceTy *Device, - SmallVector EntriesInfo, + SmallVector &EntriesInfo, bool FromMapper) { int Ret = OFFLOAD_SUCCESS; void *FromMapperBase = nullptr; @@ -859,10 +800,16 @@ postProcessingTargetDataEnd(DeviceTy *Device, // not request (exclusive) access to the HDTT map if DelEntry is // not set. DeviceTy::HDTTMapAccessorTy HDTTMap = - Device->HostDataToTargetMap.getExclusiveAccessor(!DelEntry); + Device->HostDataToTargetMap.getExclusiveAccessor(); + + // We cannot use a lock guard because we may end up delete the mutex. + // We also explicitly unlocked the entry after it was put in the EntriesInfo + // so it can be reused. + TPR.getEntry()->lock(); + auto *Entry = TPR.getEntry(); - const bool IsNotLastUser = TPR.Entry->decDataEndThreadCount() != 0; - if (DelEntry && (TPR.Entry->getTotalRefCount() != 0 || IsNotLastUser)) { + const bool IsNotLastUser = Entry->decDataEndThreadCount() != 0; + if (DelEntry && (Entry->getTotalRefCount() != 0 || IsNotLastUser)) { // The thread is not in charge of deletion anymore. Give up access // to the HDTT map and unset the deletion flag. HDTTMap.destroy(); @@ -874,44 +821,35 @@ postProcessingTargetDataEnd(DeviceTy *Device, // shadow copies. If the struct is going to be deallocated, remove any // remaining shadow pointer entries for this struct. const bool HasFrom = ArgType & OMP_TGT_MAPTYPE_FROM; - auto CB = [&](ShadowPtrListTy::iterator &Itr) { - // If we copied the struct to the host, we need to restore the - // pointer. - if (HasFrom) { - void **ShadowHstPtrAddr = (void **)Itr->first; - *ShadowHstPtrAddr = Itr->second.HstPtrVal; - DP("Restoring original host pointer value " DPxMOD " for host " - "pointer " DPxMOD "\n", - DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)); - } - // If the struct is to be deallocated, remove the shadow entry. - if (DelEntry) { - DP("Removing shadow pointer " DPxMOD "\n", DPxPTR((void **)Itr->first)); - auto OldItr = Itr; - Itr++; - Device->ShadowPtrMap.erase(OldItr); - } else { - ++Itr; - } - return OFFLOAD_SUCCESS; - }; - applyToShadowMapEntries(*Device, CB, HstPtrBegin, DataSize, TPR); + if (HasFrom) { + Entry->foreachShadowPointerInfo( + [&](const ShadowPtrInfoTy &ShadowPtr) { + *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; + DP("Restoring original host pointer value " DPxMOD " for host " + "pointer " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); + return OFFLOAD_SUCCESS; + }); + } + + // Give up the lock as we either don't need it anymore (e.g., done with + // TPR), or erase TPR. + TPR.setEntry(nullptr); if (!DelEntry || (FromMapperBase && FromMapperBase == HstPtrBegin)) continue; - // If we are deleting the entry the DataMapMtx is locked and we own - // the entry. - Ret = Device->eraseMapEntry(HDTTMap, TPR.Entry, DataSize); + Ret = Device->eraseMapEntry(HDTTMap, Entry, DataSize); // Entry is already remove from the map, we can unlock it now. 
HDTTMap.destroy(); - Ret |= Device->deallocTgtPtrAndEntry(TPR.Entry, DataSize); + Ret |= Device->deallocTgtPtrAndEntry(Entry, DataSize); if (Ret != OFFLOAD_SUCCESS) { REPORT("Deallocating data from device failed.\n"); break; } } + delete &EntriesInfo; return Ret; } @@ -921,7 +859,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, int64_t *ArgTypes, map_var_info_t *ArgNames, void **ArgMappers, AsyncInfoTy &AsyncInfo, bool FromMapper) { int Ret = OFFLOAD_SUCCESS; - SmallVector PostProcessingPtrs; + auto *PostProcessingPtrs = new SmallVector(); void *FromMapperBase = nullptr; // process each input. for (int32_t I = ArgNum - 1; I >= 0; --I) { @@ -972,7 +910,6 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, } } - bool IsLast, IsHostPtr; bool IsImplicit = ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT; bool UpdateRef = (!(ArgTypes[I] & OMP_TGT_MAPTYPE_MEMBER_OF) || (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ)) && @@ -982,9 +919,9 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, bool HasHoldModifier = ArgTypes[I] & OMP_TGT_MAPTYPE_OMPX_HOLD; // If PTR_AND_OBJ, HstPtrBegin is address of pointee - TargetPointerResultTy TPR = Device.getTgtPtrBegin( - HstPtrBegin, DataSize, IsLast, UpdateRef, HasHoldModifier, IsHostPtr, - !IsImplicit, ForceDelete, /*FromDataEnd=*/true); + TargetPointerResultTy TPR = + Device.getTgtPtrBegin(HstPtrBegin, DataSize, UpdateRef, HasHoldModifier, + !IsImplicit, ForceDelete, /*FromDataEnd=*/true); void *TgtPtrBegin = TPR.TargetPointer; if (!TPR.isPresent() && !TPR.isHostPointer() && (DataSize || HasPresentModifier)) { @@ -1014,7 +951,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, } else { DP("There are %" PRId64 " bytes allocated at target address " DPxMOD " - is%s last\n", - DataSize, DPxPTR(TgtPtrBegin), (IsLast ? "" : " not")); + DataSize, DPxPTR(TgtPtrBegin), (TPR.Flags.IsLast ? "" : " not")); } // OpenMP 5.1, sec. 2.21.7.1 "map Clause", p. 351 L14-16: @@ -1028,20 +965,21 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // Move data back to the host const bool HasAlways = ArgTypes[I] & OMP_TGT_MAPTYPE_ALWAYS; const bool HasFrom = ArgTypes[I] & OMP_TGT_MAPTYPE_FROM; - if (HasFrom && (HasAlways || IsLast) && !IsHostPtr && DataSize != 0) { + if (HasFrom && (HasAlways || TPR.Flags.IsLast) && + !TPR.Flags.IsHostPointer && DataSize != 0) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - std::lock_guard LG(*TPR.Entry); // Wait for any previous transfer if an event is present. - if (void *Event = TPR.Entry->getEvent()) { + if (void *Event = TPR.getEntry()->getEvent()) { if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) { REPORT("Failed to wait for event " DPxMOD ".\n", DPxPTR(Event)); return OFFLOAD_FAIL; } } - Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo); + Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, DataSize, AsyncInfo, + TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data from device failed.\n"); return OFFLOAD_FAIL; @@ -1052,26 +990,26 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum, // as the entry can be reused and the reuse might happen after the // copy-back was issued but before it completed. Since the reuse might // also copy-back a value we would race. 
- if (IsLast) { - if (TPR.Entry->addEventIfNecessary(Device, AsyncInfo) != + if (TPR.Flags.IsLast) { + if (TPR.getEntry()->addEventIfNecessary(Device, AsyncInfo) != OFFLOAD_SUCCESS) return OFFLOAD_FAIL; } } // Add pointer to the buffer for post-synchronize processing. - PostProcessingPtrs.emplace_back(HstPtrBegin, DataSize, ArgTypes[I], I, TPR); + PostProcessingPtrs->emplace_back(HstPtrBegin, DataSize, ArgTypes[I], I, + std::move(TPR)); + PostProcessingPtrs->back().TPR.getEntry()->unlock(); } // Add post-processing functions // TODO: We might want to remove `mutable` in the future by not changing the // captured variables somehow. - AsyncInfo.addPostProcessingFunction( - [=, Device = &Device, - PostProcessingPtrs = std::move(PostProcessingPtrs)]() mutable -> int { - return postProcessingTargetDataEnd(Device, PostProcessingPtrs, - FromMapperBase); - }); + AsyncInfo.addPostProcessingFunction([=, Device = &Device]() mutable -> int { + return postProcessingTargetDataEnd(Device, *PostProcessingPtrs, + FromMapperBase); + }); return Ret; } @@ -1080,10 +1018,9 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, void *HstPtrBegin, int64_t ArgSize, int64_t ArgType, AsyncInfoTy &AsyncInfo) { TIMESCOPE_WITH_IDENT(Loc); - bool IsLast, IsHostPtr; - TargetPointerResultTy TPR = Device.getTgtPtrBegin( - HstPtrBegin, ArgSize, IsLast, /*UpdateRefCount=*/false, - /*UseHoldRefCount=*/false, IsHostPtr, /*MustContain=*/true); + TargetPointerResultTy TPR = + Device.getTgtPtrBegin(HstPtrBegin, ArgSize, /*UpdateRefCount=*/false, + /*UseHoldRefCount=*/false, /*MustContain=*/true); void *TgtPtrBegin = TPR.TargetPointer; if (!TPR.isPresent()) { DP("hst data:" DPxMOD " not found, becomes a noop\n", DPxPTR(HstPtrBegin)); @@ -1096,16 +1033,48 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, void *ArgsBase, return OFFLOAD_SUCCESS; } - if (IsHostPtr) { + if (TPR.Flags.IsHostPointer) { DP("hst data:" DPxMOD " unified and shared, becomes a noop\n", DPxPTR(HstPtrBegin)); return OFFLOAD_SUCCESS; } + if (ArgType & OMP_TGT_MAPTYPE_TO) { + DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", + ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); + int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo, + TPR.getEntry()); + if (Ret != OFFLOAD_SUCCESS) { + REPORT("Copying data to device failed.\n"); + return OFFLOAD_FAIL; + } + if (TPR.getEntry()) { + int Ret = TPR.getEntry()->foreachShadowPointerInfo( + [&](const ShadowPtrInfoTy &ShadowPtr) { + DP("Restoring original target pointer value " DPxMOD " for target " + "pointer " DPxMOD "\n", + DPxPTR(ShadowPtr.TgtPtrVal), DPxPTR(ShadowPtr.TgtPtrAddr)); + Ret = Device.submitData(ShadowPtr.TgtPtrAddr, + (void *)&ShadowPtr.TgtPtrVal, + sizeof(void *), AsyncInfo); + if (Ret != OFFLOAD_SUCCESS) { + REPORT("Copying data to device failed.\n"); + return OFFLOAD_FAIL; + } + return OFFLOAD_SUCCESS; + }); + if (Ret != OFFLOAD_SUCCESS) { + DP("Updating shadow map failed\n"); + return Ret; + } + } + } + if (ArgType & OMP_TGT_MAPTYPE_FROM) { DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n", ArgSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin)); - int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo); + int Ret = Device.retrieveData(HstPtrBegin, TgtPtrBegin, ArgSize, AsyncInfo, + TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data from device failed.\n"); return OFFLOAD_FAIL; @@ -1113,44 +1082,26 @@ static int targetDataContiguous(ident_t *Loc, DeviceTy &Device, 
void *ArgsBase, // Wait for device-to-host memcopies for whole struct to complete, // before restoring the correct host pointer. - AsyncInfo.addPostProcessingFunction([=, Device = &Device]() -> int { - auto CB = [&](ShadowPtrListTy::iterator &Itr) { - void **ShadowHstPtrAddr = (void **)Itr->first; - *ShadowHstPtrAddr = Itr->second.HstPtrVal; - DP("Restoring original host pointer value " DPxMOD - " for host pointer " DPxMOD "\n", - DPxPTR(Itr->second.HstPtrVal), DPxPTR(ShadowHstPtrAddr)); - ++Itr; + if (auto *Entry = TPR.getEntry()) { + AsyncInfo.addPostProcessingFunction([=]() -> int { + int Ret = Entry->foreachShadowPointerInfo( + [&](const ShadowPtrInfoTy &ShadowPtr) { + *ShadowPtr.HstPtrAddr = ShadowPtr.HstPtrVal; + DP("Restoring original host pointer value " DPxMOD + " for host pointer " DPxMOD "\n", + DPxPTR(ShadowPtr.HstPtrVal), DPxPTR(ShadowPtr.HstPtrAddr)); + return OFFLOAD_SUCCESS; + }); + Entry->unlock(); + if (Ret != OFFLOAD_SUCCESS) { + DP("Updating shadow map failed\n"); + return Ret; + } return OFFLOAD_SUCCESS; - }; - applyToShadowMapEntries(*Device, CB, HstPtrBegin, ArgSize, TPR); - - return OFFLOAD_SUCCESS; - }); - } - - if (ArgType & OMP_TGT_MAPTYPE_TO) { - DP("Moving %" PRId64 " bytes (hst:" DPxMOD ") -> (tgt:" DPxMOD ")\n", - ArgSize, DPxPTR(HstPtrBegin), DPxPTR(TgtPtrBegin)); - int Ret = Device.submitData(TgtPtrBegin, HstPtrBegin, ArgSize, AsyncInfo); - if (Ret != OFFLOAD_SUCCESS) { - REPORT("Copying data to device failed.\n"); - return OFFLOAD_FAIL; + }); } - - auto CB = [&](ShadowPtrListTy::iterator &Itr) { - DP("Restoring original target pointer value " DPxMOD " for target " - "pointer " DPxMOD "\n", - DPxPTR(Itr->second.TgtPtrVal), DPxPTR(Itr->second.TgtPtrAddr)); - Ret = Device.submitData(Itr->second.TgtPtrAddr, &Itr->second.TgtPtrVal, - sizeof(void *), AsyncInfo); - if (Ret != OFFLOAD_SUCCESS) - REPORT("Copying data to device failed.\n"); - ++Itr; - return Ret; - }; - applyToShadowMapEntries(Device, CB, HstPtrBegin, ArgSize, TPR); } + return OFFLOAD_SUCCESS; } @@ -1538,7 +1489,6 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, void *HstPtrVal = Args[I]; void *HstPtrBegin = ArgBases[I]; void *HstPtrBase = Args[Idx]; - bool IsLast, IsHostPtr; // IsLast is unused. 
void *TgtPtrBase = (void *)((intptr_t)TgtArgs[TgtIdx] + TgtOffsets[TgtIdx]); DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase)); @@ -1546,15 +1496,15 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta); void *&PointerTgtPtrBegin = AsyncInfo.getVoidPtrLocation(); TargetPointerResultTy TPR = Device.getTgtPtrBegin( - HstPtrVal, ArgSizes[I], IsLast, /*UpdateRefCount=*/false, - /*UseHoldRefCount=*/false, IsHostPtr); + HstPtrVal, ArgSizes[I], /*UpdateRefCount=*/false, + /*UseHoldRefCount=*/false); PointerTgtPtrBegin = TPR.TargetPointer; if (!TPR.isPresent()) { DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n", DPxPTR(HstPtrVal)); continue; } - if (IsHostPtr) { + if (TPR.Flags.IsHostPointer) { DP("Unified memory is active, no need to map lambda captured" "variable (" DPxMOD ")\n", DPxPTR(HstPtrVal)); @@ -1563,7 +1513,7 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", DPxPTR(PointerTgtPtrBegin), DPxPTR(TgtPtrBegin)); Ret = Device.submitData(TgtPtrBegin, &PointerTgtPtrBegin, - sizeof(void *), AsyncInfo); + sizeof(void *), AsyncInfo, TPR.getEntry()); if (Ret != OFFLOAD_SUCCESS) { REPORT("Copying data to device failed.\n"); return OFFLOAD_FAIL; @@ -1576,7 +1526,6 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, void *TgtPtrBegin; map_var_info_t HstPtrName = (!ArgNames) ? nullptr : ArgNames[I]; ptrdiff_t TgtBaseOffset; - bool IsLast, IsHostPtr; // unused. TargetPointerResultTy TPR; if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL) { DP("Forwarding first-private value " DPxMOD " to the target construct\n", @@ -1603,9 +1552,9 @@ static int processDataBefore(ident_t *Loc, int64_t DeviceId, void *HostPtr, } else { if (ArgTypes[I] & OMP_TGT_MAPTYPE_PTR_AND_OBJ) HstPtrBase = *reinterpret_cast(HstPtrBase); - TPR = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], IsLast, + TPR = Device.getTgtPtrBegin(HstPtrBegin, ArgSizes[I], /*UpdateRefCount=*/false, - /*UseHoldRefCount=*/false, IsHostPtr); + /*UseHoldRefCount=*/false); TgtPtrBegin = TPR.TargetPointer; TgtBaseOffset = (intptr_t)HstPtrBase - (intptr_t)HstPtrBegin; #ifdef OMPTARGET_DEBUG From a124b4c7f9f8b06724284b86ba8bfc2137e8c21b Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 21 Mar 2023 19:04:14 -0700 Subject: [PATCH 254/691] [LFTR] Simplify another case under assumption exit counts are integers [nfc] This invariant was introduced in 8f3d16905d75b07a933d01dc29677fe5867c1b3e. --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 18c3fe06503b4..5d6778ac75818 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -843,10 +843,6 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB, if (!isLoopCounter(Phi, L, SE)) continue; - // Avoid comparing an integer IV against a pointer Limit. - if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) - continue; - const auto *AR = cast(SE->getSCEV(Phi)); // AR may be a pointer type, while BECount is an integer type. 
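The deleted guard, for reference:

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

A sketch of why this is now dead code: BECount is the backedge-taken count
computed by SCEV, and under the invariant cited above every such count is an
integer expression (for a loop like "for (int i = 0; i != n; ++i)" it is
roughly "n - 1", never a pointer), so the pointer-typed-BECount test can no
longer be true and the early continue is unreachable.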
From 206dc545347710690d3ad6bbab5bac68f193f7cd Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 21 Mar 2023 19:08:34 -0700 Subject: [PATCH 255/691] [LFTR] Use evaluateAtIteration in genLoopLimit [nfc] Note that the comments being removed appear to be very out of sync with the actual code in question. Differential Revision: https://reviews.llvm.org/D146468 --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 26 +++++-------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 5d6778ac75818..0725bd3b7e01d 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -914,28 +914,14 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter - // finds a valid pointer IV. Sign extend ExitCount in order to materialize a - // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing - // the existing GEPs whenever possible. + // finds a valid pointer IV. if (IndVar->getType()->isPointerTy()) { - // IVOffset will be the new GEP offset that is interpreted by GEP as a - // signed value. ExitCount on the other hand represents the loop trip count, - // which is an unsigned value. FindLoopCounter only allows induction - // variables that have a positive unit stride of one. This means we don't - // have to handle the case of negative offsets (yet) and just need to zero - // extend ExitCount. - Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); - const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy); - if (UsePostInc) - IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy)); - - // Expand the code for the iteration count. - assert(SE->isLoopInvariant(IVOffset, L) && + const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR; + const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE); + assert(SE->isLoopInvariant(IVLimit, L) && "Computed iteration count is not loop invariant!"); - - const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset); - BranchInst *BI = cast(ExitingBB->getTerminator()); - return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI); + return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), + ExitingBB->getTerminator()); } else { // In any other case, convert both IVInit and ExitCount to integers before // comparing. This may result in SCEV expansion of pointers, but in practice From 06006f438e1b9168a859b8d7d5273f0170330ebd Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Tue, 21 Mar 2023 19:12:35 -0700 Subject: [PATCH 256/691] [LFTR] Minor style cleanup [nfc] --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 0725bd3b7e01d..43f3beb4b34b5 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -961,9 +961,8 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, // Ensure that we generate the same type as IndVar, or a smaller integer // type. In the presence of null pointer values, we have an integer type // SCEV expression (IVInit) for a pointer type IV value (IndVar). 
- Type *LimitTy = ExitCount->getType(); - BranchInst *BI = cast(ExitingBB->getTerminator()); - return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); + return Rewriter.expandCodeFor(IVLimit, ExitCount->getType(), + ExitingBB->getTerminator()); } } From 4dc04557d71c1752c53eb8025957df8f1bdbb1e1 Mon Sep 17 00:00:00 2001 From: Michael Francis Date: Tue, 21 Mar 2023 22:06:47 +0000 Subject: [PATCH 257/691] [AIX][r] Do not call AddFilePathLibArgs with -r We do not want to add file path lib args when -r is specified. Differential Revision: https://reviews.llvm.org/D146578 --- clang/lib/Driver/ToolChains/AIX.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index 5521a38d9bc0a..711e8619d0a79 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -234,8 +234,8 @@ void aix::Linker::ConstructJob(Compilation &C, const JobAction &JA, // Add directory to library search path. Args.AddAllArgs(CmdArgs, options::OPT_L); - ToolChain.AddFilePathLibArgs(Args, CmdArgs); if (!Args.hasArg(options::OPT_r)) { + ToolChain.AddFilePathLibArgs(Args, CmdArgs); ToolChain.addProfileRTLibs(Args, CmdArgs); if (getToolChain().ShouldLinkCXXStdlib(Args)) From b50c6857a45b8fa753bde44efcea7d0000c55ac5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 21 Mar 2023 19:15:30 -0700 Subject: [PATCH 258/691] [RISCV] Move fli selection in RISCVISelDAGToDAG.cpp. NFC We custom isel for ConstantFP that has higher priority than isel patterns. We were previously detecting valid FP constants for fli to early exit from the custom code. This detection called getLoadFPImm. Then we would run the isel patterns which would call getLoadFPImm a second time. With a little bit more code we can directly select the fli instruction in the custom handler and avoid a second call. Remove the incorrect mayRaiseFPException flag from the FLI instructions. 
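The new flow in the custom handler, condensed from the hunk below (the f64
negative-zero special case and the constant-pool fallback are unchanged):

    int FPImm = static_cast<const RISCVTargetLowering *>(TLI)
                    ->getLegalZfaFPImm(APF, VT); // -1 if fli cannot encode it
    if (FPImm >= 0) {
      unsigned Opc = VT == MVT::f16   ? RISCV::FLI_H
                     : VT == MVT::f32 ? RISCV::FLI_S
                                      : RISCV::FLI_D;
      ReplaceNode(Node, CurDAG->getMachineNode(
                            Opc, DL, VT,
                            CurDAG->getTargetConstant(FPImm, DL, XLenVT)));
      return; // selected directly, no second getLoadFPImm call
    }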
Reviewed By: joshua-arch1 Differential Revision: https://reviews.llvm.org/D146093 --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 28 ++++++++++++++++++--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 13 +++++++--- llvm/lib/Target/RISCV/RISCVISelLowering.h | 2 +- llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 13 +--------- 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index f397ef12913db..fbdcbbfd5a190 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -842,8 +842,29 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { } case ISD::ConstantFP: { const APFloat &APF = cast(Node)->getValueAPF(); - if (static_cast(TLI)->isLegalZfaFPImm(APF, VT)) - break; + int FPImm = static_cast(TLI)->getLegalZfaFPImm( + APF, VT); + if (FPImm >= 0) { + unsigned Opc; + switch (VT.SimpleTy) { + default: + llvm_unreachable("Unexpected size"); + case MVT::f16: + Opc = RISCV::FLI_H; + break; + case MVT::f32: + Opc = RISCV::FLI_S; + break; + case MVT::f64: + Opc = RISCV::FLI_D; + break; + } + + SDNode *Res = CurDAG->getMachineNode( + Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT)); + ReplaceNode(Node, Res); + return; + } bool NegZeroF64 = APF.isNegZero() && VT == MVT::f64; SDValue Imm; @@ -2967,7 +2988,8 @@ bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) { MVT VT = CFP->getSimpleValueType(0); - if (static_cast(TLI)->isLegalZfaFPImm(APF, VT)) + if (static_cast(TLI)->getLegalZfaFPImm(APF, + VT) >= 0) return false; MVT XLenVT = Subtarget->getXLenVT(); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index e56a2b3b08b54..595e094662f9a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1545,9 +1545,11 @@ bool RISCVTargetLowering::isOffsetFoldingLegal( return false; } -bool RISCVTargetLowering::isLegalZfaFPImm(const APFloat &Imm, EVT VT) const { +// Returns 0-31 if the fli instruction is available for the type and this is +// legal FP immediate for the type. Returns -1 otherwise. +int RISCVTargetLowering::getLegalZfaFPImm(const APFloat &Imm, EVT VT) const { if (!Subtarget.hasStdExtZfa()) - return false; + return -1; bool IsSupportedVT = false; if (VT == MVT::f16) { @@ -1559,7 +1561,10 @@ bool RISCVTargetLowering::isLegalZfaFPImm(const APFloat &Imm, EVT VT) const { IsSupportedVT = true; } - return IsSupportedVT && RISCVLoadFPImm::getLoadFPImm(Imm) != -1; + if (!IsSupportedVT) + return -1; + + return RISCVLoadFPImm::getLoadFPImm(Imm); } bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, @@ -1575,7 +1580,7 @@ bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, if (!IsLegalVT) return false; - if (isLegalZfaFPImm(Imm, VT)) + if (getLegalZfaFPImm(Imm, VT) >= 0) return true; // Cannot create a 64 bit floating-point immediate value for rv32. 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index b3a202476751d..19aaebc92ba6a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -393,7 +393,7 @@ class RISCVTargetLowering : public TargetLowering { SmallVectorImpl &Ops) const override; bool shouldScalarizeBinop(SDValue VecOp) const override; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override; - bool isLegalZfaFPImm(const APFloat &Imm, EVT VT) const; + int getLegalZfaFPImm(const APFloat &Imm, EVT VT) const; bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override; bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index 28348b14a5ef1..751a0eabbd394 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -63,7 +63,7 @@ class FPBinaryOp_rr funct7, bits<3> funct3, DAGOperand rdty, : RVInstR; -let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class FPUnaryOp_imm funct7, bits<5> rs2val, bits<3> funct3, RISCVOpcode opcode, dag outs, dag ins, string opcodestr, string argstr> : RVInst { @@ -182,14 +182,7 @@ def : InstAlias<"fgeq.h $rd, $rs, $rt", // Codegen patterns //===----------------------------------------------------------------------===// -def fpimm_to_loadfpimm : SDNodeXFormgetTargetConstant(RISCVLoadFPImm::getLoadFPImm(N->getValueAPF()), - SDLoc(N), Subtarget->getXLenVT());}]>; - - let Predicates = [HasStdExtZfa] in { -def : Pat<(f32 fpimm:$imm), (FLI_S (fpimm_to_loadfpimm fpimm:$imm))>; - def: PatFprFpr; def: PatFprFpr; @@ -212,8 +205,6 @@ def: PatSetCC; } // Predicates = [HasStdExtZfa] let Predicates = [HasStdExtZfa, HasStdExtD] in { -def : Pat<(f64 fpimm:$imm), (FLI_D (fpimm_to_loadfpimm fpimm:$imm))>; - def: PatFprFpr; def: PatFprFpr; @@ -242,8 +233,6 @@ def : Pat<(RISCVBuildPairF64 GPR:$rs1, GPR:$rs2), } let Predicates = [HasStdExtZfa, HasStdExtZfh] in { -def : Pat<(f16 fpimm:$imm), (FLI_H (fpimm_to_loadfpimm fpimm:$imm))>; - def: PatFprFpr; def: PatFprFpr; From 2023cc2b1b3ce2223091024f1687704948c4145f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 21 Mar 2023 19:42:27 -0700 Subject: [PATCH 259/691] [RISCV] Clear mayRaiseFPException for Zfa fmvh.x.d and fmvp.d.x instructions. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td index 751a0eabbd394..ae38fd5c5dc24 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfa.td @@ -127,10 +127,13 @@ def FLEQ_D : FPCmp_rr<0b1010001, 0b100, "fleq.d", FPR64>; } // Predicates = [HasStdExtZfa, HasStdExtD] let Predicates = [HasStdExtZfa, HasStdExtD, IsRV32] in { +let mayRaiseFPException = 0 in { def FMVH_X_D : FPUnaryOp_r<0b1110001, 0b00001, 0b000, GPR, FPR64, "fmvh.x.d">, Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>; def FMVP_D_X : FPBinaryOp_rr<0b1011001, 0b000, FPR64, GPR, "fmvp.d.x">, Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>; +} + let isCodeGenOnly = 1, mayRaiseFPException = 0 in def FMV_X_W_FPR64 : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR64, "fmv.x.w">, From c176edc013c349f2cc2a2df7fa661add017e8097 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 21 Mar 2023 19:48:20 -0700 Subject: [PATCH 260/691] [RISCV] Clear mayRaiseFPException for fclass.d instruction. We got it right for fclass.s and fclass.h. --- llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 0adb000854248..e3d7deeb6ff84 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -145,6 +145,7 @@ defm FLT_D : FPCmp_rr_m<0b1010001, 0b001, "flt.d", DINX>; defm FLE_D : FPCmp_rr_m<0b1010001, 0b000, "fle.d", DINX>; } +let mayRaiseFPException = 0 in defm FCLASS_D : FPUnaryOp_r_m<0b1110001, 0b00000, 0b001, XDINX, "fclass.d">, Sched<[WriteFClass64, ReadFClass64]>; From 55dd04f6bcf797b4ff20e74158377bcc912b9870 Mon Sep 17 00:00:00 2001 From: yijia1212 Date: Tue, 21 Mar 2023 21:26:09 -0700 Subject: [PATCH 261/691] update dependency for TransformOpsPyTdFiles update dependency for TransformOpsPyTdFiles Differential Revision: https://reviews.llvm.org/D146605 --- utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel index 5c41fc5fc7ed6..06a97f4c921ee 100644 --- a/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/python/BUILD.bazel @@ -862,6 +862,8 @@ td_library( "//mlir:include/mlir/Bindings/Python/Attributes.td", ], deps = [ + "//mlir:CallInterfacesTdFiles", + "//mlir:FunctionInterfacesTdFiles", "//mlir:OpBaseTdFiles", "//mlir:TransformDialectTdFiles", ], From f64dc9bc6ea0905ecd0ef4e675ddeef0c8111d30 Mon Sep 17 00:00:00 2001 From: Ting Wang Date: Wed, 22 Mar 2023 00:32:18 -0400 Subject: [PATCH 262/691] [PowerPC][NFC] add const-nonsplat-array-init.ll When doing store constant vector/scalar, some duplicated values can be reused. Add test case and will show combiner can improve these. 
Reviewed By: shchenz Differential Revision: https://reviews.llvm.org/D146500 --- .../PowerPC/const-nonsplat-array-init.ll | 1747 +++++++++++++++++ 1 file changed, 1747 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll diff --git a/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll b/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll new file mode 100644 index 0000000000000..5d76f5099a662 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/const-nonsplat-array-init.ll @@ -0,0 +1,1747 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mcpu=pwr8 \ +; RUN: -mtriple=powerpc64-unknown-aix < %s | FileCheck %s --check-prefix=P8-BE +; RUN: llc -verify-machineinstrs -mcpu=pwr9 \ +; RUN: -mtriple=powerpc64-unknown-aix < %s | FileCheck %s --check-prefix=P9-BE +; RUN: llc -verify-machineinstrs -mcpu=pwr10 \ +; RUN: -mtriple=powerpc64-unknown-aix < %s | FileCheck %s --check-prefix=P10-BE +; RUN: llc -verify-machineinstrs -mcpu=pwr8 \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s --check-prefix=P8-LE +; RUN: llc -verify-machineinstrs -mcpu=pwr9 \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s --check-prefix=P9-LE +; RUN: llc -verify-machineinstrs -mcpu=pwr10 \ +; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s --check-prefix=P10-LE + +define dso_local void @foo1_int_be_reuse4B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo1_int_be_reuse4B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C0(2) # %const.0 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: lis 4, 1029 +; P8-BE-NEXT: ori 4, 4, 1543 +; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 2057 +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: sth 4, 20(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo1_int_be_reuse4B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C0(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 1029 +; P9-BE-NEXT: ori 4, 4, 1543 +; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 2057 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: sth 4, 20(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo1_int_be_reuse4B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C0(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 67438087 +; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 2057 +; P10-BE-NEXT: sth 4, 20(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo1_int_be_reuse4B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha +; P8-LE-NEXT: li 5, 2312 +; P8-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: lis 4, 1798 +; P8-LE-NEXT: ori 4, 4, 1284 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: sth 5, 20(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo1_int_be_reuse4B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 1798 +; P9-LE-NEXT: ori 4, 4, 1284 +; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 2312 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: sth 4, 20(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo1_int_be_reuse4B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI0_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 117835012 +; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 2312 +; P10-LE-NEXT: sth 4, 20(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <16 x i8> , ptr %a, align 1 + %arrayidx16 = getelementptr inbounds i8, 
ptr %a, i64 16 + store i8 4, ptr %arrayidx16, align 1 + %arrayidx17 = getelementptr inbounds i8, ptr %a, i64 17 + store i8 5, ptr %arrayidx17, align 1 + %arrayidx18 = getelementptr inbounds i8, ptr %a, i64 18 + store i8 6, ptr %arrayidx18, align 1 + %arrayidx19 = getelementptr inbounds i8, ptr %a, i64 19 + store i8 7, ptr %arrayidx19, align 1 + %arrayidx20 = getelementptr inbounds i8, ptr %a, i64 20 + store i8 8, ptr %arrayidx20, align 1 + %arrayidx21 = getelementptr inbounds i8, ptr %a, i64 21 + store i8 9, ptr %arrayidx21, align 1 + ret void +} + +define dso_local void @foo2_int_le_reuse4B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo2_int_le_reuse4B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C1(2) # %const.0 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: lis 4, 2057 +; P8-BE-NEXT: ori 4, 4, 2571 +; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 3085 +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: sth 4, 20(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo2_int_le_reuse4B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C1(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 2057 +; P9-BE-NEXT: ori 4, 4, 2571 +; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 3085 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: sth 4, 20(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo2_int_le_reuse4B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C1(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 134810123 +; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 3085 +; P10-BE-NEXT: sth 4, 20(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo2_int_le_reuse4B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI1_0@toc@ha +; P8-LE-NEXT: li 5, 3340 +; P8-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: lis 4, 2826 +; P8-LE-NEXT: ori 4, 4, 2312 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: sth 5, 20(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo2_int_le_reuse4B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI1_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI1_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 2826 +; P9-LE-NEXT: ori 4, 4, 2312 +; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 3340 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: sth 4, 20(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo2_int_le_reuse4B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI1_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 185207048 +; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 3340 +; P10-LE-NEXT: sth 4, 20(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <16 x i8> , ptr %a, align 1 + %arrayidx16 = getelementptr inbounds i8, ptr %a, i64 16 + store i8 8, ptr %arrayidx16, align 1 + %arrayidx17 = getelementptr inbounds i8, ptr %a, i64 17 + store i8 9, ptr %arrayidx17, align 1 + %arrayidx18 = getelementptr inbounds i8, ptr %a, i64 18 + store i8 10, ptr %arrayidx18, align 1 + %arrayidx19 = getelementptr inbounds i8, ptr %a, i64 19 + store i8 11, ptr %arrayidx19, align 1 + %arrayidx20 = getelementptr inbounds i8, ptr %a, i64 20 + store i8 12, ptr %arrayidx20, align 1 + %arrayidx21 = getelementptr inbounds i8, ptr %a, i64 21 + store i8 13, ptr %arrayidx21, align 1 + ret void +} + +define dso_local void @foo3_int_be_reuse4B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo3_int_be_reuse4B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C2(2) # %const.0 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: lis 4, 1029 +; P8-BE-NEXT: 
ori 4, 4, 1543 +; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 2057 +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: sth 4, 20(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo3_int_be_reuse4B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C2(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 1029 +; P9-BE-NEXT: ori 4, 4, 1543 +; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 2057 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: sth 4, 20(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo3_int_be_reuse4B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C2(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 67438087 +; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 2057 +; P10-BE-NEXT: sth 4, 20(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo3_int_be_reuse4B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI2_0@toc@ha +; P8-LE-NEXT: li 5, 2057 +; P8-LE-NEXT: addi 4, 4, .LCPI2_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: lis 4, 1543 +; P8-LE-NEXT: ori 4, 4, 1029 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: sth 5, 20(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo3_int_be_reuse4B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI2_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI2_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 1543 +; P9-LE-NEXT: ori 4, 4, 1029 +; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 2057 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: sth 4, 20(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo3_int_be_reuse4B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI2_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 101123077 +; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 2057 +; P10-LE-NEXT: sth 4, 20(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <8 x i16> , ptr %a, align 2 + %arrayidx8 = getelementptr inbounds i16, ptr %a, i64 8 + store i16 1029, ptr %arrayidx8, align 2 + %arrayidx9 = getelementptr inbounds i16, ptr %a, i64 9 + store i16 1543, ptr %arrayidx9, align 2 + %arrayidx10 = getelementptr inbounds i16, ptr %a, i64 10 + store i16 2057, ptr %arrayidx10, align 2 + ret void +} + +define dso_local void @foo4_int_le_reuse4B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo4_int_le_reuse4B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C3(2) # %const.0 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: lis 4, 2057 +; P8-BE-NEXT: ori 4, 4, 2571 +; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: li 4, 3085 +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: sth 4, 20(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo4_int_le_reuse4B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C3(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 2057 +; P9-BE-NEXT: ori 4, 4, 2571 +; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: li 4, 3085 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: sth 4, 20(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo4_int_le_reuse4B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C3(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 134810123 +; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: li 4, 3085 +; P10-BE-NEXT: sth 4, 20(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo4_int_le_reuse4B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI3_0@toc@ha +; P8-LE-NEXT: li 5, 3085 +; P8-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: lis 4, 2571 +; P8-LE-NEXT: ori 4, 4, 2057 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: sth 5, 
20(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo4_int_le_reuse4B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI3_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI3_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 2571 +; P9-LE-NEXT: ori 4, 4, 2057 +; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: li 4, 3085 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: sth 4, 20(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo4_int_le_reuse4B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI3_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 168495113 +; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: li 4, 3085 +; P10-LE-NEXT: sth 4, 20(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <8 x i16> , ptr %a, align 2 + %arrayidx8 = getelementptr inbounds i16, ptr %a, i64 8 + store i16 2057, ptr %arrayidx8, align 2 + %arrayidx9 = getelementptr inbounds i16, ptr %a, i64 9 + store i16 2571, ptr %arrayidx9, align 2 + %arrayidx10 = getelementptr inbounds i16, ptr %a, i64 10 + store i16 3085, ptr %arrayidx10, align 2 + ret void +} + +define dso_local void @foo5_int_be_reuse4B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo5_int_be_reuse4B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C4(2) # %const.0 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: lis 4, 1029 +; P8-BE-NEXT: ori 4, 4, 1543 +; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo5_int_be_reuse4B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C4(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 1029 +; P9-BE-NEXT: ori 4, 4, 1543 +; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo5_int_be_reuse4B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C4(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 67438087 +; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo5_int_be_reuse4B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI4_0@toc@ha +; P8-LE-NEXT: addi 4, 4, .LCPI4_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: lis 4, 1029 +; P8-LE-NEXT: ori 4, 4, 1543 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo5_int_be_reuse4B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI4_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI4_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 1029 +; P9-LE-NEXT: ori 4, 4, 1543 +; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo5_int_be_reuse4B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI4_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 67438087 +; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <4 x i32> , ptr %a, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 4 + store i32 67438087, ptr %arrayidx4, align 4 + ret void +} + +define dso_local void @foo6_int_le_reuse4B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo6_int_le_reuse4B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C5(2) # %const.0 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: lis 4, 2057 +; P8-BE-NEXT: ori 4, 4, 2571 +; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo6_int_le_reuse4B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C5(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 2057 +; P9-BE-NEXT: ori 4, 4, 2571 +; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: stxv 0, 
0(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo6_int_le_reuse4B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C5(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 134810123 +; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo6_int_le_reuse4B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha +; P8-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: lis 4, 2057 +; P8-LE-NEXT: ori 4, 4, 2571 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo6_int_le_reuse4B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI5_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI5_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 2057 +; P9-LE-NEXT: ori 4, 4, 2571 +; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo6_int_le_reuse4B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI5_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 134810123 +; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <4 x i32> , ptr %a, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 4 + store i32 134810123, ptr %arrayidx4, align 4 + ret void +} + +define dso_local void @foo7_int_be_reuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo7_int_be_reuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C6(2) # %const.0 +; P8-BE-NEXT: lis 5, 1 +; P8-BE-NEXT: ori 5, 5, 515 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: rldic 4, 5, 32, 15 +; P8-BE-NEXT: oris 4, 4, 1029 +; P8-BE-NEXT: ori 4, 4, 1543 +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo7_int_be_reuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C6(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 1 +; P9-BE-NEXT: ori 4, 4, 515 +; P9-BE-NEXT: rldic 4, 4, 32, 15 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: oris 4, 4, 1029 +; P9-BE-NEXT: ori 4, 4, 1543 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo7_int_be_reuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C6(2) # %const.0 +; P10-BE-NEXT: pli 5, 67438087 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 66051 +; P10-BE-NEXT: rldimi 5, 4, 32, 0 +; P10-BE-NEXT: std 5, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo7_int_be_reuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI6_0@toc@ha +; P8-LE-NEXT: lis 5, 449 +; P8-LE-NEXT: addi 4, 4, .LCPI6_0@toc@l +; P8-LE-NEXT: ori 5, 5, 33089 +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: rldic 4, 5, 34, 5 +; P8-LE-NEXT: oris 4, 4, 770 +; P8-LE-NEXT: ori 4, 4, 256 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo7_int_be_reuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI6_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI6_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 449 +; P9-LE-NEXT: ori 4, 4, 33089 +; P9-LE-NEXT: rldic 4, 4, 34, 5 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: oris 4, 4, 770 +; P9-LE-NEXT: ori 4, 4, 256 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo7_int_be_reuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI6_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 117835012 +; P10-LE-NEXT: pli 5, 50462976 +; P10-LE-NEXT: rldimi 5, 4, 32, 0 +; P10-LE-NEXT: std 5, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <16 
x i8> , ptr %a, align 1 + %arrayidx16 = getelementptr inbounds i8, ptr %a, i64 16 + store i8 0, ptr %arrayidx16, align 1 + %arrayidx17 = getelementptr inbounds i8, ptr %a, i64 17 + store i8 1, ptr %arrayidx17, align 1 + %arrayidx18 = getelementptr inbounds i8, ptr %a, i64 18 + store i8 2, ptr %arrayidx18, align 1 + %arrayidx19 = getelementptr inbounds i8, ptr %a, i64 19 + store i8 3, ptr %arrayidx19, align 1 + %arrayidx20 = getelementptr inbounds i8, ptr %a, i64 20 + store i8 4, ptr %arrayidx20, align 1 + %arrayidx21 = getelementptr inbounds i8, ptr %a, i64 21 + store i8 5, ptr %arrayidx21, align 1 + %arrayidx22 = getelementptr inbounds i8, ptr %a, i64 22 + store i8 6, ptr %arrayidx22, align 1 + %arrayidx23 = getelementptr inbounds i8, ptr %a, i64 23 + store i8 7, ptr %arrayidx23, align 1 + ret void +} + +define dso_local void @foo8_int_le_reuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo8_int_le_reuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C7(2) # %const.0 +; P8-BE-NEXT: lis 5, 2057 +; P8-BE-NEXT: ori 5, 5, 2571 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: rldic 4, 5, 32, 4 +; P8-BE-NEXT: oris 4, 4, 3085 +; P8-BE-NEXT: ori 4, 4, 3599 +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo8_int_le_reuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C7(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 2057 +; P9-BE-NEXT: ori 4, 4, 2571 +; P9-BE-NEXT: rldic 4, 4, 32, 4 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: oris 4, 4, 3085 +; P9-BE-NEXT: ori 4, 4, 3599 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo8_int_le_reuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C7(2) # %const.0 +; P10-BE-NEXT: pli 5, 202182159 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 134810123 +; P10-BE-NEXT: rldimi 5, 4, 32, 0 +; P10-BE-NEXT: std 5, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo8_int_le_reuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha +; P8-LE-NEXT: lis 5, 963 +; P8-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l +; P8-LE-NEXT: ori 5, 5, 33603 +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: rldic 4, 5, 34, 4 +; P8-LE-NEXT: oris 4, 4, 2826 +; P8-LE-NEXT: ori 4, 4, 2312 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo8_int_le_reuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI7_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI7_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 963 +; P9-LE-NEXT: ori 4, 4, 33603 +; P9-LE-NEXT: rldic 4, 4, 34, 4 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: oris 4, 4, 2826 +; P9-LE-NEXT: ori 4, 4, 2312 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo8_int_le_reuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI7_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 252579084 +; P10-LE-NEXT: pli 5, 185207048 +; P10-LE-NEXT: rldimi 5, 4, 32, 0 +; P10-LE-NEXT: std 5, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <16 x i8> , ptr %a, align 1 + %arrayidx16 = getelementptr inbounds i8, ptr %a, i64 16 + store i8 8, ptr %arrayidx16, align 1 + %arrayidx17 = getelementptr inbounds i8, ptr %a, i64 17 + store i8 9, ptr %arrayidx17, align 1 + %arrayidx18 = getelementptr inbounds i8, ptr %a, i64 18 + store i8 10, ptr %arrayidx18, align 1 + %arrayidx19 = getelementptr inbounds i8, ptr %a, i64 19 + store i8 11, ptr %arrayidx19, align 1 + %arrayidx20 = getelementptr inbounds i8, ptr 
%a, i64 20 + store i8 12, ptr %arrayidx20, align 1 + %arrayidx21 = getelementptr inbounds i8, ptr %a, i64 21 + store i8 13, ptr %arrayidx21, align 1 + %arrayidx22 = getelementptr inbounds i8, ptr %a, i64 22 + store i8 14, ptr %arrayidx22, align 1 + %arrayidx23 = getelementptr inbounds i8, ptr %a, i64 23 + store i8 15, ptr %arrayidx23, align 1 + ret void +} + +define dso_local void @foo9_int_be_reuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo9_int_be_reuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C8(2) # %const.0 +; P8-BE-NEXT: lis 5, 1 +; P8-BE-NEXT: ori 5, 5, 515 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: rldic 4, 5, 32, 15 +; P8-BE-NEXT: oris 4, 4, 1029 +; P8-BE-NEXT: ori 4, 4, 1543 +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo9_int_be_reuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C8(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 1 +; P9-BE-NEXT: ori 4, 4, 515 +; P9-BE-NEXT: rldic 4, 4, 32, 15 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: oris 4, 4, 1029 +; P9-BE-NEXT: ori 4, 4, 1543 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo9_int_be_reuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C8(2) # %const.0 +; P10-BE-NEXT: pli 5, 67438087 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 66051 +; P10-BE-NEXT: rldimi 5, 4, 32, 0 +; P10-BE-NEXT: std 5, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo9_int_be_reuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha +; P8-LE-NEXT: lis 5, 1543 +; P8-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l +; P8-LE-NEXT: ori 5, 5, 1029 +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: rldic 4, 5, 32, 5 +; P8-LE-NEXT: oris 4, 4, 515 +; P8-LE-NEXT: ori 4, 4, 1 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo9_int_be_reuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 1543 +; P9-LE-NEXT: ori 4, 4, 1029 +; P9-LE-NEXT: rldic 4, 4, 32, 5 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: oris 4, 4, 515 +; P9-LE-NEXT: ori 4, 4, 1 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo9_int_be_reuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI8_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 101123077 +; P10-LE-NEXT: pli 5, 33751041 +; P10-LE-NEXT: rldimi 5, 4, 32, 0 +; P10-LE-NEXT: std 5, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <8 x i16> , ptr %a, align 2 + %arrayidx8 = getelementptr inbounds i16, ptr %a, i64 8 + store i16 1, ptr %arrayidx8, align 2 + %arrayidx9 = getelementptr inbounds i16, ptr %a, i64 9 + store i16 515, ptr %arrayidx9, align 2 + %arrayidx10 = getelementptr inbounds i16, ptr %a, i64 10 + store i16 1029, ptr %arrayidx10, align 2 + %arrayidx11 = getelementptr inbounds i16, ptr %a, i64 11 + store i16 1543, ptr %arrayidx11, align 2 + ret void +} + +define dso_local void @foo10_int_le_reuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo10_int_le_reuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C9(2) # %const.0 +; P8-BE-NEXT: lis 5, 2057 +; P8-BE-NEXT: ori 5, 5, 2571 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: rldic 4, 5, 32, 4 +; P8-BE-NEXT: oris 4, 4, 3085 +; P8-BE-NEXT: ori 4, 4, 3599 +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: 
foo10_int_le_reuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C9(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 2057 +; P9-BE-NEXT: ori 4, 4, 2571 +; P9-BE-NEXT: rldic 4, 4, 32, 4 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: oris 4, 4, 3085 +; P9-BE-NEXT: ori 4, 4, 3599 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo10_int_le_reuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C9(2) # %const.0 +; P10-BE-NEXT: pli 5, 202182159 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 134810123 +; P10-BE-NEXT: rldimi 5, 4, 32, 0 +; P10-BE-NEXT: std 5, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo10_int_le_reuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha +; P8-LE-NEXT: lis 5, 3599 +; P8-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l +; P8-LE-NEXT: ori 5, 5, 3085 +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: rldic 4, 5, 32, 4 +; P8-LE-NEXT: oris 4, 4, 2571 +; P8-LE-NEXT: ori 4, 4, 2057 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo10_int_le_reuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 3599 +; P9-LE-NEXT: ori 4, 4, 3085 +; P9-LE-NEXT: rldic 4, 4, 32, 4 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: oris 4, 4, 2571 +; P9-LE-NEXT: ori 4, 4, 2057 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo10_int_le_reuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI9_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 235867149 +; P10-LE-NEXT: pli 5, 168495113 +; P10-LE-NEXT: rldimi 5, 4, 32, 0 +; P10-LE-NEXT: std 5, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <8 x i16> , ptr %a, align 2 + %arrayidx8 = getelementptr inbounds i16, ptr %a, i64 8 + store i16 2057, ptr %arrayidx8, align 2 + %arrayidx9 = getelementptr inbounds i16, ptr %a, i64 9 + store i16 2571, ptr %arrayidx9, align 2 + %arrayidx10 = getelementptr inbounds i16, ptr %a, i64 10 + store i16 3085, ptr %arrayidx10, align 2 + %arrayidx11 = getelementptr inbounds i16, ptr %a, i64 11 + store i16 3599, ptr %arrayidx11, align 2 + ret void +} + +define dso_local void @foo11_int_be_reuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo11_int_be_reuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C10(2) # %const.0 +; P8-BE-NEXT: lis 5, 1 +; P8-BE-NEXT: ori 5, 5, 515 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: rldic 4, 5, 32, 15 +; P8-BE-NEXT: oris 4, 4, 1029 +; P8-BE-NEXT: ori 4, 4, 1543 +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo11_int_be_reuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C10(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 1 +; P9-BE-NEXT: ori 4, 4, 515 +; P9-BE-NEXT: rldic 4, 4, 32, 15 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: oris 4, 4, 1029 +; P9-BE-NEXT: ori 4, 4, 1543 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo11_int_be_reuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C10(2) # %const.0 +; P10-BE-NEXT: pli 5, 67438087 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 66051 +; P10-BE-NEXT: rldimi 5, 4, 32, 0 +; P10-BE-NEXT: std 5, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo11_int_be_reuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha +; P8-LE-NEXT: lis 5, 1029 +; P8-LE-NEXT: addi 4, 4, 
.LCPI10_0@toc@l +; P8-LE-NEXT: ori 5, 5, 1543 +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: rldic 4, 5, 32, 5 +; P8-LE-NEXT: oris 4, 4, 1 +; P8-LE-NEXT: ori 4, 4, 515 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo11_int_be_reuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 1029 +; P9-LE-NEXT: ori 4, 4, 1543 +; P9-LE-NEXT: rldic 4, 4, 32, 5 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: oris 4, 4, 1 +; P9-LE-NEXT: ori 4, 4, 515 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo11_int_be_reuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI10_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 67438087 +; P10-LE-NEXT: pli 5, 66051 +; P10-LE-NEXT: rldimi 5, 4, 32, 0 +; P10-LE-NEXT: std 5, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <4 x i32> , ptr %a, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 4 + store i32 66051, ptr %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 5 + store i32 67438087, ptr %arrayidx5, align 4 + ret void +} + +define dso_local void @foo12_int_le_reuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo12_int_le_reuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C11(2) # %const.0 +; P8-BE-NEXT: lis 5, 2057 +; P8-BE-NEXT: ori 5, 5, 2571 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: rldic 4, 5, 32, 4 +; P8-BE-NEXT: oris 4, 4, 3085 +; P8-BE-NEXT: ori 4, 4, 3599 +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo12_int_le_reuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C11(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 2057 +; P9-BE-NEXT: ori 4, 4, 2571 +; P9-BE-NEXT: rldic 4, 4, 32, 4 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: oris 4, 4, 3085 +; P9-BE-NEXT: ori 4, 4, 3599 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo12_int_le_reuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C11(2) # %const.0 +; P10-BE-NEXT: pli 5, 202182159 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 134810123 +; P10-BE-NEXT: rldimi 5, 4, 32, 0 +; P10-BE-NEXT: std 5, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo12_int_le_reuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha +; P8-LE-NEXT: lis 5, 3085 +; P8-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l +; P8-LE-NEXT: ori 5, 5, 3599 +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: rldic 4, 5, 32, 4 +; P8-LE-NEXT: oris 4, 4, 2057 +; P8-LE-NEXT: ori 4, 4, 2571 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo12_int_le_reuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 3085 +; P9-LE-NEXT: ori 4, 4, 3599 +; P9-LE-NEXT: rldic 4, 4, 32, 4 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: oris 4, 4, 2057 +; P9-LE-NEXT: ori 4, 4, 2571 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo12_int_le_reuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI11_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 202182159 +; P10-LE-NEXT: pli 5, 134810123 +; P10-LE-NEXT: rldimi 5, 4, 32, 0 +; P10-LE-NEXT: std 5, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <4 x i32> , ptr %a, align 4 + %arrayidx4 = getelementptr 
inbounds i32, ptr %a, i64 4 + store i32 134810123, ptr %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 5 + store i32 202182159, ptr %arrayidx5, align 4 + ret void +} + +define dso_local void @foo13_int_be_reuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo13_int_be_reuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C12(2) # %const.0 +; P8-BE-NEXT: lis 5, 1 +; P8-BE-NEXT: ori 5, 5, 515 +; P8-BE-NEXT: lxvd2x 0, 0, 4 +; P8-BE-NEXT: rldic 4, 5, 32, 15 +; P8-BE-NEXT: oris 4, 4, 1029 +; P8-BE-NEXT: ori 4, 4, 1543 +; P8-BE-NEXT: stxvd2x 0, 0, 3 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo13_int_be_reuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C12(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 1 +; P9-BE-NEXT: ori 4, 4, 515 +; P9-BE-NEXT: rldic 4, 4, 32, 15 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: oris 4, 4, 1029 +; P9-BE-NEXT: ori 4, 4, 1543 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo13_int_be_reuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C12(2) # %const.0 +; P10-BE-NEXT: pli 5, 67438087 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 66051 +; P10-BE-NEXT: rldimi 5, 4, 32, 0 +; P10-BE-NEXT: std 5, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo13_int_be_reuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI12_0@toc@ha +; P8-LE-NEXT: lis 5, 1 +; P8-LE-NEXT: addi 4, 4, .LCPI12_0@toc@l +; P8-LE-NEXT: ori 5, 5, 515 +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: rldic 4, 5, 32, 15 +; P8-LE-NEXT: oris 4, 4, 1029 +; P8-LE-NEXT: ori 4, 4, 1543 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo13_int_be_reuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI12_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI12_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 1 +; P9-LE-NEXT: ori 4, 4, 515 +; P9-LE-NEXT: rldic 4, 4, 32, 15 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: oris 4, 4, 1029 +; P9-LE-NEXT: ori 4, 4, 1543 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo13_int_be_reuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI12_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 66051 +; P10-LE-NEXT: pli 5, 67438087 +; P10-LE-NEXT: rldimi 5, 4, 32, 0 +; P10-LE-NEXT: std 5, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <2 x i64> , ptr %a, align 8 + %arrayidx2 = getelementptr inbounds i64, ptr %a, i64 2 + store i64 283686952306183, ptr %arrayidx2, align 8 + ret void +} + +define dso_local void @foo14_int_le_reuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo14_int_le_reuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C13(2) # %const.0 +; P8-BE-NEXT: lis 5, 2057 +; P8-BE-NEXT: ori 5, 5, 2571 +; P8-BE-NEXT: lxvd2x 0, 0, 4 +; P8-BE-NEXT: rldic 4, 5, 32, 4 +; P8-BE-NEXT: oris 4, 4, 3085 +; P8-BE-NEXT: ori 4, 4, 3599 +; P8-BE-NEXT: stxvd2x 0, 0, 3 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo14_int_le_reuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C13(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 2057 +; P9-BE-NEXT: ori 4, 4, 2571 +; P9-BE-NEXT: rldic 4, 4, 32, 4 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: oris 4, 4, 3085 +; P9-BE-NEXT: ori 4, 4, 3599 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo14_int_le_reuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C13(2) # 
%const.0 +; P10-BE-NEXT: pli 5, 202182159 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 134810123 +; P10-BE-NEXT: rldimi 5, 4, 32, 0 +; P10-BE-NEXT: std 5, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo14_int_le_reuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI13_0@toc@ha +; P8-LE-NEXT: lis 5, 2057 +; P8-LE-NEXT: addi 4, 4, .LCPI13_0@toc@l +; P8-LE-NEXT: ori 5, 5, 2571 +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: rldic 4, 5, 32, 4 +; P8-LE-NEXT: oris 4, 4, 3085 +; P8-LE-NEXT: ori 4, 4, 3599 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo14_int_le_reuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI13_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI13_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 2057 +; P9-LE-NEXT: ori 4, 4, 2571 +; P9-LE-NEXT: rldic 4, 4, 32, 4 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: oris 4, 4, 3085 +; P9-LE-NEXT: ori 4, 4, 3599 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo14_int_le_reuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI13_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 134810123 +; P10-LE-NEXT: pli 5, 202182159 +; P10-LE-NEXT: rldimi 5, 4, 32, 0 +; P10-LE-NEXT: std 5, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <2 x i64> , ptr %a, align 8 + %arrayidx2 = getelementptr inbounds i64, ptr %a, i64 2 + store i64 579005069656919567, ptr %arrayidx2, align 8 + ret void +} + +define dso_local void @foo15_int_noreuse4B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo15_int_noreuse4B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C14(2) # %const.0 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: lis 4, 1029 +; P8-BE-NEXT: ori 4, 4, 1544 +; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo15_int_noreuse4B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C14(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 1029 +; P9-BE-NEXT: ori 4, 4, 1544 +; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo15_int_noreuse4B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C14(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 67438088 +; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo15_int_noreuse4B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI14_0@toc@ha +; P8-LE-NEXT: addi 4, 4, .LCPI14_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: lis 4, 1029 +; P8-LE-NEXT: ori 4, 4, 1544 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo15_int_noreuse4B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI14_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI14_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 1029 +; P9-LE-NEXT: ori 4, 4, 1544 +; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo15_int_noreuse4B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI14_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 67438088 +; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <4 x i32> , ptr %a, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 4 + store i32 67438088, ptr %arrayidx4, align 4 + ret void +} + +define dso_local void @foo16_int_noreuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: 
foo16_int_noreuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C15(2) # %const.0 +; P8-BE-NEXT: lis 5, 1 +; P8-BE-NEXT: ori 5, 5, 515 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: rldic 4, 5, 32, 15 +; P8-BE-NEXT: oris 4, 4, 1029 +; P8-BE-NEXT: ori 4, 4, 1544 +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo16_int_noreuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C15(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 1 +; P9-BE-NEXT: ori 4, 4, 515 +; P9-BE-NEXT: rldic 4, 4, 32, 15 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: oris 4, 4, 1029 +; P9-BE-NEXT: ori 4, 4, 1544 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo16_int_noreuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C15(2) # %const.0 +; P10-BE-NEXT: pli 5, 67438088 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 66051 +; P10-BE-NEXT: rldimi 5, 4, 32, 0 +; P10-BE-NEXT: std 5, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo16_int_noreuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI15_0@toc@ha +; P8-LE-NEXT: lis 5, 128 +; P8-LE-NEXT: addi 4, 4, .LCPI15_0@toc@l +; P8-LE-NEXT: ori 5, 5, 41153 +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: rldic 4, 5, 35, 5 +; P8-LE-NEXT: oris 4, 4, 1 +; P8-LE-NEXT: ori 4, 4, 515 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo16_int_noreuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI15_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI15_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 128 +; P9-LE-NEXT: ori 4, 4, 41153 +; P9-LE-NEXT: rldic 4, 4, 35, 5 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: oris 4, 4, 1 +; P9-LE-NEXT: ori 4, 4, 515 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo16_int_noreuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI15_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 67438088 +; P10-LE-NEXT: pli 5, 66051 +; P10-LE-NEXT: rldimi 5, 4, 32, 0 +; P10-LE-NEXT: std 5, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <4 x i32> , ptr %a, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 4 + store i32 66051, ptr %arrayidx4, align 4 + %arrayidx5 = getelementptr inbounds i32, ptr %a, i64 5 + store i32 67438088, ptr %arrayidx5, align 4 + ret void +} + +define dso_local void @foo17_fp_be_reuse4B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo17_fp_be_reuse4B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C16(2) # %const.0 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: lis 4, 16673 +; P8-BE-NEXT: ori 4, 4, 39322 +; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo17_fp_be_reuse4B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C16(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 16673 +; P9-BE-NEXT: ori 4, 4, 39322 +; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo17_fp_be_reuse4B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C16(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 1092721050 +; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo17_fp_be_reuse4B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI16_0@toc@ha +; P8-LE-NEXT: addi 4, 4, .LCPI16_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: lis 4, 16673 +; P8-LE-NEXT: ori 4, 4, 39322 +; P8-LE-NEXT: stxvd2x 0, 
0, 3 +; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo17_fp_be_reuse4B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI16_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI16_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 16673 +; P9-LE-NEXT: ori 4, 4, 39322 +; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo17_fp_be_reuse4B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI16_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 1092721050 +; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <4 x float> , ptr %a, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %a, i64 4 + store float 0x4024333340000000, ptr %arrayidx4, align 4 + ret void +} + +define dso_local void @foo18_fp_le_reuse4B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo18_fp_le_reuse4B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C17(2) # %const.0 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: lis 4, 16675 +; P8-BE-NEXT: ori 4, 4, 13107 +; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo18_fp_le_reuse4B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C17(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 16675 +; P9-BE-NEXT: ori 4, 4, 13107 +; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo18_fp_le_reuse4B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C17(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 1092825907 +; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo18_fp_le_reuse4B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI17_0@toc@ha +; P8-LE-NEXT: addi 4, 4, .LCPI17_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: lis 4, 16675 +; P8-LE-NEXT: ori 4, 4, 13107 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo18_fp_le_reuse4B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI17_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI17_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 16675 +; P9-LE-NEXT: ori 4, 4, 13107 +; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo18_fp_le_reuse4B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI17_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 1092825907 +; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <4 x float> , ptr %a, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %a, i64 4 + store float 0x4024666660000000, ptr %arrayidx4, align 4 + ret void +} + +define dso_local void @foo19_fp_be_reuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo19_fp_be_reuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C18(2) # %const.0 +; P8-BE-NEXT: lxvd2x 0, 0, 4 +; P8-BE-NEXT: li 4, 4105 +; P8-BE-NEXT: rldic 4, 4, 50, 1 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stxvd2x 0, 0, 3 +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo19_fp_be_reuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C18(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: li 4, 4105 +; P9-BE-NEXT: rldic 4, 4, 50, 1 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo19_fp_be_reuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C18(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: li 4, 4105 +; P10-BE-NEXT: rldic 4, 
4, 50, 1 +; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo19_fp_be_reuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI18_0@toc@ha +; P8-LE-NEXT: addi 4, 4, .LCPI18_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: li 4, 4105 +; P8-LE-NEXT: rldic 4, 4, 50, 1 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo19_fp_be_reuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI18_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI18_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: li 4, 4105 +; P9-LE-NEXT: rldic 4, 4, 50, 1 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo19_fp_be_reuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI18_0@PCREL(0), 1 +; P10-LE-NEXT: li 4, 4105 +; P10-LE-NEXT: rldic 4, 4, 50, 1 +; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <2 x double> , ptr %a, align 8 + %arrayidx2 = getelementptr inbounds double, ptr %a, i64 2 + store double 1.000000e+01, ptr %arrayidx2, align 8 + ret void +} + +define dso_local void @foo20_fp_le_reuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo20_fp_le_reuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C19(2) # %const.0 +; P8-BE-NEXT: lis 5, 16420 +; P8-BE-NEXT: lxvd2x 0, 0, 4 +; P8-BE-NEXT: ori 4, 5, 13107 +; P8-BE-NEXT: rldimi 4, 4, 32, 0 +; P8-BE-NEXT: rlwimi 4, 4, 16, 0, 15 +; P8-BE-NEXT: stxvd2x 0, 0, 3 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo20_fp_le_reuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C19(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 16420 +; P9-BE-NEXT: ori 4, 4, 13107 +; P9-BE-NEXT: rldimi 4, 4, 32, 0 +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: rlwimi 4, 4, 16, 0, 15 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo20_fp_le_reuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C19(2) # %const.0 +; P10-BE-NEXT: pli 5, 858993459 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 1076114227 +; P10-BE-NEXT: rldimi 5, 4, 32, 0 +; P10-BE-NEXT: std 5, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo20_fp_le_reuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI19_0@toc@ha +; P8-LE-NEXT: lis 5, 16420 +; P8-LE-NEXT: addi 4, 4, .LCPI19_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: ori 4, 5, 13107 +; P8-LE-NEXT: rldimi 4, 4, 32, 0 +; P8-LE-NEXT: rlwimi 4, 4, 16, 0, 15 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo20_fp_le_reuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI19_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI19_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 16420 +; P9-LE-NEXT: ori 4, 4, 13107 +; P9-LE-NEXT: rldimi 4, 4, 32, 0 +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: rlwimi 4, 4, 16, 0, 15 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo20_fp_le_reuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI19_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 1076114227 +; P10-LE-NEXT: pli 5, 858993459 +; P10-LE-NEXT: rldimi 5, 4, 32, 0 +; P10-LE-NEXT: std 5, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <2 x double> , ptr %a, align 8 + %arrayidx2 = getelementptr inbounds double, ptr %a, i64 2 + store double 1.010000e+01, ptr %arrayidx2, align 8 + ret void +} + +define dso_local void 
@foo21_fp_noreuse4B(ptr nocapture noundef writeonly %a) local_unnamed_addr #0 { +; P8-BE-LABEL: foo21_fp_noreuse4B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C20(2) # %const.0 +; P8-BE-NEXT: lxvw4x 0, 0, 4 +; P8-BE-NEXT: lis 4, 16268 +; P8-BE-NEXT: ori 4, 4, 52430 +; P8-BE-NEXT: stw 4, 16(3) +; P8-BE-NEXT: stxvw4x 0, 0, 3 +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo21_fp_noreuse4B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C20(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: lis 4, 16268 +; P9-BE-NEXT: ori 4, 4, 52430 +; P9-BE-NEXT: stw 4, 16(3) +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo21_fp_noreuse4B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C20(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: pli 4, 1066192078 +; P10-BE-NEXT: stw 4, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo21_fp_noreuse4B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI20_0@toc@ha +; P8-LE-NEXT: addi 4, 4, .LCPI20_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: lis 4, 16268 +; P8-LE-NEXT: ori 4, 4, 52430 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: stw 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo21_fp_noreuse4B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI20_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI20_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: lis 4, 16268 +; P9-LE-NEXT: ori 4, 4, 52430 +; P9-LE-NEXT: stw 4, 16(3) +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo21_fp_noreuse4B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: plxv 0, .LCPI20_0@PCREL(0), 1 +; P10-LE-NEXT: pli 4, 1066192078 +; P10-LE-NEXT: stw 4, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <4 x float> , ptr %a, align 4 + %arrayidx4 = getelementptr inbounds float, ptr %a, i64 4 + store float 0x3FF19999C0000000, ptr %arrayidx4, align 4 + ret void +} + +define dso_local void @foo22_fp_noreuse8B(ptr nocapture noundef writeonly %a) local_unnamed_addr { +; P8-BE-LABEL: foo22_fp_noreuse8B: +; P8-BE: # %bb.0: # %entry +; P8-BE-NEXT: ld 4, L..C21(2) # %const.0 +; P8-BE-NEXT: lxvd2x 0, 0, 4 +; P8-BE-NEXT: li 4, 21503 +; P8-BE-NEXT: rotldi 4, 4, 52 +; P8-BE-NEXT: std 4, 16(3) +; P8-BE-NEXT: stxvd2x 0, 0, 3 +; P8-BE-NEXT: blr +; +; P9-BE-LABEL: foo22_fp_noreuse8B: +; P9-BE: # %bb.0: # %entry +; P9-BE-NEXT: ld 4, L..C21(2) # %const.0 +; P9-BE-NEXT: lxv 0, 0(4) +; P9-BE-NEXT: li 4, 21503 +; P9-BE-NEXT: rotldi 4, 4, 52 +; P9-BE-NEXT: std 4, 16(3) +; P9-BE-NEXT: stxv 0, 0(3) +; P9-BE-NEXT: blr +; +; P10-BE-LABEL: foo22_fp_noreuse8B: +; P10-BE: # %bb.0: # %entry +; P10-BE-NEXT: ld 4, L..C21(2) # %const.0 +; P10-BE-NEXT: lxv 0, 0(4) +; P10-BE-NEXT: li 4, 21503 +; P10-BE-NEXT: rotldi 4, 4, 52 +; P10-BE-NEXT: std 4, 16(3) +; P10-BE-NEXT: stxv 0, 0(3) +; P10-BE-NEXT: blr +; +; P8-LE-LABEL: foo22_fp_noreuse8B: +; P8-LE: # %bb.0: # %entry +; P8-LE-NEXT: addis 4, 2, .LCPI21_0@toc@ha +; P8-LE-NEXT: addi 4, 4, .LCPI21_0@toc@l +; P8-LE-NEXT: lxvd2x 0, 0, 4 +; P8-LE-NEXT: li 4, 21503 +; P8-LE-NEXT: rotldi 4, 4, 52 +; P8-LE-NEXT: stxvd2x 0, 0, 3 +; P8-LE-NEXT: std 4, 16(3) +; P8-LE-NEXT: blr +; +; P9-LE-LABEL: foo22_fp_noreuse8B: +; P9-LE: # %bb.0: # %entry +; P9-LE-NEXT: addis 4, 2, .LCPI21_0@toc@ha +; P9-LE-NEXT: addi 4, 4, .LCPI21_0@toc@l +; P9-LE-NEXT: lxv 0, 0(4) +; P9-LE-NEXT: li 4, 21503 +; P9-LE-NEXT: rotldi 4, 4, 52 +; P9-LE-NEXT: std 4, 16(3) +; P9-LE-NEXT: stxv 0, 0(3) +; P9-LE-NEXT: blr +; +; P10-LE-LABEL: foo22_fp_noreuse8B: +; P10-LE: # %bb.0: # %entry +; P10-LE-NEXT: 
plxv 0, .LCPI21_0@PCREL(0), 1 +; P10-LE-NEXT: li 4, 21503 +; P10-LE-NEXT: rotldi 4, 4, 52 +; P10-LE-NEXT: std 4, 16(3) +; P10-LE-NEXT: stxv 0, 0(3) +; P10-LE-NEXT: blr +entry: + store <2 x double> , ptr %a, align 8 + %arrayidx2 = getelementptr inbounds double, ptr %a, i64 2 + store double 0x3FF0000000000005, ptr %arrayidx2, align 8 + ret void +} From 43f5085fa80f716acf93870618b1d93ec85c1d01 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Tue, 7 Mar 2023 12:51:34 -0800 Subject: [PATCH 263/691] [Coroutines] Fix premature conversion of return object Fix https://github.com/llvm/llvm-project/issues/56532 Effectively, this reverts behavior introduced in https://reviews.llvm.org/D117087, which did two things: 1. Changed the conversion of the return object from delayed to early. 2. Introduced RVO possibilities because of the early conversion. This patch fixes (1) and removes (2). I already worked on a follow-up for (2) in a separate patch. I believe it's important to split these two because if the RVO causes any problems we can explore reverting (2) while maintaining (1). Notes on some testcase changes: - `pr59221.cpp` changed to `-O1` so we can check that the front-end honors the value checked for. It sounds like `-O3` without RVO is more likely to work because of LLVM optimizations... - Comment out deleted members in `coroutine-no-move-ctor.cpp` since the behavior now requires copies again. Differential Revision: https://reviews.llvm.org/D145639 --- clang/include/clang/AST/StmtCXX.h | 3 + clang/lib/AST/StmtCXX.cpp | 1 + clang/lib/CodeGen/CGCoroutine.cpp | 97 ++++++++++++++----- clang/lib/Sema/SemaCoroutine.cpp | 45 ++++++++- clang/lib/Sema/TreeTransform.h | 6 ++ clang/test/CodeGenCoroutines/coro-gro.cpp | 20 +++- clang/test/CodeGenCoroutines/pr59221.cpp | 2 +- clang/test/SemaCXX/coroutine-no-move-ctor.cpp | 11 ++- clang/test/SemaCXX/coroutines.cpp | 2 +- .../SemaCXX/warn-throw-out-noexcept-coro.cpp | 2 - 10 files changed, 154 insertions(+), 35 deletions(-) diff --git a/clang/include/clang/AST/StmtCXX.h b/clang/include/clang/AST/StmtCXX.h index 2c71f86768963..05dfac2b50c3f 100644 --- a/clang/include/clang/AST/StmtCXX.h +++ b/clang/include/clang/AST/StmtCXX.h @@ -326,6 +326,7 @@ class CoroutineBodyStmt final OnFallthrough, ///< Handler for control flow falling off the body. Allocate, ///< Coroutine frame memory allocation. Deallocate, ///< Coroutine frame memory deallocation. + ResultDecl, ///< Declaration holding the result of get_return_object. ReturnValue, ///< Return value for thunk function: p.get_return_object(). ReturnStmt, ///< Return statement for the thunk function. ReturnStmtOnAllocFailure, ///< Return statement if allocation failed.
@@ -352,6 +353,7 @@ class CoroutineBodyStmt final Stmt *OnFallthrough = nullptr; Expr *Allocate = nullptr; Expr *Deallocate = nullptr; + Stmt *ResultDecl = nullptr; Expr *ReturnValue = nullptr; Stmt *ReturnStmt = nullptr; Stmt *ReturnStmtOnAllocFailure = nullptr; @@ -404,6 +406,7 @@ class CoroutineBodyStmt final Expr *getDeallocate() const { return cast_or_null<Expr>(getStoredStmts()[SubStmt::Deallocate]); } + Stmt *getResultDecl() const { return getStoredStmts()[SubStmt::ResultDecl]; } Expr *getReturnValueInit() const { return cast<Expr>(getStoredStmts()[SubStmt::ReturnValue]); } diff --git a/clang/lib/AST/StmtCXX.cpp b/clang/lib/AST/StmtCXX.cpp index 33b0421ad1016..a3ae5392f54bc 100644 --- a/clang/lib/AST/StmtCXX.cpp +++ b/clang/lib/AST/StmtCXX.cpp @@ -117,6 +117,7 @@ CoroutineBodyStmt::CoroutineBodyStmt(CoroutineBodyStmt::CtorArgs const &Args) SubStmts[CoroutineBodyStmt::OnFallthrough] = Args.OnFallthrough; SubStmts[CoroutineBodyStmt::Allocate] = Args.Allocate; SubStmts[CoroutineBodyStmt::Deallocate] = Args.Deallocate; + SubStmts[CoroutineBodyStmt::ResultDecl] = Args.ResultDecl; SubStmts[CoroutineBodyStmt::ReturnValue] = Args.ReturnValue; SubStmts[CoroutineBodyStmt::ReturnStmt] = Args.ReturnStmt; SubStmts[CoroutineBodyStmt::ReturnStmtOnAllocFailure] = diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 9b233c1807cf1..38167cc74a7f3 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -467,6 +467,71 @@ struct CallCoroDelete final : public EHScopeStack::Cleanup { }; } +namespace { +struct GetReturnObjectManager { + CodeGenFunction &CGF; + CGBuilderTy &Builder; + const CoroutineBodyStmt &S; + + Address GroActiveFlag; + CodeGenFunction::AutoVarEmission GroEmission; + + GetReturnObjectManager(CodeGenFunction &CGF, const CoroutineBodyStmt &S) + : CGF(CGF), Builder(CGF.Builder), S(S), GroActiveFlag(Address::invalid()), + GroEmission(CodeGenFunction::AutoVarEmission::invalid()) {} + + // The gro variable has to outlive coroutine frame and coroutine promise, but, + // it can only be initialized after coroutine promise was created, thus, we + // split its emission in two parts. EmitGroAlloca emits an alloca and sets up + // cleanups. Later when coroutine promise is available we initialize the gro + // and sets the flag that the cleanup is now active. + void EmitGroAlloca() { + auto *GroDeclStmt = dyn_cast<DeclStmt>(S.getResultDecl()); + if (!GroDeclStmt) { + // If get_return_object returns void, no need to do an alloca. + return; + } + + auto *GroVarDecl = cast<VarDecl>(GroDeclStmt->getSingleDecl()); + + // Set GRO flag that it is not initialized yet + GroActiveFlag = CGF.CreateTempAlloca(Builder.getInt1Ty(), CharUnits::One(), + "gro.active"); + Builder.CreateStore(Builder.getFalse(), GroActiveFlag); + + GroEmission = CGF.EmitAutoVarAlloca(*GroVarDecl); + + // Remember the top of EHStack before emitting the cleanup. + auto old_top = CGF.EHStack.stable_begin(); + CGF.EmitAutoVarCleanups(GroEmission); + auto top = CGF.EHStack.stable_begin(); + + // Make the cleanup conditional on gro.active + for (auto b = CGF.EHStack.find(top), e = CGF.EHStack.find(old_top); b != e; + b++) { + if (auto *Cleanup = dyn_cast<EHCleanupScope>(&*b)) { + assert(!Cleanup->hasActiveFlag() && "cleanup already has active flag?"); + Cleanup->setActiveFlag(GroActiveFlag); + Cleanup->setTestFlagInEHCleanup(); + Cleanup->setTestFlagInNormalCleanup(); + } + } + } + + void EmitGroInit() { + if (!GroActiveFlag.isValid()) { + // No Gro variable was allocated. Simply emit the call to + // get_return_object.
+      CGF.EmitStmt(S.getResultDecl());
+      return;
+    }
+
+    CGF.EmitAutoVarInit(GroEmission);
+    Builder.CreateStore(Builder.getTrue(), GroActiveFlag);
+  }
+};
+} // namespace
+
 static void emitBodyAndFallthrough(CodeGenFunction &CGF,
                                    const CoroutineBodyStmt &S, Stmt *Body) {
   CGF.EmitStmt(Body);
@@ -533,6 +598,13 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
       CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi});
   CurCoro.Data->CoroBegin = CoroBegin;
 
+  // We need to emit `get_return_object` first. According to:
+  // [dcl.fct.def.coroutine]p7
+  // The call to get_return_object is sequenced before the call to
+  // initial_suspend and is invoked at most once.
+  GetReturnObjectManager GroManager(*this, S);
+  GroManager.EmitGroAlloca();
+
   CurCoro.Data->CleanupJD = getJumpDestInCurrentScope(RetBB);
   {
     CGDebugInfo *DI = getDebugInfo();
@@ -570,23 +642,8 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
     // promise local variable was not emitted yet.
     CoroId->setArgOperand(1, PromiseAddrVoidPtr);
 
-    // ReturnValue should be valid as long as the coroutine's return type
-    // is not void. The assertion could help us to reduce the check later.
-    assert(ReturnValue.isValid() == (bool)S.getReturnStmt());
-    // Now we have the promise, initialize the GRO.
-    // We need to emit `get_return_object` first. According to:
-    // [dcl.fct.def.coroutine]p7
-    // The call to get_return_object is sequenced before the call to
-    // initial_suspend and is invoked at most once.
-    //
-    // So we couldn't emit the return value when we emit the return
-    // statement; otherwise the call to get_return_object wouldn't be in
-    // front of initial_suspend.
-    if (ReturnValue.isValid()) {
-      EmitAnyExprToMem(S.getReturnValue(), ReturnValue,
-                       S.getReturnValue()->getType().getQualifiers(),
-                       /*IsInit*/ true);
-    }
+    // Now we have the promise, initialize the GRO.
+    GroManager.EmitGroInit();
 
     EHStack.pushCleanup<CallCoroDelete>(EHCleanup);
@@ -649,12 +706,8 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
     llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end);
     Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()});
 
-    if (Stmt *Ret = S.getReturnStmt()) {
-      // Since we already emitted the return value above, we shouldn't
-      // emit it again here.
-      cast<ReturnStmt>(Ret)->setRetValue(nullptr);
+    if (Stmt *Ret = S.getReturnStmt())
       EmitStmt(Ret);
-    }
 
     // LLVM requires the frontend to mark the coroutine.
    CurFn->setPresplitCoroutine();
diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp
index 0dcfbd5281d1d..22f9bd6a404c9 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -1736,6 +1736,7 @@ bool CoroutineStmtBuilder::makeGroDeclAndReturnStmt() {
   if (Res.isInvalid())
     return false;
 
+  this->ResultDecl = Res.get();
   return true;
 }
 
@@ -1748,12 +1749,54 @@ bool CoroutineStmtBuilder::makeGroDeclAndReturnStmt() {
     return false;
   }
 
-  StmtResult ReturnStmt = S.BuildReturnStmt(Loc, ReturnValue);
+  auto *GroDecl = VarDecl::Create(
+      S.Context, &FD, FD.getLocation(), FD.getLocation(),
+      &S.PP.getIdentifierTable().get("__coro_gro"), GroType,
+      S.Context.getTrivialTypeSourceInfo(GroType, Loc), SC_None);
+  GroDecl->setImplicit();
+
+  S.CheckVariableDeclarationType(GroDecl);
+  if (GroDecl->isInvalidDecl())
+    return false;
+
+  InitializedEntity Entity = InitializedEntity::InitializeVariable(GroDecl);
+  ExprResult Res =
+      S.PerformCopyInitialization(Entity, SourceLocation(), ReturnValue);
+  if (Res.isInvalid())
+    return false;
+
+  Res = S.ActOnFinishFullExpr(Res.get(), /*DiscardedValue*/ false);
+  if (Res.isInvalid())
+    return false;
+
+  S.AddInitializerToDecl(GroDecl, Res.get(),
+                         /*DirectInit=*/false);
+
+  S.FinalizeDeclaration(GroDecl);
+
+  // Form a declaration statement for the return declaration, so that AST
+  // visitors can more easily find it.
+  StmtResult GroDeclStmt =
+      S.ActOnDeclStmt(S.ConvertDeclToDeclGroup(GroDecl), Loc, Loc);
+  if (GroDeclStmt.isInvalid())
+    return false;
+
+  this->ResultDecl = GroDeclStmt.get();
+
+  ExprResult declRef = S.BuildDeclRefExpr(GroDecl, GroType, VK_LValue, Loc);
+  if (declRef.isInvalid())
+    return false;
+
+  StmtResult ReturnStmt = S.BuildReturnStmt(Loc, declRef.get());
+
   if (ReturnStmt.isInvalid()) {
     noteMemberDeclaredHere(S, ReturnValue, Fn);
     return false;
   }
 
+  if (cast<ReturnStmt>(ReturnStmt.get())->getNRVOCandidate() == GroDecl)
+    GroDecl->setNRVOVariable(true);
+
   this->ReturnStmt = ReturnStmt.get();
   return true;
 }
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 8ba8648f17c94..19c6f642015de 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -8103,6 +8103,12 @@ TreeTransform<Derived>::TransformCoroutineBodyStmt(CoroutineBodyStmt *S) {
     return StmtError();
   Builder.Deallocate = DeallocRes.get();
 
+  assert(S->getResultDecl() && "ResultDecl must already be built");
+  StmtResult ResultDecl = getDerived().TransformStmt(S->getResultDecl());
+  if (ResultDecl.isInvalid())
+    return StmtError();
+  Builder.ResultDecl = ResultDecl.get();
+
   if (auto *ReturnStmt = S->getReturnStmt()) {
     StmtResult Res = getDerived().TransformStmt(ReturnStmt);
     if (Res.isInvalid())
diff --git a/clang/test/CodeGenCoroutines/coro-gro.cpp b/clang/test/CodeGenCoroutines/coro-gro.cpp
index fad75c9433076..ddcf112f0da6b 100644
--- a/clang/test/CodeGenCoroutines/coro-gro.cpp
+++ b/clang/test/CodeGenCoroutines/coro-gro.cpp
@@ -46,13 +46,14 @@ void doSomething() noexcept;
 // CHECK: define{{.*}} i32 @_Z1fv(
 int f() {
   // CHECK: %[[RetVal:.+]] = alloca i32
+  // CHECK: %[[GroActive:.+]] = alloca i1
 
   // CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64()
   // CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef %[[Size]])
+  // CHECK: store i1 false, ptr %[[GroActive]]
   // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_typeC1Ev(
-  // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_type17get_return_objectEv(ptr sret(%struct.GroType) align {{[0-9]+}} %[[GRO:.+]],
-  // CHECK: %[[Conv:.+]] = call noundef i32 @_ZN7GroTypecviEv({{.*}}[[GRO]]
-  // CHECK: store i32 %[[Conv]], ptr %[[RetVal]]
+  // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_type17get_return_objectEv(
+  // CHECK: store i1 true, ptr %[[GroActive]]
 
   Cleanup cleanup;
   doSomething();
@@ -68,7 +69,18 @@ int f() {
   // CHECK: %[[Mem:.+]] = call ptr @llvm.coro.free(
   // CHECK: call void @_ZdlPv(ptr noundef %[[Mem]])
 
-  // CHECK: coro.ret:
+  // Initialize retval from Gro and destroy Gro
+
+  // CHECK: %[[Conv:.+]] = call noundef i32 @_ZN7GroTypecviEv(
+  // CHECK: store i32 %[[Conv]], ptr %[[RetVal]]
+  // CHECK: %[[IsActive:.+]] = load i1, ptr %[[GroActive]]
+  // CHECK: br i1 %[[IsActive]], label %[[CleanupGro:.+]], label %[[Done:.+]]
+
+  // CHECK: [[CleanupGro]]:
+  // CHECK: call void @_ZN7GroTypeD1Ev(
+  // CHECK: br label %[[Done]]
+
+  // CHECK: [[Done]]:
   // CHECK: %[[LoadRet:.+]] = load i32, ptr %[[RetVal]]
   // CHECK: ret i32 %[[LoadRet]]
 }
diff --git a/clang/test/CodeGenCoroutines/pr59221.cpp b/clang/test/CodeGenCoroutines/pr59221.cpp
index ae5f6fdbdea92..e0e3de559a403 100644
--- a/clang/test/CodeGenCoroutines/pr59221.cpp
+++ b/clang/test/CodeGenCoroutines/pr59221.cpp
@@ -2,7 +2,7 @@
 //
 // REQUIRES: x86-registered-target
 //
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 %s -O3 -S -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 %s -O1 -S -emit-llvm -o - | FileCheck %s
 
 #include "Inputs/coroutine.h"
 
diff --git a/clang/test/SemaCXX/coroutine-no-move-ctor.cpp b/clang/test/SemaCXX/coroutine-no-move-ctor.cpp
index 824dea375ebde..08933f4df7a8e 100644
--- a/clang/test/SemaCXX/coroutine-no-move-ctor.cpp
+++ b/clang/test/SemaCXX/coroutine-no-move-ctor.cpp
@@ -15,10 +15,13 @@ class invoker {
   };
   using promise_type = invoker_promise;
   invoker() {}
-  invoker(const invoker &) = delete;
-  invoker &operator=(const invoker &) = delete;
-  invoker(invoker &&) = delete;
-  invoker &operator=(invoker &&) = delete;
+  // TODO: implement RVO for get_return_object type matching
+  // function return type.
+  //
+  // invoker(const invoker &) = delete;
+  // invoker &operator=(const invoker &) = delete;
+  // invoker(invoker &&) = delete;
+  // invoker &operator=(invoker &&) = delete;
 };
 
 invoker f() {
diff --git a/clang/test/SemaCXX/coroutines.cpp b/clang/test/SemaCXX/coroutines.cpp
index e480c0d34593a..782f4b2f63333 100644
--- a/clang/test/SemaCXX/coroutines.cpp
+++ b/clang/test/SemaCXX/coroutines.cpp
@@ -934,7 +934,7 @@ struct std::coroutine_traits<int, mismatch_gro_type_tag2> {
 extern "C" int f(mismatch_gro_type_tag2) {
   // cxx2b-error@-1 {{cannot initialize return object of type 'int' with an rvalue of type 'void *'}}
-  // cxx14_20-error@-2 {{cannot initialize return object of type 'int' with an rvalue of type 'void *'}}
+  // cxx14_20-error@-2 {{cannot initialize return object of type 'int' with an lvalue of type 'void *'}}
   co_return; //expected-note {{function is a coroutine due to use of 'co_return' here}}
 }
 
diff --git a/clang/test/SemaCXX/warn-throw-out-noexcept-coro.cpp b/clang/test/SemaCXX/warn-throw-out-noexcept-coro.cpp
index 4d52bdca7ca93..e96aae4fefc6b 100644
--- a/clang/test/SemaCXX/warn-throw-out-noexcept-coro.cpp
+++ b/clang/test/SemaCXX/warn-throw-out-noexcept-coro.cpp
@@ -13,8 +13,6 @@ struct task {
   explicit task(promise_type& p) { throw 1; p.return_val = this; }
 
-  task(const task&) = delete;
-
   T value;
 };
 

From fa0d4e1f12a3f69dd0afb07c0928c867ab921537 Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Wed, 8 Mar 2023 16:00:02 -0800
Subject: [PATCH 264/691] [Coroutines] Implement fix for cwg2563 issue and
 enable RVO under certain conditions

- The cwg2563 issue is fixed by delaying GRO initialization only when the
  types mismatch between GRO and function return.
- When the types match, initialize directly, which indirectly enables RVO
  to kick in; this partially restores the behavior introduced in
  https://reviews.llvm.org/D117087.
- Add entry to release notes.

Background:
https://github.com/llvm/llvm-project/issues/56532
https://cplusplus.github.io/CWG/issues/2563.html
https://github.com/cplusplus/papers/issues/1414

Differential Revision: https://reviews.llvm.org/D145641
---
 clang/docs/ReleaseNotes.rst                   |  6 ++
 clang/include/clang/AST/StmtCXX.h             |  5 +-
 clang/lib/CodeGen/CGCoroutine.cpp             | 67 +++++++++++++--
 clang/lib/Sema/SemaCoroutine.cpp              | 82 +++++++++++--------
 clang/lib/Sema/TreeTransform.h                | 11 +--
 clang/test/CodeGenCoroutines/coro-gro.cpp     | 66 +++++++++------
 clang/test/SemaCXX/coroutine-no-move-ctor.cpp | 11 +--
 7 files changed, 170 insertions(+), 78 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c0162cf506cbc..005bf99a62457 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -219,6 +219,12 @@ Bug Fixes in This Version
 - Fix crash when using ``[[clang::always_inline]]`` or ``[[clang::noinline]]``
   statement attributes on a call to a template function in the body of a
   template function.
+- Fix coroutines issue where ``get_return_object()`` result was always eagerly
+  converted to the return type. Eager initialization (allowing RVO) is now only
+  performed when these types match; otherwise deferred initialization is used,
+  enabling short-circuiting coroutine use cases. This fixes
+  (`#56532 <https://github.com/llvm/llvm-project/issues/56532>`_) in
+  anticipation of `CWG2563 <https://cplusplus.github.io/CWG/issues/2563.html>`_.
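[Illustrative sketch, not part of the patch: the two initialization paths the
note above describes, using invented type names. In `direct()` the GRO type
matches the coroutine's return type, so it is constructed in the return slot
(RVO-friendly); in `delayed()` the GRO is a distinct convertible type, so the
conversion is deferred until control returns to the caller.]

#include <coroutine>

struct task {
  struct promise_type {
    task get_return_object() { return {}; } // same type as the return type:
                                            // eager initialization, RVO
    std::suspend_never initial_suspend() { return {}; }
    std::suspend_never final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
};
task direct() { co_return; } // GRO type == return type: direct emission

struct lazy {
  struct builder {
    operator lazy() const { return {}; } // conversion happens late
  };
  struct promise_type {
    builder get_return_object() { return {}; } // distinct, convertible type:
                                               // deferred initialization
    std::suspend_never initial_suspend() { return {}; }
    std::suspend_never final_suspend() noexcept { return {}; }
    void return_void() {}
    void unhandled_exception() {}
  };
};
lazy delayed() { co_return; } // GRO converted when control returns to caller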
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/include/clang/AST/StmtCXX.h b/clang/include/clang/AST/StmtCXX.h
index 05dfac2b50c3f..8ba667c02fc09 100644
--- a/clang/include/clang/AST/StmtCXX.h
+++ b/clang/include/clang/AST/StmtCXX.h
@@ -411,9 +411,8 @@ class CoroutineBodyStmt final
     return cast<Expr>(getStoredStmts()[SubStmt::ReturnValue]);
   }
   Expr *getReturnValue() const {
-    assert(getReturnStmt());
-    auto *RS = cast<ReturnStmt>(getReturnStmt());
-    return RS->getRetValue();
+    auto *RS = dyn_cast_or_null<ReturnStmt>(getReturnStmt());
+    return RS ? RS->getRetValue() : nullptr;
   }
   Stmt *getReturnStmt() const { return getStoredStmts()[SubStmt::ReturnStmt]; }
   Stmt *getReturnStmtOnAllocFailure() const {
diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp
index 38167cc74a7f3..da3da5e600104 100644
--- a/clang/lib/CodeGen/CGCoroutine.cpp
+++ b/clang/lib/CodeGen/CGCoroutine.cpp
@@ -472,13 +472,41 @@ struct GetReturnObjectManager {
   CodeGenFunction &CGF;
   CGBuilderTy &Builder;
   const CoroutineBodyStmt &S;
+  // When true, performs RVO for the return object.
+  bool DirectEmit = false;
 
   Address GroActiveFlag;
   CodeGenFunction::AutoVarEmission GroEmission;
 
   GetReturnObjectManager(CodeGenFunction &CGF, const CoroutineBodyStmt &S)
       : CGF(CGF), Builder(CGF.Builder), S(S), GroActiveFlag(Address::invalid()),
-        GroEmission(CodeGenFunction::AutoVarEmission::invalid()) {}
+        GroEmission(CodeGenFunction::AutoVarEmission::invalid()) {
+    // The call to get_return_object is sequenced before the call to
+    // initial_suspend and is invoked at most once, but there are caveats
+    // regarding whether the prvalue result object may be initialized
+    // directly/eagerly or delayed, depending on the types involved.
+    //
+    // More info at https://github.com/cplusplus/papers/issues/1414
+    //
+    // The general cases:
+    // 1. Same type of get_return_object and coroutine return type (direct
+    //    emission):
+    //    - Constructed in the return slot.
+    // 2. Different types (delayed emission):
+    //    - A temporary object is constructed prior to the initial suspend,
+    //      initialized with a call to get_return_object().
+    //    - When the coroutine needs to return to the caller and needs to
+    //      construct the return value for the coroutine, that value is
+    //      initialized with the expiring value of the temporary obtained
+    //      above.
+    //
+    // Direct emission for void returning coroutines or GROs.
+    DirectEmit = [&]() {
+      auto *RVI = S.getReturnValueInit();
+      assert(RVI && "expected RVI");
+      auto GroType = RVI->getType();
+      return CGF.getContext().hasSameType(GroType, CGF.FnRetTy);
+    }();
+  }
 
   // The gro variable has to outlive the coroutine frame and the coroutine
   // promise, but it can only be initialized after the coroutine promise was
@@ -486,7 +514,10 @@ struct GetReturnObjectManager {
   // available, we initialize the gro and set the flag that the cleanup is now
   // active.
   void EmitGroAlloca() {
-    auto *GroDeclStmt = dyn_cast<DeclStmt>(S.getResultDecl());
+    if (DirectEmit)
+      return;
+
+    auto *GroDeclStmt = dyn_cast_or_null<DeclStmt>(S.getResultDecl());
     if (!GroDeclStmt) {
       // If get_return_object returns void, no need to do an alloca.
       return;
@@ -519,6 +550,27 @@ struct GetReturnObjectManager {
   }
 
   void EmitGroInit() {
+    if (DirectEmit) {
+      // ReturnValue should be valid as long as the coroutine's return type
+      // is not void. The assertion could help us to reduce the check later.
+      assert(CGF.ReturnValue.isValid() == (bool)S.getReturnStmt());
+      // Now we have the promise, initialize the GRO.
+      // We need to emit `get_return_object` first. According to:
+      // [dcl.fct.def.coroutine]p7
+      // The call to get_return_object is sequenced before the call to
+      // initial_suspend and is invoked at most once.
+      //
+      // So we couldn't emit the return value when we emit the return
+      // statement; otherwise the call to get_return_object wouldn't be in
+      // front of initial_suspend.
+      if (CGF.ReturnValue.isValid()) {
+        CGF.EmitAnyExprToMem(S.getReturnValue(), CGF.ReturnValue,
+                             S.getReturnValue()->getType().getQualifiers(),
+                             /*IsInit*/ true);
+      }
+      return;
+    }
+
     if (!GroActiveFlag.isValid()) {
       // No Gro variable was allocated. Simply emit the call to
       // get_return_object.
@@ -598,10 +650,6 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
       CGM.getIntrinsic(llvm::Intrinsic::coro_begin), {CoroId, Phi});
   CurCoro.Data->CoroBegin = CoroBegin;
 
-  // We need to emit `get_return_object` first. According to:
-  // [dcl.fct.def.coroutine]p7
-  // The call to get_return_object is sequenced before the call to
-  // initial_suspend and is invoked at most once.
   GetReturnObjectManager GroManager(*this, S);
   GroManager.EmitGroAlloca();
 
@@ -706,8 +754,13 @@ void CodeGenFunction::EmitCoroutineBody(const CoroutineBodyStmt &S) {
     llvm::Function *CoroEnd = CGM.getIntrinsic(llvm::Intrinsic::coro_end);
     Builder.CreateCall(CoroEnd, {NullPtr, Builder.getFalse()});
 
-    if (Stmt *Ret = S.getReturnStmt())
+    if (Stmt *Ret = S.getReturnStmt()) {
+      // Since we already emitted the return value above, we shouldn't
+      // emit it again here.
+      if (GroManager.DirectEmit)
+        cast<ReturnStmt>(Ret)->setRetValue(nullptr);
       EmitStmt(Ret);
+    }
 
     // LLVM requires the frontend to mark the coroutine.
     CurFn->setPresplitCoroutine();
diff --git a/clang/lib/Sema/SemaCoroutine.cpp b/clang/lib/Sema/SemaCoroutine.cpp
index 22f9bd6a404c9..e87f2a78e2394 100644
--- a/clang/lib/Sema/SemaCoroutine.cpp
+++ b/clang/lib/Sema/SemaCoroutine.cpp
@@ -1730,13 +1730,22 @@ bool CoroutineStmtBuilder::makeGroDeclAndReturnStmt() {
   assert(!FnRetType->isDependentType() &&
          "get_return_object type must no longer be dependent");
 
+  // The call to get_return_object is sequenced before the call to
+  // initial_suspend and is invoked at most once, but there are caveats
+  // regarding whether the prvalue result object may be initialized
+  // directly/eagerly or delayed, depending on the types involved.
+ // + // More info at https://github.com/cplusplus/papers/issues/1414 + bool GroMatchesRetType = S.getASTContext().hasSameType(GroType, FnRetType); + if (FnRetType->isVoidType()) { ExprResult Res = S.ActOnFinishFullExpr(this->ReturnValue, Loc, /*DiscardedValue*/ false); if (Res.isInvalid()) return false; - this->ResultDecl = Res.get(); + if (!GroMatchesRetType) + this->ResultDecl = Res.get(); return true; } @@ -1749,52 +1758,59 @@ bool CoroutineStmtBuilder::makeGroDeclAndReturnStmt() { return false; } - auto *GroDecl = VarDecl::Create( - S.Context, &FD, FD.getLocation(), FD.getLocation(), - &S.PP.getIdentifierTable().get("__coro_gro"), GroType, - S.Context.getTrivialTypeSourceInfo(GroType, Loc), SC_None); - GroDecl->setImplicit(); - - S.CheckVariableDeclarationType(GroDecl); - if (GroDecl->isInvalidDecl()) - return false; + StmtResult ReturnStmt; + clang::VarDecl *GroDecl = nullptr; + if (GroMatchesRetType) { + ReturnStmt = S.BuildReturnStmt(Loc, ReturnValue); + } else { + GroDecl = VarDecl::Create( + S.Context, &FD, FD.getLocation(), FD.getLocation(), + &S.PP.getIdentifierTable().get("__coro_gro"), GroType, + S.Context.getTrivialTypeSourceInfo(GroType, Loc), SC_None); + GroDecl->setImplicit(); + + S.CheckVariableDeclarationType(GroDecl); + if (GroDecl->isInvalidDecl()) + return false; - InitializedEntity Entity = InitializedEntity::InitializeVariable(GroDecl); - ExprResult Res = - S.PerformCopyInitialization(Entity, SourceLocation(), ReturnValue); - if (Res.isInvalid()) - return false; + InitializedEntity Entity = InitializedEntity::InitializeVariable(GroDecl); + ExprResult Res = + S.PerformCopyInitialization(Entity, SourceLocation(), ReturnValue); + if (Res.isInvalid()) + return false; - Res = S.ActOnFinishFullExpr(Res.get(), /*DiscardedValue*/ false); - if (Res.isInvalid()) - return false; + Res = S.ActOnFinishFullExpr(Res.get(), /*DiscardedValue*/ false); + if (Res.isInvalid()) + return false; - S.AddInitializerToDecl(GroDecl, Res.get(), - /*DirectInit=*/false); + S.AddInitializerToDecl(GroDecl, Res.get(), + /*DirectInit=*/false); - S.FinalizeDeclaration(GroDecl); + S.FinalizeDeclaration(GroDecl); - // Form a declaration statement for the return declaration, so that AST - // visitors can more easily find it. - StmtResult GroDeclStmt = - S.ActOnDeclStmt(S.ConvertDeclToDeclGroup(GroDecl), Loc, Loc); - if (GroDeclStmt.isInvalid()) - return false; + // Form a declaration statement for the return declaration, so that AST + // visitors can more easily find it. 
+ StmtResult GroDeclStmt = + S.ActOnDeclStmt(S.ConvertDeclToDeclGroup(GroDecl), Loc, Loc); + if (GroDeclStmt.isInvalid()) + return false; - this->ResultDecl = GroDeclStmt.get(); + this->ResultDecl = GroDeclStmt.get(); - ExprResult declRef = S.BuildDeclRefExpr(GroDecl, GroType, VK_LValue, Loc); - if (declRef.isInvalid()) - return false; + ExprResult declRef = S.BuildDeclRefExpr(GroDecl, GroType, VK_LValue, Loc); + if (declRef.isInvalid()) + return false; - StmtResult ReturnStmt = S.BuildReturnStmt(Loc, declRef.get()); + ReturnStmt = S.BuildReturnStmt(Loc, declRef.get()); + } if (ReturnStmt.isInvalid()) { noteMemberDeclaredHere(S, ReturnValue, Fn); return false; } - if (cast(ReturnStmt.get())->getNRVOCandidate() == GroDecl) + if (!GroMatchesRetType && + cast(ReturnStmt.get())->getNRVOCandidate() == GroDecl) GroDecl->setNRVOVariable(true); this->ReturnStmt = ReturnStmt.get(); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 19c6f642015de..d8fce0d5dc64b 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -8103,11 +8103,12 @@ TreeTransform::TransformCoroutineBodyStmt(CoroutineBodyStmt *S) { return StmtError(); Builder.Deallocate = DeallocRes.get(); - assert(S->getResultDecl() && "ResultDecl must already be built"); - StmtResult ResultDecl = getDerived().TransformStmt(S->getResultDecl()); - if (ResultDecl.isInvalid()) - return StmtError(); - Builder.ResultDecl = ResultDecl.get(); + if (auto *ResultDecl = S->getResultDecl()) { + StmtResult Res = getDerived().TransformStmt(ResultDecl); + if (Res.isInvalid()) + return StmtError(); + Builder.ResultDecl = Res.get(); + } if (auto *ReturnStmt = S->getReturnStmt()) { StmtResult Res = getDerived().TransformStmt(ReturnStmt); diff --git a/clang/test/CodeGenCoroutines/coro-gro.cpp b/clang/test/CodeGenCoroutines/coro-gro.cpp index ddcf112f0da6b..b48b769950ae8 100644 --- a/clang/test/CodeGenCoroutines/coro-gro.cpp +++ b/clang/test/CodeGenCoroutines/coro-gro.cpp @@ -2,26 +2,9 @@ // Verify that coroutine promise and allocated memory are freed up on exception. 
// RUN: %clang_cc1 -std=c++20 -triple=x86_64-unknown-linux-gnu -emit-llvm -o - %s -disable-llvm-passes | FileCheck %s -namespace std { -template struct coroutine_traits; +#include "Inputs/coroutine.h" -template struct coroutine_handle { - coroutine_handle() = default; - static coroutine_handle from_address(void *) noexcept; -}; -template <> struct coroutine_handle { - static coroutine_handle from_address(void *) noexcept; - coroutine_handle() = default; - template - coroutine_handle(coroutine_handle) noexcept; -}; -} // namespace std - -struct suspend_always { - bool await_ready() noexcept; - void await_suspend(std::coroutine_handle<>) noexcept; - void await_resume() noexcept; -}; +using namespace std; struct GroType { ~GroType(); @@ -51,8 +34,8 @@ int f() { // CHECK: %[[Size:.+]] = call i64 @llvm.coro.size.i64() // CHECK: call noalias noundef nonnull ptr @_Znwm(i64 noundef %[[Size]]) // CHECK: store i1 false, ptr %[[GroActive]] - // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_typeC1Ev( - // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_type17get_return_objectEv( + // CHECK: call void @_ZNSt16coroutine_traitsIiJEE12promise_typeC1Ev( + // CHECK: call void @_ZNSt16coroutine_traitsIiJEE12promise_type17get_return_objectEv( // CHECK: store i1 true, ptr %[[GroActive]] Cleanup cleanup; @@ -60,16 +43,18 @@ int f() { co_return; // CHECK: call void @_Z11doSomethingv( - // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_type11return_voidEv( + // CHECK: call void @_ZNSt16coroutine_traitsIiJEE12promise_type11return_voidEv( // CHECK: call void @_ZN7CleanupD1Ev( // Destroy promise and free the memory. - // CHECK: call void @_ZNSt16coroutine_traitsIJiEE12promise_typeD1Ev( + // CHECK: call void @_ZNSt16coroutine_traitsIiJEE12promise_typeD1Ev( // CHECK: %[[Mem:.+]] = call ptr @llvm.coro.free( // CHECK: call void @_ZdlPv(ptr noundef %[[Mem]]) // Initialize retval from Gro and destroy Gro + // Note this also tests delaying initialization when Gro and function return + // types mismatch (see cwg2563). // CHECK: %[[Conv:.+]] = call noundef i32 @_ZN7GroTypecviEv( // CHECK: store i32 %[[Conv]], ptr %[[RetVal]] @@ -84,3 +69,38 @@ int f() { // CHECK: %[[LoadRet:.+]] = load i32, ptr %[[RetVal]] // CHECK: ret i32 %[[LoadRet]] } + +class invoker { +public: + class invoker_promise { + public: + invoker get_return_object() { return invoker{}; } + auto initial_suspend() { return suspend_always{}; } + auto final_suspend() noexcept { return suspend_always{}; } + void return_void() {} + void unhandled_exception() {} + }; + using promise_type = invoker_promise; + invoker() {} + invoker(const invoker &) = delete; + invoker &operator=(const invoker &) = delete; + invoker(invoker &&) = delete; + invoker &operator=(invoker &&) = delete; +}; + +// According to cwg2563, matching GRO and function return type must allow +// for eager initialization and RVO. +// CHECK: define{{.*}} void @_Z1gv({{.*}} %[[AggRes:.+]]) +invoker g() { + // CHECK: %[[ResultPtr:.+]] = alloca ptr + // CHECK-NEXT: %[[Promise:.+]] = alloca %"class.invoker::invoker_promise" + + // CHECK: store ptr %[[AggRes]], ptr %[[ResultPtr]] + // CHECK: coro.init: + // CHECK: = call ptr @llvm.coro.begin + + // delayed GRO pattern stores a GRO active flag, make sure to not emit it. 
+ // CHECK-NOT: store i1 false, ptr + // CHECK: call void @_ZN7invoker15invoker_promise17get_return_objectEv({{.*}} %[[AggRes]] + co_return; +} diff --git a/clang/test/SemaCXX/coroutine-no-move-ctor.cpp b/clang/test/SemaCXX/coroutine-no-move-ctor.cpp index 08933f4df7a8e..824dea375ebde 100644 --- a/clang/test/SemaCXX/coroutine-no-move-ctor.cpp +++ b/clang/test/SemaCXX/coroutine-no-move-ctor.cpp @@ -15,13 +15,10 @@ class invoker { }; using promise_type = invoker_promise; invoker() {} - // TODO: implement RVO for get_return_object type matching - // function return type. - // - // invoker(const invoker &) = delete; - // invoker &operator=(const invoker &) = delete; - // invoker(invoker &&) = delete; - // invoker &operator=(invoker &&) = delete; + invoker(const invoker &) = delete; + invoker &operator=(const invoker &) = delete; + invoker(invoker &&) = delete; + invoker &operator=(invoker &&) = delete; }; invoker f() { From 0d37efdbc599e61ce2a0418723a66d6b45aea8d7 Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Thu, 9 Mar 2023 12:04:54 +0800 Subject: [PATCH 265/691] [clang][driver] Enable '-flto' on AVR Reviewed By: MaskRay Closes https://github.com/llvm/llvm-project/issues/55940 Differential Revision: https://reviews.llvm.org/D145646 --- clang/lib/Driver/ToolChains/AVR.cpp | 6 ++++++ clang/lib/Driver/ToolChains/AVR.h | 2 ++ clang/test/Driver/avr-ld.c | 14 ++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index be2749eba2479..0e1f8ddc46e4e 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -506,6 +506,12 @@ void AVR::Linker::ConstructJob(Compilation &C, const JobAction &JA, D.Diag(diag::warn_drv_avr_linker_section_addresses_not_implemented) << CPU; } + if (D.isUsingLTO()) { + assert(!Inputs.empty() && "Must have at least one input."); + addLTOOptions(getToolChain(), Args, CmdArgs, Output, Inputs[0], + D.getLTOMode() == LTOK_Thin); + } + // If the family name is known, we can link with the device-specific libgcc. // Without it, libgcc will simply not be linked. This matches avr-gcc // behavior. 
diff --git a/clang/lib/Driver/ToolChains/AVR.h b/clang/lib/Driver/ToolChains/AVR.h
index ea161fe28f331..d432d81744b92 100644
--- a/clang/lib/Driver/ToolChains/AVR.h
+++ b/clang/lib/Driver/ToolChains/AVR.h
@@ -36,6 +36,8 @@ class LLVM_LIBRARY_VISIBILITY AVRToolChain : public Generic_ELF {
   std::string getCompilerRT(const llvm::opt::ArgList &Args, StringRef Component,
                             FileType Type) const override;
 
+  bool HasNativeLLVMSupport() const override { return true; }
+
 protected:
   Tool *buildLinker() const override;
 
diff --git a/clang/test/Driver/avr-ld.c b/clang/test/Driver/avr-ld.c
index 80bb4281f63ad..3088bc00446f4 100644
--- a/clang/test/Driver/avr-ld.c
+++ b/clang/test/Driver/avr-ld.c
@@ -43,3 +43,17 @@
 
 // RUN: %clang -### --target=avr -mmcu=atxmega128a1 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKO %s
 // LINKO: {{".*ld.*"}} {{.*}} {{"-L.*avrxmega7"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x802000" "--start-group" {{.*}} "-latxmega128a1" {{.*}} "--end-group" "--relax" "-mavrxmega7"
+
+// RUN: %clang -### --target=avr -mmcu=atmega328 -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKP %s
+// LINKP: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin" {{.*}} "-plugin-opt=mcpu=atmega328"
+
+// RUN: %clang -### --target=avr -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKQ %s
+// LINKQ: {{".*ld.*"}} {{.*}} "-plugin"
+// LINKQ-NOT: "-plugin-opt=mcpu"
+
+// RUN: %clang -### --target=avr -mmcu=atmega328 -flto=thin --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKR %s
+// LINKR: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin" {{.*}} "-plugin-opt=mcpu=atmega328" "-plugin-opt=thinlto"
+
+// RUN: %clang -### --target=avr -mmcu=atmega328 -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKS %s
+// LINKS: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin" {{.*}} "-plugin-opt=mcpu=atmega328"
+// LINKS-NOT: "-plugin-opt=thinlto"

From 829446cb459fee326fc7f6ebf84b357e1665e4d1 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache
Date: Mon, 20 Mar 2023 14:22:00 -0700
Subject: [PATCH 266/691] [mlir][memref] Use folded composed affine apply ops
 in FoldMemRefAliasOps

Creating maximally folded and composed affine.apply operations during
FoldMemRefAliasOps composes better with other transformations without
having to interleave canonicalization passes.
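For reference, an editorial sketch (not part of the commit) of the rewrite
pattern described above, using the two helpers that appear in the diff below:

// Before: materialize an affine.apply and rely on a later canonicalization
// pass to fold and compose it with its producers.
Value idx = rewriter.create<AffineApplyOp>(loc, map, operands);

// After: compose with producing affine ops and constant-fold up front;
// only materialize an op when the result did not fold to a constant.
OpFoldResult ofr = makeComposedFoldedAffineApply(rewriter, loc, map, operands);
Value folded = getValueOrCreateConstantIndexOp(rewriter, loc, ofr);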
Differential Revision: https://reviews.llvm.org/D146515
---
 .../MemRef/Transforms/FoldMemRefAliasOps.cpp  |  60 +++++---
 .../Dialect/MemRef/fold-memref-alias-ops.mlir | 129 ++++++++----------
 2 files changed, 94 insertions(+), 95 deletions(-)

diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
index 89b34fc14fe42..c1c3478b06efc 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp
@@ -11,16 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Dialect/GPU/IR/GPUDialect.h"
-#include "mlir/Dialect/MemRef/Transforms/Passes.h"
-
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Dialect/Utils/IndexingUtils.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
-#include "mlir/IR/AffineExpr.h"
-#include "mlir/IR/BuiltinTypes.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/TypeSwitch.h"
@@ -72,13 +70,19 @@ resolveSourceIndicesExpandShape(Location loc, PatternRewriter &rewriter,
     AffineExpr srcIndexExpr = linearize(ctx, dims, suffixProduct);
 
     /// Apply permutation and create AffineApplyOp.
-    SmallVector<Value> dynamicIndices(groupSize);
+    SmallVector<OpFoldResult> dynamicIndices(groupSize);
     for (int64_t i = 0; i < groupSize; i++)
      dynamicIndices[i] = indices[groups[i]];
-    sourceIndices.push_back(rewriter.create<AffineApplyOp>(
-        loc,
-        AffineMap::get(/*numDims=*/groupSize, /*numSymbols=*/0, srcIndexExpr),
-        dynamicIndices));
+
+    // Creating maximally folded and composed affine.apply composes better with
+    // other transformations without interleaving canonicalization passes.
+    OpFoldResult ofr = makeComposedFoldedAffineApply(
+        rewriter, loc,
+        AffineMap::get(/*numDims=*/groupSize,
+                       /*numSymbols=*/0, srcIndexExpr),
+        dynamicIndices);
+    sourceIndices.push_back(
+        getValueOrCreateConstantIndexOp(rewriter, loc, ofr));
   }
   return success();
 }
@@ -103,7 +107,7 @@ resolveSourceIndicesCollapseShape(Location loc, PatternRewriter &rewriter,
                                   SmallVectorImpl<Value> &sourceIndices) {
   int64_t cnt = 0;
   SmallVector<Value> tmp(indices.size());
-  SmallVector<Value> dynamicIndices;
+  SmallVector<OpFoldResult> dynamicIndices;
   for (ArrayRef<int64_t> groups : collapseShapeOp.getReassociationIndices()) {
     assert(!groups.empty() && "association indices groups cannot be empty");
     dynamicIndices.push_back(indices[cnt++]);
@@ -121,21 +125,27 @@ resolveSourceIndicesCollapseShape(Location loc, PatternRewriter &rewriter,
    SmallVector<AffineExpr> delinearizingExprs = delinearize(d0, suffixProduct);
 
     // Construct the AffineApplyOp for each delinearizingExpr.
- for (int64_t i = 0; i < groupSize; i++) - sourceIndices.push_back(rewriter.create( - loc, + for (int64_t i = 0; i < groupSize; i++) { + OpFoldResult ofr = makeComposedFoldedAffineApply( + rewriter, loc, AffineMap::get(/*numDims=*/1, /*numSymbols=*/0, delinearizingExprs[i]), - dynamicIndices)); + dynamicIndices); + sourceIndices.push_back( + getValueOrCreateConstantIndexOp(rewriter, loc, ofr)); + } dynamicIndices.clear(); } if (collapseShapeOp.getReassociationIndices().empty()) { auto zeroAffineMap = rewriter.getConstantAffineMap(0); int64_t srcRank = collapseShapeOp.getViewSource().getType().cast().getRank(); - for (int64_t i = 0; i < srcRank; i++) + for (int64_t i = 0; i < srcRank; i++) { + OpFoldResult ofr = makeComposedFoldedAffineApply( + rewriter, loc, zeroAffineMap, dynamicIndices); sourceIndices.push_back( - rewriter.create(loc, zeroAffineMap, dynamicIndices)); + getValueOrCreateConstantIndexOp(rewriter, loc, ofr)); + } } return success(); } @@ -176,7 +186,7 @@ resolveSourceIndicesSubView(Location loc, PatternRewriter &rewriter, return failure(); sourceIndices.resize(useIndices.size()); for (auto index : llvm::seq(0, mixedOffsets.size())) { - SmallVector dynamicOperands; + SmallVector dynamicOperands; AffineExpr expr = rewriter.getAffineDimExpr(0); int64_t numSymbols = 0; dynamicOperands.push_back(useIndices[index]); @@ -197,8 +207,9 @@ resolveSourceIndicesSubView(Location loc, PatternRewriter &rewriter, expr = expr + rewriter.getAffineSymbolExpr(numSymbols++); } Location loc = subViewOp.getLoc(); - sourceIndices[index] = rewriter.create( - loc, AffineMap::get(1, numSymbols, expr), dynamicOperands); + OpFoldResult ofr = makeComposedFoldedAffineApply( + rewriter, loc, AffineMap::get(1, numSymbols, expr), dynamicOperands); + sourceIndices[index] = getValueOrCreateConstantIndexOp(rewriter, loc, ofr); } return success(); } @@ -367,10 +378,15 @@ static SmallVector calculateExpandedAccessIndices(AffineMap affineMap, const SmallVector &indices, Location loc, PatternRewriter &rewriter) { + SmallVector indicesOfr(llvm::to_vector( + llvm::map_range(indices, [](Value v) -> OpFoldResult { return v; }))); SmallVector expandedIndices; - for (unsigned i = 0, e = affineMap.getNumResults(); i < e; i++) + for (unsigned i = 0, e = affineMap.getNumResults(); i < e; i++) { + OpFoldResult ofr = makeComposedFoldedAffineApply( + rewriter, loc, affineMap.getSubMap({i}), indicesOfr); expandedIndices.push_back( - rewriter.create(loc, affineMap.getSubMap({i}), indices)); + getValueOrCreateConstantIndexOp(rewriter, loc, ofr)); + } return expandedIndices; } diff --git a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir index e0e8bd48d6f86..bcbad20a2fd7a 100644 --- a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir +++ b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir @@ -5,16 +5,16 @@ func.func @fold_static_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 %1 = memref.load %0[%arg3, %arg4] : memref<4x4xf32, strided<[64, 3], offset: ?>> return %1 : f32 } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (d0 * 2 + s0)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 * 3 + s0)> +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 + s1 * 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1 * 3)> // CHECK: func @fold_static_stride_subview_with_load // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index // CHECK-SAME: 
%[[ARG3:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP0]](%[[ARG3]])[%[[ARG1]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP1]](%[[ARG4]])[%[[ARG2]]] +// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]], %[[ARG3]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG4]]] // CHECK: memref.load %[[ARG0]][%[[I1]], %[[I2]]] // ----- @@ -25,7 +25,7 @@ func.func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg %1 = memref.load %0[%arg3, %arg4] : memref<4x4xf32, strided<[?, ?], offset: ?>> return %1 : f32 } -// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> // CHECK: func @fold_dynamic_stride_subview_with_load // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -34,8 +34,8 @@ func.func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]](%[[ARG3]])[%[[ARG5]], %[[ARG1]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]](%[[ARG4]])[%[[ARG6]], %[[ARG2]]] +// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] // CHECK: memref.load %[[ARG0]][%[[I1]], %[[I2]]] // ----- @@ -46,16 +46,16 @@ func.func @fold_static_stride_subview_with_store(%arg0 : memref<12x32xf32>, %arg memref.store %arg5, %0[%arg3, %arg4] : memref<4x4xf32, strided<[64, 3], offset: ?>> return } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (d0 * 2 + s0)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 * 3 + s0)> +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 + s1 * 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1 * 3)> // CHECK: func @fold_static_stride_subview_with_store // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP0]](%[[ARG3]])[%[[ARG1]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP1]](%[[ARG4]])[%[[ARG2]]] +// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP0]]()[%[[ARG1]], %[[ARG3]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG4]]] // CHECK: memref.store %{{.+}}, %[[ARG0]][%[[I1]], %[[I2]]] // ----- @@ -66,7 +66,7 @@ func.func @fold_dynamic_stride_subview_with_store(%arg0 : memref<12x32xf32>, %ar memref.store %arg7, %0[%arg3, %arg4] : memref<4x4xf32, strided<[?, ?], offset: ?>> return } -// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> // CHECK: func @fold_dynamic_stride_subview_with_store // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -75,8 +75,8 @@ func.func @fold_dynamic_stride_subview_with_store(%arg0 : memref<12x32xf32>, %ar // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]](%[[ARG3]])[%[[ARG5]], %[[ARG1]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply 
#[[MAP]](%[[ARG4]])[%[[ARG6]], %[[ARG2]]] +// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] // CHECK: memref.store %{{.+}}, %[[ARG0]][%[[I1]], %[[I2]]] // ----- @@ -89,17 +89,12 @@ func.func @fold_subview_with_transfer_read_0d( %1 = vector.transfer_read %0[], %f1 : memref>, vector return %1 : vector } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (d0 * 2 + s0)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)> // CHECK: func @fold_subview_with_transfer_read_0d // CHECK-SAME: %[[MEM:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[SZ0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[SZ1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ST1:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP0]](%[[C0]])[%[[SZ0]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP1]](%[[C0]])[%[[ST1]], %[[SZ1]]] -// CHECK: vector.transfer_read %[[MEM]][%[[I1]], %[[I2]]] +// CHECK: vector.transfer_read %[[MEM]][%[[SZ0]], %[[SZ1]]] // ----- @@ -109,7 +104,7 @@ func.func @fold_subview_with_transfer_read(%arg0 : memref<12x32xf32>, %arg1 : in %1 = vector.transfer_read %0[%arg3, %arg4], %f1 {in_bounds = [true]} : memref<4x4xf32, strided<[?, ?], offset: ?>>, vector<4xf32> return %1 : vector<4xf32> } -// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> // CHECK: func @fold_subview_with_transfer_read // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -118,8 +113,8 @@ func.func @fold_subview_with_transfer_read(%arg0 : memref<12x32xf32>, %arg1 : in // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]](%[[ARG3]])[%[[ARG5]], %[[ARG1]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]](%[[ARG4]])[%[[ARG6]], %[[ARG2]]] +// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] // CHECK: vector.transfer_read %[[ARG0]][%[[I1]], %[[I2]]] // ----- @@ -132,18 +127,13 @@ func.func @fold_static_stride_subview_with_transfer_write_0d( vector.transfer_write %v, %0[] {in_bounds = []} : vector, memref> return } -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0] -> (d0 * 2 + s0)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)> // CHECK: func @fold_static_stride_subview_with_transfer_write_0d // CHECK-SAME: %[[MEM:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[SZ0:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[SZ1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ST1:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[V:[a-zA-Z0-9_]+]]: vector -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP0]](%[[C0]])[%[[SZ0]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP1]](%[[C0]])[%[[ST1]], %[[SZ1]]] -// CHECK: vector.transfer_write %[[V]], %[[MEM]][%[[I1]], %[[I2]]] +// CHECK: vector.transfer_write %[[V]], %[[MEM]][%[[SZ0]], %[[SZ1]]] // ----- @@ -153,7 +143,7 @@ func.func @fold_static_stride_subview_with_transfer_write(%arg0 : memref<12x32xf vector.transfer_write %arg7, %0[%arg3, %arg4] {in_bounds = [true]} : vector<4xf32>, memref<4x4xf32, strided<[?, ?], offset: ?>> return } -// CHECK-DAG: 
#[[MAP:.+]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> // CHECK: func @fold_static_stride_subview_with_transfer_write // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -162,8 +152,8 @@ func.func @fold_static_stride_subview_with_transfer_write(%arg0 : memref<12x32xf // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]](%[[ARG3]])[%[[ARG5]], %[[ARG1]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]](%[[ARG4]])[%[[ARG6]], %[[ARG2]]] +// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] // CHECK: vector.transfer_write %{{.+}}, %[[ARG0]][%[[I1]], %[[I2]]] // ----- @@ -178,7 +168,7 @@ func.func @fold_rank_reducing_subview_with_load %1 = memref.load %0[%arg13, %arg14, %arg15, %arg16] : memref<4x1x4x1xf32, strided<[?, ?, ?, ?], offset: ?>> return %1 : f32 } -// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0)[s0, s1] -> (d0 * s0 + s1)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> // CHECK: func @fold_rank_reducing_subview_with_load // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -197,14 +187,11 @@ func.func @fold_rank_reducing_subview_with_load // CHECK-SAME: %[[ARG14:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG15:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG16:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]](%[[ARG13]])[%[[ARG7]], %[[ARG1]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]](%[[C0]])[%[[ARG8]], %[[ARG2]]] -// CHECK-DAG: %[[I3:.+]] = affine.apply #[[MAP]](%[[ARG14]])[%[[ARG9]], %[[ARG3]]] -// CHECK-DAG: %[[I4:.+]] = affine.apply #[[MAP]](%[[ARG15]])[%[[ARG10]], %[[ARG4]]] -// CHECK-DAG: %[[I5:.+]] = affine.apply #[[MAP]](%[[ARG16]])[%[[ARG11]], %[[ARG5]]] -// CHECK-DAG: %[[I6:.+]] = affine.apply #[[MAP]](%[[C0]])[%[[ARG12]], %[[ARG6]]] -// CHECK: memref.load %[[ARG0]][%[[I1]], %[[I2]], %[[I3]], %[[I4]], %[[I5]], %[[I6]]] +// CHECK-DAG: %[[I0:.+]] = affine.apply #[[MAP]]()[%[[ARG7]], %[[ARG1]], %[[ARG13]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG9]], %[[ARG3]], %[[ARG14]]] +// CHECK-DAG: %[[I3:.+]] = affine.apply #[[MAP]]()[%[[ARG10]], %[[ARG4]], %[[ARG15]]] +// CHECK-DAG: %[[I4:.+]] = affine.apply #[[MAP]]()[%[[ARG11]], %[[ARG5]], %[[ARG16]]] +// CHECK: memref.load %[[ARG0]][%[[I0]], %[[ARG2]], %[[I2]], %[[I3]], %[[I4]], %[[ARG6]]] // ----- @@ -220,7 +207,7 @@ func.func @fold_vector_transfer_read_with_rank_reduced_subview( : memref>, vector<4xf32> return %1 : vector<4xf32> } -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> // CHECK: func @fold_vector_transfer_read_with_rank_reduced_subview // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index @@ -230,8 +217,8 @@ func.func @fold_vector_transfer_read_with_rank_reduced_subview( // CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index // CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG5]])[%[[ARG1]]] -// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]] +// CHECK-DAG: 
%[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG1]], %[[ARG5]]] +// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]] // CHECK: vector.transfer_read %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]], %{{.*}} : memref, memref> return } -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> // CHECK: func @fold_vector_transfer_write_with_rank_reduced_subview // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32> @@ -259,8 +246,8 @@ func.func @fold_vector_transfer_write_with_rank_reduced_subview( // CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index // CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]] -// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG7]])[%[[ARG3]]] +// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]] +// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]] // CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] {in_bounds = [true]} : vector<4xf32>, memref, memref> return } -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> // CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)> // CHECK: func @fold_vector_transfer_write_with_inner_rank_reduced_subview // CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: memref> @@ -289,8 +276,8 @@ func.func @fold_vector_transfer_write_with_inner_rank_reduced_subview( // CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index // CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index -// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]](%[[ARG6]])[%[[ARG2]]] -// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]](%[[ARG7]])[%[[ARG3]]] +// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]] +// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]] // CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[IDX0]], %[[IDX1]], %[[C0]]] // CHECK-SAME: {in_bounds = [true], permutation_map = #[[MAP2]]} : vector<4xf32>, memref (d0 * 6 + d1)> +// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0, s1] -> (s0 * 6 + s1)> // CHECK-LABEL: fold_static_stride_subview_with_affine_load_store_expand_shape // CHECK-SAME: (%[[ARG0:.*]]: memref<12x32xf32>, %[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index) -> f32 { func.func @fold_static_stride_subview_with_affine_load_store_expand_shape(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index) -> f32 { @@ -325,14 +312,14 @@ func.func @fold_static_stride_subview_with_affine_load_store_expand_shape(%arg0 %1 = affine.load %0[%arg1, %arg2, %arg3] : memref<2x6x32xf32> return %1 : f32 } -// CHECK: %[[INDEX:.*]] = affine.apply #[[$MAP]](%[[ARG1]], %[[ARG2]]) +// CHECK: %[[INDEX:.*]] = affine.apply #[[$MAP]]()[%[[ARG1]], %[[ARG2]]] // CHECK-NEXT: %[[RESULT:.*]] = affine.load %[[ARG0]][%[[INDEX]], %[[ARG3]]] : memref<12x32xf32> // CHECK-NEXT: return %[[RESULT]] : f32 // ----- -// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> (d0 floordiv 6)> -// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0) -> (d0 mod 6)> +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<()[s0] -> (s0 floordiv 6)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<()[s0] -> (s0 mod 6)> // CHECK-LABEL: @fold_static_stride_subview_with_affine_load_store_collapse_shape // CHECK-SAME: (%[[ARG0:.*]]: 
memref<2x6x32xf32>, %[[ARG1:.*]]: index, %[[ARG2:.*]]: index) func.func @fold_static_stride_subview_with_affine_load_store_collapse_shape(%arg0 : memref<2x6x32xf32>, %arg1 : index, %arg2 : index) -> f32 { @@ -340,14 +327,14 @@ func.func @fold_static_stride_subview_with_affine_load_store_collapse_shape(%arg %1 = affine.load %0[%arg1, %arg2] : memref<12x32xf32> return %1 : f32 } -// CHECK-NEXT: %[[MODIFIED_INDEX0:.*]] = affine.apply #[[$MAP0]](%[[ARG1]]) -// CHECK-NEXT: %[[MODIFIED_INDEX1:.*]] = affine.apply #[[$MAP1]](%[[ARG1]]) +// CHECK-NEXT: %[[MODIFIED_INDEX0:.*]] = affine.apply #[[$MAP0]]()[%[[ARG1]]] +// CHECK-NEXT: %[[MODIFIED_INDEX1:.*]] = affine.apply #[[$MAP1]]()[%[[ARG1]]] // CHECK-NEXT: %[[RESULT:.*]] = affine.load %[[ARG0]][%[[MODIFIED_INDEX0]], %[[MODIFIED_INDEX1]], %[[ARG2]]] : memref<2x6x32xf32> // CHECK-NEXT: return %[[RESULT]] : f32 // ----- -// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0, d1, d2) -> (d0 * 6 + d1 * 3 + d2)> +// CHECK-DAG: #[[$MAP:.*]] = affine_map<()[s0, s1, s2] -> (s0 * 6 + s1 * 3 + s2)> // CHECK-LABEL: fold_static_stride_subview_with_affine_load_store_expand_shape_3d // CHECK-SAME: (%[[ARG0:.*]]: memref<12x32xf32>, %[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index) -> f32 { func.func @fold_static_stride_subview_with_affine_load_store_expand_shape_3d(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4: index) -> f32 { @@ -355,7 +342,7 @@ func.func @fold_static_stride_subview_with_affine_load_store_expand_shape_3d(%ar %1 = affine.load %0[%arg1, %arg2, %arg3, %arg4] : memref<2x2x3x32xf32> return %1 : f32 } -// CHECK: %[[INDEX:.*]] = affine.apply #[[$MAP]](%[[ARG1]], %[[ARG2]], %[[ARG3]]) +// CHECK: %[[INDEX:.*]] = affine.apply #[[$MAP]]()[%[[ARG1]], %[[ARG2]], %[[ARG3]]] // CHECK-NEXT: %[[RESULT:.*]] = affine.load %[[ARG0]][%[[INDEX]], %[[ARG4]]] : memref<12x32xf32> // CHECK-NEXT: return %[[RESULT]] : f32 @@ -390,9 +377,8 @@ func.func @fold_static_stride_subview_with_affine_load_store_expand_shape(%arg0: // ----- -// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d1 + d0)> -// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0 * 1024 + d1)> -// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0 + d1)> +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0 * 1025 + d1)> +// CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> // CHECK-LABEL: fold_static_stride_subview_with_affine_load_store_expand_shape_when_access_index_is_an_expression // CHECK-SAME: (%[[ARG0:.*]]: memref<1024x1024xf32>, %[[ARG1:.*]]: memref<1xf32>, %[[ARG2:.*]]: index) func.func @fold_static_stride_subview_with_affine_load_store_expand_shape_when_access_index_is_an_expression(%arg0: memref<1024x1024xf32>, %arg1: memref<1xf32>, %arg2: index) -> f32 { @@ -414,14 +400,13 @@ func.func @fold_static_stride_subview_with_affine_load_store_expand_shape_when_a // CHECK-NEXT: affine.for %[[ARG4:.*]] = 0 to 1024 { // CHECK-NEXT: affine.for %[[ARG5:.*]] = 0 to 1020 { // CHECK-NEXT: affine.for %[[ARG6:.*]] = 0 to 1 { -// CHECK-NEXT: %[[TMP1:.*]] = affine.apply #[[$MAP0]](%[[ARG3]], %[[ARG4]], %[[ARG5]], %[[ARG6]]) -// CHECK-NEXT: %[[TMP2:.*]] = affine.apply #[[$MAP1]](%[[ARG3]], %[[TMP1]]) -// CHECK-NEXT: %[[TMP3:.*]] = affine.apply #{{.*}}(%[[ARG5]], %[[ARG6]]) -// CHECK-NEXT: affine.load %[[ARG0]][%[[TMP2]], %[[TMP3]]] : memref<1024x1024xf32> +// CHECK-NEXT: %[[TMP1:.*]] = affine.apply #[[$MAP0]](%[[ARG3]], %[[ARG4]]) +// CHECK-NEXT: %[[TMP3:.*]] = affine.apply #[[$MAP1]](%[[ARG5]], %[[ARG6]]) +// CHECK-NEXT: 
affine.load %[[ARG0]][%[[TMP1]], %[[TMP3]]] : memref<1024x1024xf32> // ----- -// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0, d1) -> (d0 * 1024 + d1)> +// CHECK-DAG: #[[$MAP0:.*]] = affine_map<(d0) -> (d0 * 1024)> // CHECK-DAG: #[[$MAP1:.*]] = affine_map<(d0, d1) -> (d0 + d1)> // CHECK-LABEL: fold_static_stride_subview_with_affine_load_store_expand_shape_with_constant_access_index // CHECK-SAME: (%[[ARG0:.*]]: memref<1024x1024xf32>, %[[ARG1:.*]]: memref<1xf32>, %[[ARG2:.*]]: index) @@ -441,12 +426,11 @@ func.func @fold_static_stride_subview_with_affine_load_store_expand_shape_with_c %2 = memref.load %arg1[%arg2] : memref<1xf32> return %2 : f32 } -// CHECK-NEXT: %[[ZERO:.*]] = arith.constant 0 : index // CHECK-NEXT: affine.for %[[ARG3:.*]] = 0 to 1 { // CHECK-NEXT: affine.for %[[ARG4:.*]] = 0 to 1024 { // CHECK-NEXT: affine.for %[[ARG5:.*]] = 0 to 1020 { // CHECK-NEXT: affine.for %[[ARG6:.*]] = 0 to 1 { -// CHECK-NEXT: %[[TMP1:.*]] = affine.apply #[[$MAP0]](%[[ARG3]], %[[ZERO]]) +// CHECK-NEXT: %[[TMP1:.*]] = affine.apply #[[$MAP0]](%[[ARG3]]) // CHECK-NEXT: %[[TMP2:.*]] = affine.apply #[[$MAP1]](%[[ARG5]], %[[ARG6]]) // CHECK-NEXT: memref.load %[[ARG0]][%[[TMP1]], %[[TMP2]]] : memref<1024x1024xf32> @@ -513,7 +497,6 @@ func.func @fold_load_keep_nontemporal(%arg0 : memref<12x32xf32>, %arg1 : index, return %1 : f32 } - // ----- // CHECK-LABEL: func @fold_store_keep_nontemporal( @@ -533,10 +516,10 @@ func.func @fold_gpu_subgroup_mma_load_matrix_1d(%src: memref>, % return %matrix: !gpu.mma_matrix<16x16xf16, "COp"> } -// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> // CHECK: func.func @fold_gpu_subgroup_mma_load_matrix_1d // CHECK-SAME: (%[[SRC:.+]]: memref>, %[[OFFSET:.+]]: index, %[[I:.+]]: index) -// CHECK: %[[APPLY:.+]] = affine.apply #[[MAP]](%[[I]])[%[[OFFSET]]] +// CHECK: %[[APPLY:.+]] = affine.apply #[[MAP]]()[%[[OFFSET]], %[[I]]] // CHECK: %[[LOAD:.+]] = gpu.subgroup_mma_load_matrix %[[SRC]][%[[APPLY]]] {leadDimension = 160 : index} : memref> -> !gpu.mma_matrix<16x16xf16, "COp"> // CHECK: return %[[LOAD]] @@ -548,10 +531,10 @@ func.func @fold_gpu_subgroup_mma_store_matrix_1d(%dst: memref>, return } -// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> // CHECK: func.func @fold_gpu_subgroup_mma_store_matrix_1d // CHECK-SAME: (%[[DST:.+]]: memref>, %[[OFFSET:.+]]: index, %[[I0:.+]]: index, %[[VAL:.+]]: !gpu.mma_matrix<16x16xf16, "COp">) -// CHECK: %[[APPLY:.+]] = affine.apply #[[MAP]](%[[I0]])[%[[OFFSET]]] +// CHECK: %[[APPLY:.+]] = affine.apply #[[MAP]]()[%[[OFFSET]], %[[I0]]] // CHECK: gpu.subgroup_mma_store_matrix %[[VAL]], %[[DST]][%[[APPLY]]] {leadDimension = 160 : index} : !gpu.mma_matrix<16x16xf16, "COp">, memref> // ----- From 792bb70d298554a2fc431ffa7b83f37883336e78 Mon Sep 17 00:00:00 2001 From: paperchalice Date: Tue, 21 Mar 2023 23:13:59 -0700 Subject: [PATCH 267/691] [MCContext] Use `const Twine &` in symbol creation methods. NFC All of these methods will invoke `getOrCreateSymbol(const Twine &Name)`, using `Twine` here makes these methods more flexible. 
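For illustration (an editorial sketch, not part of the commit; `Ctx`, `Prefix`,
`FuncName`, and `Idx` are invented names): with `StringRef` parameters, a
concatenated symbol name had to be rendered into a temporary `std::string` at
every call site, while the `const Twine &` overloads let the concatenation stay
lazy until `getOrCreateSymbol(const Twine &Name)` renders it once:

// Before: the Twine must be flattened just to obtain a StringRef argument.
MCSymbol *A =
    Ctx.getOrCreateFrameAllocSymbol((Prefix + FuncName).str(), Idx);

// After: the Twine is passed through unchanged and rendered once, inside
// getOrCreateSymbol(const Twine &Name).
MCSymbol *B = Ctx.getOrCreateFrameAllocSymbol(Prefix + FuncName, Idx);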
Differential Revision: https://reviews.llvm.org/D145923 --- llvm/include/llvm/MC/MCContext.h | 10 +++++----- llvm/lib/MC/MCContext.cpp | 33 +++++++++++++++----------------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index 653df37772db8..59f05bbc10cf1 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -506,17 +506,17 @@ class MCContext { /// variable after codegen. /// /// \param Idx - The index of a local variable passed to \@llvm.localescape. - MCSymbol *getOrCreateFrameAllocSymbol(StringRef FuncName, unsigned Idx); + MCSymbol *getOrCreateFrameAllocSymbol(const Twine &FuncName, unsigned Idx); - MCSymbol *getOrCreateParentFrameOffsetSymbol(StringRef FuncName); + MCSymbol *getOrCreateParentFrameOffsetSymbol(const Twine &FuncName); - MCSymbol *getOrCreateLSDASymbol(StringRef FuncName); + MCSymbol *getOrCreateLSDASymbol(const Twine &FuncName); /// Get the symbol for \p Name, or null. MCSymbol *lookupSymbol(const Twine &Name) const; /// Set value for a symbol. - void setSymbolValue(MCStreamer &Streamer, StringRef Sym, uint64_t Val); + void setSymbolValue(MCStreamer &Streamer, const Twine &Sym, uint64_t Val); /// getSymbols - Get a reference for the symbol table for clients that /// want to, for example, iterate over all symbols. 'const' because we @@ -664,7 +664,7 @@ class MCContext { MCSectionWasm *getWasmSection(const Twine &Section, SectionKind K, unsigned Flags, const MCSymbolWasm *Group, unsigned UniqueID, const char *BeginSymName); - + /// Get the section for the provided Section name MCSectionDXContainer *getDXContainerSection(StringRef Section, SectionKind K); diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index 40e5e0f2ef24d..80a036a950bc9 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -211,19 +211,19 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) { return Sym; } -MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName, +MCSymbol *MCContext::getOrCreateFrameAllocSymbol(const Twine &FuncName, unsigned Idx) { - return getOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName + + return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + FuncName + "$frame_escape_" + Twine(Idx)); } -MCSymbol *MCContext::getOrCreateParentFrameOffsetSymbol(StringRef FuncName) { - return getOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + FuncName + +MCSymbol *MCContext::getOrCreateParentFrameOffsetSymbol(const Twine &FuncName) { + return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + FuncName + "$parent_frame_offset"); } -MCSymbol *MCContext::getOrCreateLSDASymbol(StringRef FuncName) { - return getOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) + "__ehtable$" + +MCSymbol *MCContext::getOrCreateLSDASymbol(const Twine &FuncName) { + return getOrCreateSymbol(MAI->getPrivateGlobalPrefix() + "__ehtable$" + FuncName); } @@ -259,8 +259,8 @@ MCSymbol *MCContext::createSymbolImpl(const StringMapEntry *Name, return new (Name, *this) MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary); } - return new (Name, *this) MCSymbol(MCSymbol::SymbolKindUnset, Name, - IsTemporary); + return new (Name, *this) + MCSymbol(MCSymbol::SymbolKindUnset, Name, IsTemporary); } MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix, @@ -362,9 +362,8 @@ MCSymbol *MCContext::lookupSymbol(const Twine &Name) const { return Symbols.lookup(NameRef); } -void MCContext::setSymbolValue(MCStreamer &Streamer, - 
StringRef Sym, - uint64_t Val) { +void MCContext::setSymbolValue(MCStreamer &Streamer, const Twine &Sym, + uint64_t Val) { auto Symbol = getOrCreateSymbol(Sym); Streamer.emitAssignment(Symbol, MCConstantExpr::create(Val, *this)); } @@ -498,14 +497,13 @@ MCSectionELF *MCContext::createELFSectionImpl(StringRef Section, unsigned Type, return Ret; } -MCSectionELF *MCContext::createELFRelSection(const Twine &Name, unsigned Type, - unsigned Flags, unsigned EntrySize, - const MCSymbolELF *Group, - const MCSectionELF *RelInfoSection) { +MCSectionELF * +MCContext::createELFRelSection(const Twine &Name, unsigned Type, unsigned Flags, + unsigned EntrySize, const MCSymbolELF *Group, + const MCSectionELF *RelInfoSection) { StringMap::iterator I; bool Inserted; - std::tie(I, Inserted) = - RelSecNames.insert(std::make_pair(Name.str(), true)); + std::tie(I, Inserted) = RelSecNames.insert(std::make_pair(Name.str(), true)); return createELFSectionImpl( I->getKey(), Type, Flags, SectionKind::getReadOnly(), EntrySize, Group, @@ -669,7 +667,6 @@ MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, COMDATSymName = COMDATSymbol->getName(); } - // Do the lookup, if we have a hit, return it. COFFSectionKey T{Section, COMDATSymName, Selection, UniqueID}; auto IterBool = COFFUniquingMap.insert(std::make_pair(T, nullptr)); From 823ddba1b325f30fc3fb2e9d695c211b856a4d5d Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Thu, 2 Mar 2023 12:29:05 -0600 Subject: [PATCH 268/691] [flang] Feature list plugin Plugin that counts the number of times each tree node occurs in a given program. Used for test coverage. Updated to fix build issues. Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D143704 --- flang/examples/CMakeLists.txt | 1 + flang/examples/FeatureList/CMakeLists.txt | 9 + flang/examples/FeatureList/FeatureList.cpp | 761 ++++++++++++++++++ flang/test/CMakeLists.txt | 1 + flang/test/Examples/feature-list-class.f90 | 88 ++ .../test/Examples/feature-list-functions.f90 | 76 ++ 6 files changed, 936 insertions(+) create mode 100644 flang/examples/FeatureList/CMakeLists.txt create mode 100644 flang/examples/FeatureList/FeatureList.cpp create mode 100644 flang/test/Examples/feature-list-class.f90 create mode 100644 flang/test/Examples/feature-list-functions.f90 diff --git a/flang/examples/CMakeLists.txt b/flang/examples/CMakeLists.txt index 23fea3920efb6..8cc66ddbbbb0e 100644 --- a/flang/examples/CMakeLists.txt +++ b/flang/examples/CMakeLists.txt @@ -1,3 +1,4 @@ add_subdirectory(ExternalHelloWorld) add_subdirectory(PrintFlangFunctionNames) add_subdirectory(FlangOmpReport) +add_subdirectory(FeatureList) diff --git a/flang/examples/FeatureList/CMakeLists.txt b/flang/examples/FeatureList/CMakeLists.txt new file mode 100644 index 0000000000000..e17a7bebbff05 --- /dev/null +++ b/flang/examples/FeatureList/CMakeLists.txt @@ -0,0 +1,9 @@ +add_llvm_example_library(flangFeatureList + MODULE + FeatureList.cpp + + DEPENDS + acc_gen + flangFrontend + omp_gen +) diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp new file mode 100644 index 0000000000000..7d7e63e148bc0 --- /dev/null +++ b/flang/examples/FeatureList/FeatureList.cpp @@ -0,0 +1,761 @@ +//===-- FeatureList.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A plugin that counts the number of times a particular parse tree node +// occurs. This plugin should cover each feature covered in dump-parse-tree.h +// +//===----------------------------------------------------------------------===// + +#include "flang/Frontend/FrontendActions.h" +#include "flang/Frontend/FrontendPluginRegistry.h" +#include "flang/Parser/parse-tree-visitor.h" +#include "flang/Parser/parse-tree.h" +#include "flang/Parser/parsing.h" + +#include <map> + +using namespace Fortran::frontend; +using namespace Fortran::parser; +using namespace Fortran; + +#define READ_FEATURE_CUST(classname, n) \ + bool Pre(const classname &) { \ + record(#n); \ + return true; \ + } \ + void Post(const classname &) {} + +#define READ_FEATURE(classname) READ_FEATURE_CUST(classname, classname) + +struct NodeVisitor { +private: + std::map<const char *, int> frequencies; + + void record(const char *name) { + const auto [it, ins] = frequencies.insert({name, 1}); + if (!ins) { + frequencies[name] = it->second + 1; + } + } + +public: + const std::map<const char *, int> &getFrequencies() const { + return frequencies; + } + + READ_FEATURE_CUST(format::ControlEditDesc, ControlEditDesc) + READ_FEATURE_CUST(format::DerivedTypeDataEditDesc, DerivedTypeDataEditDesc) + READ_FEATURE_CUST(format::FormatItem, FormatItem) + READ_FEATURE_CUST(format::FormatSpecification, FormatSpecification) + READ_FEATURE_CUST( + format::IntrinsicTypeDataEditDesc, IntrinsicTypeDataEditDesc) + READ_FEATURE(Abstract) + READ_FEATURE(AccAtomicCapture) + READ_FEATURE(AccAtomicCapture::Stmt1) + READ_FEATURE(AccAtomicCapture::Stmt2) + READ_FEATURE(AccAtomicRead) + READ_FEATURE(AccAtomicUpdate) + READ_FEATURE(AccAtomicWrite) + READ_FEATURE(AccBeginBlockDirective) + READ_FEATURE(AccBeginCombinedDirective) + READ_FEATURE(AccBeginLoopDirective) + READ_FEATURE(AccBlockDirective) + READ_FEATURE(AccClause) + READ_FEATURE(AccBindClause) + READ_FEATURE(AccDefaultClause) + READ_FEATURE(AccClauseList) + READ_FEATURE(AccCombinedDirective) + READ_FEATURE(AccDataModifier) + READ_FEATURE(AccDataModifier::Modifier) + READ_FEATURE(AccDeclarativeDirective) + READ_FEATURE(AccEndAtomic) + READ_FEATURE(AccEndBlockDirective) + READ_FEATURE(AccEndCombinedDirective) + READ_FEATURE(AccGangArgument) + READ_FEATURE(AccObject) + READ_FEATURE(AccObjectList) + READ_FEATURE(AccObjectListWithModifier) + READ_FEATURE(AccObjectListWithReduction) + READ_FEATURE(AccReductionOperator) + READ_FEATURE(AccReductionOperator::Operator) + READ_FEATURE(AccSizeExpr) + READ_FEATURE(AccSizeExprList) + READ_FEATURE(AccSelfClause) + READ_FEATURE(AccStandaloneDirective) + READ_FEATURE(AccDeviceTypeExpr) + READ_FEATURE(AccDeviceTypeExprList) + READ_FEATURE(AccTileExpr) + READ_FEATURE(AccTileExprList) + READ_FEATURE(AccLoopDirective) + READ_FEATURE(AccWaitArgument) + READ_FEATURE(AcImpliedDo) + READ_FEATURE(AcImpliedDoControl) + READ_FEATURE(AcValue) + READ_FEATURE(AccessStmt) + READ_FEATURE(AccessId) + READ_FEATURE(AccessSpec) + READ_FEATURE(AccessSpec::Kind) + READ_FEATURE(AcSpec) + READ_FEATURE(ActionStmt) + READ_FEATURE(ActualArg) + READ_FEATURE(ActualArg::PercentRef) + READ_FEATURE(ActualArg::PercentVal) + READ_FEATURE(ActualArgSpec) + READ_FEATURE(AcValue::Triplet) + READ_FEATURE(AllocOpt) + READ_FEATURE(AllocOpt::Mold) + READ_FEATURE(AllocOpt::Source) + READ_FEATURE(Allocatable) + READ_FEATURE(AllocatableStmt) + READ_FEATURE(AllocateCoarraySpec) +
READ_FEATURE(AllocateObject) + READ_FEATURE(AllocateShapeSpec) + READ_FEATURE(AllocateStmt) + READ_FEATURE(Allocation) + READ_FEATURE(AltReturnSpec) + READ_FEATURE(ArithmeticIfStmt) + READ_FEATURE(ArrayConstructor) + READ_FEATURE(ArrayElement) + READ_FEATURE(ArraySpec) + READ_FEATURE(AssignStmt) + READ_FEATURE(AssignedGotoStmt) + READ_FEATURE(AssignmentStmt) + READ_FEATURE(AssociateConstruct) + READ_FEATURE(AssociateStmt) + READ_FEATURE(Association) + READ_FEATURE(AssumedImpliedSpec) + READ_FEATURE(AssumedRankSpec) + READ_FEATURE(AssumedShapeSpec) + READ_FEATURE(AssumedSizeSpec) + READ_FEATURE(Asynchronous) + READ_FEATURE(AsynchronousStmt) + READ_FEATURE(AttrSpec) + READ_FEATURE(BOZLiteralConstant) + READ_FEATURE(BackspaceStmt) + READ_FEATURE(BasedPointer) + READ_FEATURE(BasedPointerStmt) + READ_FEATURE(BindAttr) + READ_FEATURE(BindAttr::Deferred) + READ_FEATURE(BindAttr::Non_Overridable) + READ_FEATURE(BindEntity) + READ_FEATURE(BindEntity::Kind) + READ_FEATURE(BindStmt) + READ_FEATURE(Block) + READ_FEATURE(BlockConstruct) + READ_FEATURE(BlockData) + READ_FEATURE(BlockDataStmt) + READ_FEATURE(BlockSpecificationPart) + READ_FEATURE(BlockStmt) + READ_FEATURE(BoundsRemapping) + READ_FEATURE(BoundsSpec) + READ_FEATURE(Call) + READ_FEATURE(CallStmt) + READ_FEATURE(CaseConstruct) + READ_FEATURE(CaseConstruct::Case) + READ_FEATURE(CaseSelector) + READ_FEATURE(CaseStmt) + READ_FEATURE(CaseValueRange) + READ_FEATURE(CaseValueRange::Range) + READ_FEATURE(ChangeTeamConstruct) + READ_FEATURE(ChangeTeamStmt) + READ_FEATURE(CharLength) + READ_FEATURE(CharLiteralConstant) + READ_FEATURE(CharLiteralConstantSubstring) + READ_FEATURE(CharSelector) + READ_FEATURE(CharSelector::LengthAndKind) + READ_FEATURE(CloseStmt) + READ_FEATURE(CloseStmt::CloseSpec) + READ_FEATURE(CoarrayAssociation) + READ_FEATURE(CoarraySpec) + READ_FEATURE(CodimensionDecl) + READ_FEATURE(CodimensionStmt) + READ_FEATURE(CoindexedNamedObject) + READ_FEATURE(CommonBlockObject) + READ_FEATURE(CommonStmt) + READ_FEATURE(CommonStmt::Block) + READ_FEATURE(CompilerDirective) + READ_FEATURE(CompilerDirective::IgnoreTKR) + READ_FEATURE(CompilerDirective::LoopCount) + READ_FEATURE(CompilerDirective::NameValue) + READ_FEATURE(ComplexLiteralConstant) + READ_FEATURE(ComplexPart) + READ_FEATURE(ComponentArraySpec) + READ_FEATURE(ComponentAttrSpec) + READ_FEATURE(ComponentDataSource) + READ_FEATURE(ComponentDecl) + READ_FEATURE(FillDecl) + READ_FEATURE(ComponentOrFill) + READ_FEATURE(ComponentDefStmt) + READ_FEATURE(ComponentSpec) + READ_FEATURE(ComputedGotoStmt) + READ_FEATURE(ConcurrentControl) + READ_FEATURE(ConcurrentHeader) + READ_FEATURE(ConnectSpec) + READ_FEATURE(ConnectSpec::CharExpr) + READ_FEATURE(ConnectSpec::CharExpr::Kind) + READ_FEATURE(ConnectSpec::Newunit) + READ_FEATURE(ConnectSpec::Recl) + READ_FEATURE(ContainsStmt) + READ_FEATURE(Contiguous) + READ_FEATURE(ContiguousStmt) + READ_FEATURE(ContinueStmt) + READ_FEATURE(CriticalConstruct) + READ_FEATURE(CriticalStmt) + READ_FEATURE(CycleStmt) + READ_FEATURE(DataComponentDefStmt) + READ_FEATURE(DataIDoObject) + READ_FEATURE(DataImpliedDo) + READ_FEATURE(DataRef) + READ_FEATURE(DataStmt) + READ_FEATURE(DataStmtConstant) + READ_FEATURE(DataStmtObject) + READ_FEATURE(DataStmtRepeat) + READ_FEATURE(DataStmtSet) + READ_FEATURE(DataStmtValue) + READ_FEATURE(DeallocateStmt) + READ_FEATURE(DeclarationConstruct) + READ_FEATURE(DeclarationTypeSpec) + READ_FEATURE(DeclarationTypeSpec::Class) + READ_FEATURE(DeclarationTypeSpec::ClassStar) + READ_FEATURE(DeclarationTypeSpec::Record) + 
READ_FEATURE(DeclarationTypeSpec::Type) + READ_FEATURE(DeclarationTypeSpec::TypeStar) + READ_FEATURE(Default) + READ_FEATURE(DeferredCoshapeSpecList) + READ_FEATURE(DeferredShapeSpecList) + READ_FEATURE(DefinedOpName) + READ_FEATURE(DefinedOperator) + READ_FEATURE(DefinedOperator::IntrinsicOperator) + READ_FEATURE(DerivedTypeDef) + READ_FEATURE(DerivedTypeSpec) + READ_FEATURE(DerivedTypeStmt) + READ_FEATURE(Designator) + READ_FEATURE(DimensionStmt) + READ_FEATURE(DimensionStmt::Declaration) + READ_FEATURE(DoConstruct) + READ_FEATURE(DummyArg) + READ_FEATURE(ElseIfStmt) + READ_FEATURE(ElseStmt) + READ_FEATURE(ElsewhereStmt) + READ_FEATURE(EndAssociateStmt) + READ_FEATURE(EndBlockDataStmt) + READ_FEATURE(EndBlockStmt) + READ_FEATURE(EndChangeTeamStmt) + READ_FEATURE(EndCriticalStmt) + READ_FEATURE(EndDoStmt) + READ_FEATURE(EndEnumStmt) + READ_FEATURE(EndForallStmt) + READ_FEATURE(EndFunctionStmt) + READ_FEATURE(EndIfStmt) + READ_FEATURE(EndInterfaceStmt) + READ_FEATURE(EndLabel) + READ_FEATURE(EndModuleStmt) + READ_FEATURE(EndMpSubprogramStmt) + READ_FEATURE(EndProgramStmt) + READ_FEATURE(EndSelectStmt) + READ_FEATURE(EndSubmoduleStmt) + READ_FEATURE(EndSubroutineStmt) + READ_FEATURE(EndTypeStmt) + READ_FEATURE(EndWhereStmt) + READ_FEATURE(EndfileStmt) + READ_FEATURE(EntityDecl) + READ_FEATURE(EntryStmt) + READ_FEATURE(EnumDef) + READ_FEATURE(EnumDefStmt) + READ_FEATURE(Enumerator) + READ_FEATURE(EnumeratorDefStmt) + READ_FEATURE(EorLabel) + READ_FEATURE(EquivalenceObject) + READ_FEATURE(EquivalenceStmt) + READ_FEATURE(ErrLabel) + READ_FEATURE(ErrorRecovery) + READ_FEATURE(EventPostStmt) + READ_FEATURE(EventWaitStmt) + READ_FEATURE(EventWaitStmt::EventWaitSpec) + READ_FEATURE(ExecutableConstruct) + READ_FEATURE(ExecutionPart) + READ_FEATURE(ExecutionPartConstruct) + READ_FEATURE(ExitStmt) + READ_FEATURE(ExplicitCoshapeSpec) + READ_FEATURE(ExplicitShapeSpec) + READ_FEATURE(Expr) + READ_FEATURE(Expr::Parentheses) + READ_FEATURE(Expr::UnaryPlus) + READ_FEATURE(Expr::Negate) + READ_FEATURE(Expr::NOT) + READ_FEATURE(Expr::PercentLoc) + READ_FEATURE(Expr::DefinedUnary) + READ_FEATURE(Expr::Power) + READ_FEATURE(Expr::Multiply) + READ_FEATURE(Expr::Divide) + READ_FEATURE(Expr::Add) + READ_FEATURE(Expr::Subtract) + READ_FEATURE(Expr::Concat) + READ_FEATURE(Expr::LT) + READ_FEATURE(Expr::LE) + READ_FEATURE(Expr::EQ) + READ_FEATURE(Expr::NE) + READ_FEATURE(Expr::GE) + READ_FEATURE(Expr::GT) + READ_FEATURE(Expr::AND) + READ_FEATURE(Expr::OR) + READ_FEATURE(Expr::EQV) + READ_FEATURE(Expr::NEQV) + READ_FEATURE(Expr::DefinedBinary) + READ_FEATURE(Expr::ComplexConstructor) + READ_FEATURE(External) + READ_FEATURE(ExternalStmt) + READ_FEATURE(FailImageStmt) + READ_FEATURE(FileUnitNumber) + READ_FEATURE(FinalProcedureStmt) + READ_FEATURE(FlushStmt) + READ_FEATURE(ForallAssignmentStmt) + READ_FEATURE(ForallBodyConstruct) + READ_FEATURE(ForallConstruct) + READ_FEATURE(ForallConstructStmt) + READ_FEATURE(ForallStmt) + READ_FEATURE(FormTeamStmt) + READ_FEATURE(FormTeamStmt::FormTeamSpec) + READ_FEATURE(Format) + READ_FEATURE(FormatStmt) + READ_FEATURE(FunctionReference) + READ_FEATURE(FunctionStmt) + READ_FEATURE(FunctionSubprogram) + READ_FEATURE(GenericSpec) + READ_FEATURE(GenericSpec::Assignment) + READ_FEATURE(GenericSpec::ReadFormatted) + READ_FEATURE(GenericSpec::ReadUnformatted) + READ_FEATURE(GenericSpec::WriteFormatted) + READ_FEATURE(GenericSpec::WriteUnformatted) + READ_FEATURE(GenericStmt) + READ_FEATURE(GotoStmt) + READ_FEATURE(HollerithLiteralConstant) + READ_FEATURE(IdExpr) + 
READ_FEATURE(IdVariable) + READ_FEATURE(IfConstruct) + READ_FEATURE(IfConstruct::ElseBlock) + READ_FEATURE(IfConstruct::ElseIfBlock) + READ_FEATURE(IfStmt) + READ_FEATURE(IfThenStmt) + READ_FEATURE(TeamValue) + READ_FEATURE(ImageSelector) + READ_FEATURE(ImageSelectorSpec) + READ_FEATURE(ImageSelectorSpec::Stat) + READ_FEATURE(ImageSelectorSpec::Team_Number) + READ_FEATURE(ImplicitPart) + READ_FEATURE(ImplicitPartStmt) + READ_FEATURE(ImplicitSpec) + READ_FEATURE(ImplicitStmt) + READ_FEATURE(ImplicitStmt::ImplicitNoneNameSpec) + READ_FEATURE(ImpliedShapeSpec) + READ_FEATURE(ImportStmt) + READ_FEATURE(Initialization) + READ_FEATURE(InputImpliedDo) + READ_FEATURE(InputItem) + READ_FEATURE(InquireSpec) + READ_FEATURE(InquireSpec::CharVar) + READ_FEATURE(InquireSpec::CharVar::Kind) + READ_FEATURE(InquireSpec::IntVar) + READ_FEATURE(InquireSpec::IntVar::Kind) + READ_FEATURE(InquireSpec::LogVar) + READ_FEATURE(InquireSpec::LogVar::Kind) + READ_FEATURE(InquireStmt) + READ_FEATURE(InquireStmt::Iolength) + READ_FEATURE(IntegerTypeSpec) + READ_FEATURE(IntentSpec) + READ_FEATURE(IntentSpec::Intent) + READ_FEATURE(IntentStmt) + READ_FEATURE(InterfaceBlock) + READ_FEATURE(InterfaceBody) + READ_FEATURE(InterfaceBody::Function) + READ_FEATURE(InterfaceBody::Subroutine) + READ_FEATURE(InterfaceSpecification) + READ_FEATURE(InterfaceStmt) + READ_FEATURE(InternalSubprogram) + READ_FEATURE(InternalSubprogramPart) + READ_FEATURE(Intrinsic) + READ_FEATURE(IntrinsicStmt) + READ_FEATURE(IntrinsicTypeSpec) + READ_FEATURE(IntrinsicTypeSpec::Character) + READ_FEATURE(IntrinsicTypeSpec::Complex) + READ_FEATURE(IntrinsicTypeSpec::DoubleComplex) + READ_FEATURE(IntrinsicTypeSpec::DoublePrecision) + READ_FEATURE(IntrinsicTypeSpec::Logical) + READ_FEATURE(IntrinsicTypeSpec::Real) + READ_FEATURE(IoControlSpec) + READ_FEATURE(IoControlSpec::Asynchronous) + READ_FEATURE(IoControlSpec::CharExpr) + READ_FEATURE(IoControlSpec::CharExpr::Kind) + READ_FEATURE(IoControlSpec::Pos) + READ_FEATURE(IoControlSpec::Rec) + READ_FEATURE(IoControlSpec::Size) + READ_FEATURE(IoUnit) + READ_FEATURE(Keyword) + READ_FEATURE(KindParam) + READ_FEATURE(KindSelector) + READ_FEATURE(KindSelector::StarSize) + READ_FEATURE(LabelDoStmt) + READ_FEATURE(LanguageBindingSpec) + READ_FEATURE(LengthSelector) + READ_FEATURE(LetterSpec) + READ_FEATURE(LiteralConstant) + READ_FEATURE(IntLiteralConstant) + READ_FEATURE(LocalitySpec) + READ_FEATURE(LocalitySpec::DefaultNone) + READ_FEATURE(LocalitySpec::Local) + READ_FEATURE(LocalitySpec::LocalInit) + READ_FEATURE(LocalitySpec::Shared) + READ_FEATURE(LockStmt) + READ_FEATURE(LockStmt::LockStat) + READ_FEATURE(LogicalLiteralConstant) + READ_FEATURE(LoopControl) + READ_FEATURE(LoopControl::Concurrent) + READ_FEATURE(MainProgram) + READ_FEATURE(Map) + READ_FEATURE(Map::EndMapStmt) + READ_FEATURE(Map::MapStmt) + READ_FEATURE(MaskedElsewhereStmt) + READ_FEATURE(Module) + READ_FEATURE(ModuleStmt) + READ_FEATURE(ModuleSubprogram) + READ_FEATURE(ModuleSubprogramPart) + READ_FEATURE(MpSubprogramStmt) + READ_FEATURE(MsgVariable) + READ_FEATURE(Name) + READ_FEATURE(NamedConstant) + READ_FEATURE(NamedConstantDef) + READ_FEATURE(NamelistStmt) + READ_FEATURE(NamelistStmt::Group) + READ_FEATURE(NonLabelDoStmt) + READ_FEATURE(NoPass) + READ_FEATURE(NullifyStmt) + READ_FEATURE(NullInit) + READ_FEATURE(ObjectDecl) + READ_FEATURE(OldParameterStmt) + READ_FEATURE(OmpAlignedClause) + READ_FEATURE(OmpAtomic) + READ_FEATURE(OmpAtomicCapture) + READ_FEATURE(OmpAtomicCapture::Stmt1) + READ_FEATURE(OmpAtomicCapture::Stmt2) + 
READ_FEATURE(OmpAtomicRead) + READ_FEATURE(OmpAtomicUpdate) + READ_FEATURE(OmpAtomicWrite) + READ_FEATURE(OmpBeginBlockDirective) + READ_FEATURE(OmpBeginLoopDirective) + READ_FEATURE(OmpBeginSectionsDirective) + READ_FEATURE(OmpBlockDirective) + READ_FEATURE(OmpCancelType) + READ_FEATURE(OmpCancelType::Type) + READ_FEATURE(OmpClause) + READ_FEATURE(OmpClauseList) + READ_FEATURE(OmpCriticalDirective) + READ_FEATURE(OmpDeclareTargetSpecifier) + READ_FEATURE(OmpDeclareTargetWithClause) + READ_FEATURE(OmpDeclareTargetWithList) + READ_FEATURE(OmpDefaultClause) + READ_FEATURE(OmpDefaultClause::Type) + READ_FEATURE(OmpDefaultmapClause) + READ_FEATURE(OmpDefaultmapClause::ImplicitBehavior) + READ_FEATURE(OmpDefaultmapClause::VariableCategory) + READ_FEATURE(OmpDependClause) + READ_FEATURE(OmpDependClause::InOut) + READ_FEATURE(OmpDependClause::Sink) + READ_FEATURE(OmpDependClause::Source) + READ_FEATURE(OmpDependenceType) + READ_FEATURE(OmpDependenceType::Type) + READ_FEATURE(OmpDependSinkVec) + READ_FEATURE(OmpDependSinkVecLength) + READ_FEATURE(OmpEndAtomic) + READ_FEATURE(OmpEndBlockDirective) + READ_FEATURE(OmpEndCriticalDirective) + READ_FEATURE(OmpEndLoopDirective) + READ_FEATURE(OmpEndSectionsDirective) + READ_FEATURE(OmpIfClause) + READ_FEATURE(OmpIfClause::DirectiveNameModifier) + READ_FEATURE(OmpLinearClause) + READ_FEATURE(OmpLinearClause::WithModifier) + READ_FEATURE(OmpLinearClause::WithoutModifier) + READ_FEATURE(OmpLinearModifier) + READ_FEATURE(OmpLinearModifier::Type) + READ_FEATURE(OmpLoopDirective) + READ_FEATURE(OmpMapClause) + READ_FEATURE(OmpMapType) + READ_FEATURE(OmpMapType::Always) + READ_FEATURE(OmpMapType::Type) + READ_FEATURE(OmpObject) + READ_FEATURE(OmpObjectList) + READ_FEATURE(OmpOrderClause) + READ_FEATURE(OmpOrderClause::Type) + READ_FEATURE(OmpOrderModifier) + READ_FEATURE(OmpOrderModifier::Kind) + READ_FEATURE(OmpProcBindClause) + READ_FEATURE(OmpProcBindClause::Type) + READ_FEATURE(OmpReductionClause) + READ_FEATURE(OmpInReductionClause) + READ_FEATURE(OmpReductionCombiner) + READ_FEATURE(OmpReductionCombiner::FunctionCombiner) + READ_FEATURE(OmpReductionInitializerClause) + READ_FEATURE(OmpReductionOperator) + READ_FEATURE(OmpAllocateClause) + READ_FEATURE(OmpAllocateClause::Allocator) + READ_FEATURE(OmpScheduleClause) + READ_FEATURE(OmpScheduleClause::ScheduleType) + READ_FEATURE(OmpDeviceClause) + READ_FEATURE(OmpDeviceClause::DeviceModifier) + READ_FEATURE(OmpDeviceTypeClause) + READ_FEATURE(OmpDeviceTypeClause::Type) + READ_FEATURE(OmpScheduleModifier) + READ_FEATURE(OmpScheduleModifier::Modifier1) + READ_FEATURE(OmpScheduleModifier::Modifier2) + READ_FEATURE(OmpScheduleModifierType) + READ_FEATURE(OmpScheduleModifierType::ModType) + READ_FEATURE(OmpSectionBlocks) + READ_FEATURE(OmpSectionsDirective) + READ_FEATURE(OmpSimpleStandaloneDirective) + READ_FEATURE(Only) + READ_FEATURE(OpenACCAtomicConstruct) + READ_FEATURE(OpenACCBlockConstruct) + READ_FEATURE(OpenACCCacheConstruct) + READ_FEATURE(OpenACCCombinedConstruct) + READ_FEATURE(OpenACCConstruct) + READ_FEATURE(OpenACCDeclarativeConstruct) + READ_FEATURE(OpenACCLoopConstruct) + READ_FEATURE(OpenACCRoutineConstruct) + READ_FEATURE(OpenACCStandaloneDeclarativeConstruct) + READ_FEATURE(OpenACCStandaloneConstruct) + READ_FEATURE(OpenACCWaitConstruct) + READ_FEATURE(OpenMPAtomicConstruct) + READ_FEATURE(OpenMPBlockConstruct) + READ_FEATURE(OpenMPCancelConstruct) + READ_FEATURE(OpenMPCancelConstruct::If) + READ_FEATURE(OpenMPCancellationPointConstruct) + READ_FEATURE(OpenMPConstruct) + 
READ_FEATURE(OpenMPCriticalConstruct) + READ_FEATURE(OpenMPDeclarativeAllocate) + READ_FEATURE(OpenMPDeclarativeConstruct) + READ_FEATURE(OpenMPDeclareReductionConstruct) + READ_FEATURE(OpenMPDeclareSimdConstruct) + READ_FEATURE(OpenMPDeclareTargetConstruct) + READ_FEATURE(OmpMemoryOrderClause) + READ_FEATURE(OmpAtomicClause) + READ_FEATURE(OmpAtomicClauseList) + READ_FEATURE(OmpAtomicDefaultMemOrderClause) + READ_FEATURE(OmpAtomicDefaultMemOrderClause::Type) + READ_FEATURE(OpenMPFlushConstruct) + READ_FEATURE(OpenMPLoopConstruct) + READ_FEATURE(OpenMPExecutableAllocate) + READ_FEATURE(OpenMPRequiresConstruct) + READ_FEATURE(OpenMPSimpleStandaloneConstruct) + READ_FEATURE(OpenMPStandaloneConstruct) + READ_FEATURE(OpenMPSectionConstruct) + READ_FEATURE(OpenMPSectionsConstruct) + READ_FEATURE(OpenMPThreadprivate) + READ_FEATURE(OpenStmt) + READ_FEATURE(Optional) + READ_FEATURE(OptionalStmt) + READ_FEATURE(OtherSpecificationStmt) + READ_FEATURE(OutputImpliedDo) + READ_FEATURE(OutputItem) + READ_FEATURE(Parameter) + READ_FEATURE(ParameterStmt) + READ_FEATURE(ParentIdentifier) + READ_FEATURE(Pass) + READ_FEATURE(PauseStmt) + READ_FEATURE(Pointer) + READ_FEATURE(PointerAssignmentStmt) + READ_FEATURE(PointerAssignmentStmt::Bounds) + READ_FEATURE(PointerDecl) + READ_FEATURE(PointerObject) + READ_FEATURE(PointerStmt) + READ_FEATURE(PositionOrFlushSpec) + READ_FEATURE(PrefixSpec) + READ_FEATURE(PrefixSpec::Elemental) + READ_FEATURE(PrefixSpec::Impure) + READ_FEATURE(PrefixSpec::Module) + READ_FEATURE(PrefixSpec::Non_Recursive) + READ_FEATURE(PrefixSpec::Pure) + READ_FEATURE(PrefixSpec::Recursive) + READ_FEATURE(PrintStmt) + READ_FEATURE(PrivateStmt) + READ_FEATURE(PrivateOrSequence) + READ_FEATURE(ProcAttrSpec) + READ_FEATURE(ProcComponentAttrSpec) + READ_FEATURE(ProcComponentDefStmt) + READ_FEATURE(ProcComponentRef) + READ_FEATURE(ProcDecl) + READ_FEATURE(ProcInterface) + READ_FEATURE(ProcPointerInit) + READ_FEATURE(ProcedureDeclarationStmt) + READ_FEATURE(ProcedureDesignator) + READ_FEATURE(ProcedureStmt) + READ_FEATURE(ProcedureStmt::Kind) + READ_FEATURE(Program) + READ_FEATURE(ProgramStmt) + READ_FEATURE(ProgramUnit) + READ_FEATURE(Protected) + READ_FEATURE(ProtectedStmt) + READ_FEATURE(ReadStmt) + READ_FEATURE(RealLiteralConstant) + READ_FEATURE(RealLiteralConstant::Real) + READ_FEATURE(Rename) + READ_FEATURE(Rename::Names) + READ_FEATURE(Rename::Operators) + READ_FEATURE(ReturnStmt) + READ_FEATURE(RewindStmt) + READ_FEATURE(Save) + READ_FEATURE(SaveStmt) + READ_FEATURE(SavedEntity) + READ_FEATURE(SavedEntity::Kind) + READ_FEATURE(SectionSubscript) + READ_FEATURE(SelectCaseStmt) + READ_FEATURE(SelectRankCaseStmt) + READ_FEATURE(SelectRankCaseStmt::Rank) + READ_FEATURE(SelectRankConstruct) + READ_FEATURE(SelectRankConstruct::RankCase) + READ_FEATURE(SelectRankStmt) + READ_FEATURE(SelectTypeConstruct) + READ_FEATURE(SelectTypeConstruct::TypeCase) + READ_FEATURE(SelectTypeStmt) + READ_FEATURE(Selector) + READ_FEATURE(SeparateModuleSubprogram) + READ_FEATURE(SequenceStmt) + READ_FEATURE(Sign) + READ_FEATURE(SignedComplexLiteralConstant) + READ_FEATURE(SignedIntLiteralConstant) + READ_FEATURE(SignedRealLiteralConstant) + READ_FEATURE(SpecificationConstruct) + READ_FEATURE(SpecificationExpr) + READ_FEATURE(SpecificationPart) + READ_FEATURE(Star) + READ_FEATURE(StatOrErrmsg) + READ_FEATURE(StatVariable) + READ_FEATURE(StatusExpr) + READ_FEATURE(StmtFunctionStmt) + READ_FEATURE(StopCode) + READ_FEATURE(StopStmt) + READ_FEATURE(StopStmt::Kind) + READ_FEATURE(StructureComponent) + 
READ_FEATURE(StructureConstructor) + READ_FEATURE(StructureDef) + READ_FEATURE(StructureDef::EndStructureStmt) + READ_FEATURE(StructureField) + READ_FEATURE(StructureStmt) + READ_FEATURE(Submodule) + READ_FEATURE(SubmoduleStmt) + READ_FEATURE(SubroutineStmt) + READ_FEATURE(SubroutineSubprogram) + READ_FEATURE(SubscriptTriplet) + READ_FEATURE(Substring) + READ_FEATURE(SubstringInquiry) + READ_FEATURE(SubstringRange) + READ_FEATURE(Suffix) + READ_FEATURE(SyncAllStmt) + READ_FEATURE(SyncImagesStmt) + READ_FEATURE(SyncImagesStmt::ImageSet) + READ_FEATURE(SyncMemoryStmt) + READ_FEATURE(SyncTeamStmt) + READ_FEATURE(Target) + READ_FEATURE(TargetStmt) + READ_FEATURE(TypeAttrSpec) + READ_FEATURE(TypeAttrSpec::BindC) + READ_FEATURE(TypeAttrSpec::Extends) + READ_FEATURE(TypeBoundGenericStmt) + READ_FEATURE(TypeBoundProcBinding) + READ_FEATURE(TypeBoundProcDecl) + READ_FEATURE(TypeBoundProcedurePart) + READ_FEATURE(TypeBoundProcedureStmt) + READ_FEATURE(TypeBoundProcedureStmt::WithInterface) + READ_FEATURE(TypeBoundProcedureStmt::WithoutInterface) + READ_FEATURE(TypeDeclarationStmt) + READ_FEATURE(TypeGuardStmt) + READ_FEATURE(TypeGuardStmt::Guard) + READ_FEATURE(TypeParamDecl) + READ_FEATURE(TypeParamDefStmt) + READ_FEATURE(common::TypeParamAttr) + READ_FEATURE(TypeParamSpec) + READ_FEATURE(TypeParamValue) + READ_FEATURE(TypeParamValue::Deferred) + READ_FEATURE(TypeSpec) + READ_FEATURE(Union) + READ_FEATURE(Union::EndUnionStmt) + READ_FEATURE(Union::UnionStmt) + READ_FEATURE(UnlockStmt) + READ_FEATURE(UseStmt) + READ_FEATURE(UseStmt::ModuleNature) + READ_FEATURE(Value) + READ_FEATURE(ValueStmt) + READ_FEATURE(Variable) + READ_FEATURE(Verbatim) + READ_FEATURE(Volatile) + READ_FEATURE(VolatileStmt) + READ_FEATURE(WaitSpec) + READ_FEATURE(WaitStmt) + READ_FEATURE(WhereBodyConstruct) + READ_FEATURE(WhereConstruct) + READ_FEATURE(WhereConstruct::Elsewhere) + READ_FEATURE(WhereConstruct::MaskedElsewhere) + READ_FEATURE(WhereConstructStmt) + READ_FEATURE(WhereStmt) + READ_FEATURE(WriteStmt) + + READ_FEATURE(llvm::omp::Directive) + READ_FEATURE(llvm::omp::Clause) + READ_FEATURE(llvm::acc::Directive) + READ_FEATURE(llvm::acc::DefaultValue) + + template <typename A> bool Pre(const A &) { return true; } + template <typename A> void Post(const A &) {} + + template <typename T> bool Pre(const Statement<T> &) { return true; } + template <typename T> void Post(const Statement<T> &) {} + + template <typename T> bool Pre(const UnlabeledStatement<T> &) { return true; } + template <typename T> void Post(const UnlabeledStatement<T> &) {} + + template <typename T> bool Pre(const common::Indirection<T> &) { + return true; + } + template <typename T> void Post(const common::Indirection<T> &) {} + + template <typename A> bool Pre(const Scalar<A> &) { return true; } + template <typename A> void Post(const Scalar<A> &) {} + + template <typename A> bool Pre(const Constant<A> &) { return true; } + template <typename A> void Post(const Constant<A> &) {} + + template <typename A> bool Pre(const Integer<A> &) { return true; } + template <typename A> void Post(const Integer<A> &) {} + + template <typename A> bool Pre(const Logical<A> &) { return true; } + template <typename A> void Post(const Logical<A> &) {} + + template <typename A> bool Pre(const DefaultChar<A> &) { return true; } + template <typename A> void Post(const DefaultChar<A> &) {} + + template <typename... A> bool Pre(const std::tuple<A...> &) { return true; } + template <typename... A> void Post(const std::tuple<A...> &) {} + + template <typename... A> bool Pre(const std::variant<A...> &) { return true; } + template <typename... A> void Post(const std::variant<A...> &) {} +}; + +class FeatureListAction : public PluginParseTreeAction { + void executeAction() override { + NodeVisitor visitor; + Fortran::parser::Walk(getParsing().parseTree(), visitor); + + for (auto const &[feature, frequency] : visitor.getFrequencies()) { +
llvm::outs() << feature << ": " << frequency << "\n"; + } + } + + bool beginSourceFileAction() override { return runPrescan() && runParse(); } +}; + +static FrontendPluginRegistry::Add<FeatureListAction> X( + "feature-list", "List program features"); diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 4de1036dfb52b..7d96a72e5f36d 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -79,6 +79,7 @@ if (LLVM_BUILD_EXAMPLES) list(APPEND FLANG_TEST_DEPENDS flangPrintFunctionNames flangOmpReport + flangFeatureList ) endif () diff --git a/flang/test/Examples/feature-list-class.f90 b/flang/test/Examples/feature-list-class.f90 new file mode 100644 index 0000000000000..cba361b677f2a --- /dev/null +++ b/flang/test/Examples/feature-list-class.f90 @@ -0,0 +1,88 @@ +! UNSUPPORTED: system-windows +! REQUIRES: plugins, shell, examples + +! RUN: %flang_fc1 -load %llvmshlibdir/flangFeatureList%pluginext \ +! RUN: -plugin feature-list %s 2>&1 | FileCheck %s + +module list_features_test + implicit none + + type :: test_class_1 + integer :: a + real :: b + contains + procedure :: sum => sum_test_class_1 + procedure :: set => set_values_test_class_1 + end type +contains + real function sum_test_class_1(self) + class(test_class_1), intent(in) :: self + sum_test_class_1 = self%a + self%b + end function + + subroutine set_values_test_class_1(self, a, b) + class(test_class_1), intent(out) :: self + integer, intent(in) :: a, b + self%a = a + self%b = b + end subroutine +end module list_features_test + +! CHECK: Name: 32 +! CHECK-NEXT: DerivedTypeSpec: 2 +! CHECK-NEXT: Expr::Add: 1 +! CHECK-NEXT: IntrinsicTypeSpec: 4 +! CHECK-NEXT: IntegerTypeSpec: 2 +! CHECK-NEXT: IntrinsicTypeSpec::Real: 2 +! CHECK-NEXT: DataRef: 11 +! CHECK-NEXT: StructureComponent: 4 +! CHECK-NEXT: Designator: 7 +! CHECK-NEXT: Expr: 5 +! CHECK-NEXT: Variable: 3 +! CHECK-NEXT: AttrSpec: 3 +! CHECK-NEXT: IntentSpec: 3 +! CHECK-NEXT: IntentSpec::Intent: 3 +! CHECK-NEXT: DummyArg: 3 +! CHECK-NEXT: DeclarationTypeSpec: 6 +! CHECK-NEXT: DeclarationTypeSpec::Class: 2 +! CHECK-NEXT: ImplicitStmt: 1 +! CHECK-NEXT: ImplicitPart: 3 +! CHECK-NEXT: ImplicitPartStmt: 1 +! CHECK-NEXT: PrefixSpec: 1 +! CHECK-NEXT: Module: 1 +! CHECK-NEXT: AssignmentStmt: 3 +! CHECK-NEXT: ActionStmt: 3 +! CHECK-NEXT: Block: 2 +! CHECK-NEXT: TypeBoundProcBinding: 2 +! CHECK-NEXT: TypeBoundProcedureStmt: 2 +! CHECK-NEXT: TypeBoundProcDecl: 2 +! CHECK-NEXT: TypeBoundProcedureStmt::WithoutInterface: 2 +! CHECK-NEXT: ComponentOrFill: 2 +! CHECK-NEXT: ComponentDecl: 2 +! CHECK-NEXT: DataComponentDefStmt: 2 +! CHECK-NEXT: ComponentDefStmt: 2 +! CHECK-NEXT: TypeBoundProcedurePart: 1 +! CHECK-NEXT: ContainsStmt: 2 +! CHECK-NEXT: EndTypeStmt: 1 +! CHECK-NEXT: DerivedTypeDef: 1 +! CHECK-NEXT: DerivedTypeStmt: 1 +! CHECK-NEXT: EntityDecl: 4 +! CHECK-NEXT: SpecificationConstruct: 4 +! CHECK-NEXT: TypeDeclarationStmt: 3 +! CHECK-NEXT: DeclarationConstruct: 4 +! CHECK-NEXT: EndFunctionStmt: 1 +! CHECK-NEXT: FunctionStmt: 1 +! CHECK-NEXT: EndSubroutineStmt: 1 +! CHECK-NEXT: SubroutineStmt: 1 +! CHECK-NEXT: ExecutionPartConstruct: 3 +! CHECK-NEXT: ExecutableConstruct: 3 +! CHECK-NEXT: SpecificationPart: 3 +! CHECK-NEXT: FunctionSubprogram: 1 +! CHECK-NEXT: ExecutionPart: 2 +! CHECK-NEXT: SubroutineSubprogram: 1 +! CHECK-NEXT: ModuleSubprogram: 2 +! CHECK-NEXT: ProgramUnit: 1 +! CHECK-NEXT: Program: 1 +! CHECK-NEXT: ModuleSubprogramPart: 1 +! CHECK-NEXT: EndModuleStmt: 1 !
CHECK-NEXT: ModuleStmt: 1 diff --git a/flang/test/Examples/feature-list-functions.f90 b/flang/test/Examples/feature-list-functions.f90 new file mode 100644 index 0000000000000..a1913dda697c7 --- /dev/null +++ b/flang/test/Examples/feature-list-functions.f90 @@ -0,0 +1,76 @@ +! UNSUPPORTED: system-windows +! REQUIRES: plugins, shell, examples + +! RUN: %flang_fc1 -load %llvmshlibdir/flangFeatureList%pluginext \ +! RUN: -plugin feature-list %s 2>&1 | FileCheck %s + +program list_features_test + implicit none + call test_sub(test_func(2, 3), 4) +contains + subroutine test_sub(a, b) + integer, intent(in) :: a, b + print "(I0)", a + b + end subroutine + + integer function test_func(a, b) + integer, intent(in) :: a, b + test_func = a * b + end function +end program list_features_test + +! CHECK: Name: 19 +! CHECK-NEXT: IntLiteralConstant: 3 +! CHECK-NEXT: LiteralConstant: 4 +! CHECK-NEXT: CharLiteralConstant: 1 +! CHECK-NEXT: FunctionReference: 1 +! CHECK-NEXT: Call: 2 +! CHECK-NEXT: Expr::Multiply: 1 +! CHECK-NEXT: Expr::Add: 1 +! CHECK-NEXT: IntrinsicTypeSpec: 3 +! CHECK-NEXT: IntegerTypeSpec: 3 +! CHECK-NEXT: Format: 1 +! CHECK-NEXT: DataRef: 5 +! CHECK-NEXT: ProcedureDesignator: 2 +! CHECK-NEXT: Designator: 5 +! CHECK-NEXT: ActualArgSpec: 4 +! CHECK-NEXT: ActualArg: 4 +! CHECK-NEXT: Expr: 11 +! CHECK-NEXT: Variable: 1 +! CHECK-NEXT: AttrSpec: 2 +! CHECK-NEXT: IntentSpec: 2 +! CHECK-NEXT: IntentSpec::Intent: 2 +! CHECK-NEXT: DummyArg: 2 +! CHECK-NEXT: DeclarationTypeSpec: 3 +! CHECK-NEXT: ImplicitStmt: 1 +! CHECK-NEXT: ImplicitPart: 3 +! CHECK-NEXT: ImplicitPartStmt: 1 +! CHECK-NEXT: PrefixSpec: 1 +! CHECK-NEXT: OutputItem: 1 +! CHECK-NEXT: AssignmentStmt: 1 +! CHECK-NEXT: ActionStmt: 3 +! CHECK-NEXT: PrintStmt: 1 +! CHECK-NEXT: CallStmt: 1 +! CHECK-NEXT: Block: 3 +! CHECK-NEXT: ContainsStmt: 1 +! CHECK-NEXT: EntityDecl: 4 +! CHECK-NEXT: SpecificationConstruct: 2 +! CHECK-NEXT: TypeDeclarationStmt: 2 +! CHECK-NEXT: DeclarationConstruct: 2 +! CHECK-NEXT: EndFunctionStmt: 1 +! CHECK-NEXT: FunctionStmt: 1 +! CHECK-NEXT: EndSubroutineStmt: 1 +! CHECK-NEXT: SubroutineStmt: 1 +! CHECK-NEXT: ExecutionPartConstruct: 3 +! CHECK-NEXT: ExecutableConstruct: 3 +! CHECK-NEXT: SpecificationPart: 3 +! CHECK-NEXT: FunctionSubprogram: 1 +! CHECK-NEXT: ExecutionPart: 3 +! CHECK-NEXT: InternalSubprogramPart: 1 +! CHECK-NEXT: InternalSubprogram: 2 +! CHECK-NEXT: SubroutineSubprogram: 1 +! CHECK-NEXT: ProgramUnit: 1 +! CHECK-NEXT: MainProgram: 1 +! CHECK-NEXT: Program: 1 +! CHECK-NEXT: EndProgramStmt: 1 +! CHECK-NEXT: ProgramStmt: 1 From 1fe6a8a584fe7f377a61f56af57fc1f3e8b3f82a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 22 Mar 2023 00:25:13 -0700 Subject: [PATCH 269/691] [X86] Use llvm::isOneConstant (NFC) --- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e9a069b7295e1..a491ba84bf705 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -56700,11 +56700,9 @@ static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) { // This occurs frequently in our masked scalar intrinsic code and our // floating point select lowering with AVX512. // TODO: SimplifyDemandedBits instead? 
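// Note: llvm::isOneConstant (declared in llvm/CodeGen/SelectionDAGNodes.h) performs the same dyn_cast<ConstantSDNode> + isOne() check that the removed lines below spell out by hand.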
- if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse()) - if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1))) - if (C->getAPIntValue().isOne()) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, - Src.getOperand(0)); + if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse() && + isOneConstant(Src.getOperand(1))) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Src.getOperand(0)); // Combine scalar_to_vector of an extract_vector_elt into an extract_subvec. if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && From e603285316cb5112c5e44ffaa1712381dab37527 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Wed, 22 Mar 2023 14:25:34 +0700 Subject: [PATCH 270/691] [Test] Add hanging test for D146276 The patch was reverted because of a hang; adding the test so that this doesn't happen again. --- llvm/test/Transforms/GuardWidening/hang.ll | 32 ++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 llvm/test/Transforms/GuardWidening/hang.ll diff --git a/llvm/test/Transforms/GuardWidening/hang.ll b/llvm/test/Transforms/GuardWidening/hang.ll new file mode 100644 index 0000000000000..071a201ab6079 --- /dev/null +++ b/llvm/test/Transforms/GuardWidening/hang.ll @@ -0,0 +1,32 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt -S -passes=guard-widening < %s | FileCheck %s + +; Make sure we don't hang here. +define i64 @test() { +; CHECK-LABEL: define i64 @test() { +; CHECK-NEXT: bb: +; CHECK-NEXT: [[WIDE_CHK:%.*]] = and i1 poison, poison +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[WIDE_CHK]]) [ "deopt"() ] +; CHECK-NEXT: br label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br i1 poison, label [[BB3:%.*]], label [[BB2]] +; CHECK: bb3: +; CHECK-NEXT: [[CALL:%.*]] = call i64 (...) @llvm.experimental.deoptimize.i64() [ "deopt"() ] +; CHECK-NEXT: ret i64 [[CALL]] +; +bb: + call void (i1, ...) @llvm.experimental.guard(i1 poison) [ "deopt"() ] + br label %bb2 + +bb2: ; preds = %bb2, %bb + br i1 poison, label %bb3, label %bb2 + +bb3: ; preds = %bb2 + call void (i1, ...) @llvm.experimental.guard(i1 poison) [ "deopt"() ] + %call = call i64 (...) @llvm.experimental.deoptimize.i64() [ "deopt"() ] + ret i64 %call +} + +declare i64 @llvm.experimental.deoptimize.i64(...) + +declare void @llvm.experimental.guard(i1, ...) From 9bb96fd87498a0bcf791e808ba2a7f1dc9209287 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 22 Mar 2023 00:31:48 -0700 Subject: [PATCH 271/691] [llvm] Use llvm::isNullConstant (NFC) --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 +-- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 6 +----- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 6 ++---- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5cf9497069f78..a3667d9bd11b9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7386,8 +7386,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - bool IsZeroVal = - isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero(); + bool IsZeroVal = isNullConstant(Src); unsigned Limit = AlwaysInline ?
~0 : TLI.getMaxStoresPerMemset(OptSize); if (!TLI.findOptimalMemOpLowering( diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h index ac5e9d0986011..a08b0fb02e497 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h @@ -25,11 +25,7 @@ using namespace llvm; namespace { static inline bool isNullConstantOrUndef(SDValue V) { - if (V.isUndef()) - return true; - - ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isZero(); + return V.isUndef() || isNullConstant(V); } static inline bool getConstantValue(SDValue N, uint32_t &Out) { diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index d361df26841c3..ad072cfe23b17 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -953,10 +953,8 @@ SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op, unsigned SrcAS = ASC->getSrcAddressSpace(); unsigned DestAS = ASC->getDestAddressSpace(); - if (auto *ConstSrc = dyn_cast<ConstantSDNode>(Op.getOperand(0))) { - if (SrcAS == AMDGPUAS::FLAT_ADDRESS && ConstSrc->isZero()) - return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT); - } + if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS) + return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT); return Op; } From 558b46fde2db2215794336bbd08e411fee5240d7 Mon Sep 17 00:00:00 2001 From: Balazs Benics Date: Wed, 22 Mar 2023 08:43:09 +0100 Subject: [PATCH 272/691] [analyzer] Fix crashing getSValFromInitListExpr for nested initlists In the following example, we will end up hitting the `llvm_unreachable()`: https://godbolt.org/z/5sccc95Ec ```lang=C++ enum class E {}; const E glob[] = {{}}; void initlistWithinInitlist() { clang_analyzer_dump(glob[0]); // crashes at loading from `glob[0]` } ``` We should just return `std::nullopt` instead for these cases. It's better than crashing. Reviewed By: xazax.hun Differential Revision: https://reviews.llvm.org/D146538 --- clang/lib/StaticAnalyzer/Core/RegionStore.cpp | 8 ++++++-- clang/test/Analysis/initialization.cpp | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp index 46948c12617c0..49855305cecc0 100644 --- a/clang/lib/StaticAnalyzer/Core/RegionStore.cpp +++ b/clang/lib/StaticAnalyzer/Core/RegionStore.cpp @@ -1849,8 +1849,12 @@ std::optional<SVal> RegionStoreManager::getSValFromInitListExpr( // Go to the nested initializer list. ILE = IL; } - llvm_unreachable( - "Unhandled InitListExpr sub-expressions or invalid offsets."); + + assert(ILE); + + // FIXME: Unhandled InitListExpr sub-expression, possibly constructing an + // enum?
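+ // A nullopt result lets callers fall back to their conservative default
+ // handling (typically a fresh symbolic value), which is better than crashing.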
+ return std::nullopt; +} /// Returns an SVal, if possible, for the specified position in a string diff --git a/clang/test/Analysis/initialization.cpp b/clang/test/Analysis/initialization.cpp index e5b94ea7d0a2b..e624ef5bae9e9 100644 --- a/clang/test/Analysis/initialization.cpp +++ b/clang/test/Analysis/initialization.cpp @@ -249,3 +249,10 @@ void glob_array_parentheses1() { clang_analyzer_eval(glob_arr9[1][2] == 7); // expected-warning{{TRUE}} clang_analyzer_eval(glob_arr9[1][3] == 0); // expected-warning{{TRUE}} } + +enum class E {}; +const E glob[] = {{}}; +void initlistWithinInitlist() { + // no-crash + clang_analyzer_dump(glob[0]); // expected-warning-re {{reg_${{[0-9]+}}}} +} From 9c16eef1ec46e10185713043663511d49ffff6b1 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 22 Mar 2023 08:53:38 +0100 Subject: [PATCH 273/691] [mlir][IR] Add ReverseDominanceIterator for IR walkers Blocks are enumerated depth-first, but post-order. I.e., a block is enumerated when its successors have been enumerated. This iteration style is suitable when deleting blocks in a region: in the absence of cycles, uses are deleted before their definitions.
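For illustration, a sketch of that deletion pattern with the new iterator (assuming `op` is an operation whose regions have SSA dominance; blocks must not be erased while the walk itself is still running):

```lang=C++
// Visit blocks after their successors, so that (absent cycles) uses are
// collected before their definitions; erase once the traversal is done.
SmallVector<Block *> blocksToErase;
op->walk<WalkOrder::PostOrder, ReverseDominanceIterator<true>>(
    [&](Block *block) { blocksToErase.push_back(block); });
for (Block *block : blocksToErase)
  block->erase();
```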
Differential Revision: https://reviews.llvm.org/D146125 --- mlir/include/mlir/IR/Iterators.h | 41 ++++++++++++++++++++++++++++++- mlir/test/IR/visitors.mlir | 30 ++++++++++++++++++++++ mlir/test/lib/IR/TestVisitors.cpp | 13 ++++++++++ 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/IR/Iterators.h b/mlir/include/mlir/IR/Iterators.h index c16f7117f3dc9..2c6137c72cf5d 100644 --- a/mlir/include/mlir/IR/Iterators.h +++ b/mlir/include/mlir/IR/Iterators.h @@ -21,6 +21,7 @@ #include "mlir/Support/LLVM.h" #include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" namespace mlir { /// This iterator enumerates elements in "reverse" order. It is a wrapper around @@ -37,7 +38,7 @@ struct ReverseIterator { /// This iterator enumerates elements according to their dominance relationship. /// Operations and regions are enumerated in "forward" order. Blocks are /// enumerated according to their successor relationship. Unreachable blocks are -/// not enumerated. +/// not enumerated. Blocks may not be erased during the traversal. /// /// Note: If `NoGraphRegions` is set to "true", this iterator asserts that each /// visited region has SSA dominance. In either case, the ops in such regions @@ -70,6 +71,44 @@ struct ForwardDominanceIterator { return ForwardIterator::makeIterable(range); } }; + +/// This iterator enumerates elements according to their reverse dominance +/// relationship. Operations and regions are enumerated in "reverse" order. +/// Blocks are enumerated according to their successor relationship, but +/// post-order. I.e., a block is visited after its successors have been visited. +/// Cycles in the block graph are broken in an unspecified way. Unreachable +/// blocks are not enumerated. Blocks may not be erased during the traversal. +/// +/// Note: If `NoGraphRegions` is set to "true", this iterator asserts that each +/// visited region has SSA dominance. +template <bool NoGraphRegions = false> +struct ReverseDominanceIterator { + // llvm::reverse uses RangeT::rbegin and RangeT::rend. + static constexpr auto makeIterable(Block &range) { + return llvm::reverse(ForwardIterator::makeIterable(range)); + } + + static constexpr auto makeIterable(Operation &range) { + return llvm::reverse(ForwardIterator::makeIterable(range)); + } + + static auto makeIterable(Region &region) { + if (NoGraphRegions) { + // Only regions with SSA dominance are allowed. + assert(mayHaveSSADominance(region) && "graph regions are not allowed"); + } + + // Create post-order iterator. Blocks are enumerated according to their + // successor relationship. + Block *null = nullptr; + auto it = region.empty() + ? llvm::make_range(llvm::po_end(null), llvm::po_end(null)) + : llvm::post_order(&region.front()); + + // Walk API expects Block references instead of pointers. + return llvm::make_pointee_range(it); + } +}; } // namespace mlir #endif // MLIR_IR_ITERATORS_H diff --git a/mlir/test/IR/visitors.mlir b/mlir/test/IR/visitors.mlir index ddbc334fa4eed..2d83d6922e0cd 100644 --- a/mlir/test/IR/visitors.mlir +++ b/mlir/test/IR/visitors.mlir @@ -323,6 +323,36 @@ func.func @unordered_cfg_with_loop() { // CHECK: Visiting region 0 from operation 'regionOp0' // CHECK: Visiting region 0 from operation 'func.func' +// CHECK-LABEL: Op reverse dominance post-order visits +// CHECK: Visiting op 'func.return' +// CHECK-NOT: Visiting op 'op6' +// CHECK: Visiting op 'op7' +// CHECK: Visiting op 'cf.br' +// CHECK: Visiting op 'op5' +// CHECK: Visiting op 'cf.br' +// CHECK: Visiting op 'op1' +// CHECK: Visiting op 'cf.br' +// CHECK: Visiting op 'op2' +// CHECK: Visiting op 'cf.br' +// CHECK: Visiting op 'op3' +// CHECK: Visiting op 'cf.cond_br' +// CHECK: Visiting op 'op0' +// CHECK: Visiting op 'regionOp0' +// CHECK: Visiting op 'func.func' + +// CHECK-LABEL: Block reverse dominance post-order visits +// CHECK: Visiting block ^bb7 from region 0 from operation 'regionOp0' +// CHECK: Visiting block ^bb5 from region 0 from operation 'regionOp0' +// CHECK: Visiting block ^bb1 from region 0 from operation 'regionOp0' +// CHECK: Visiting block ^bb2 from region 0 from operation 'regionOp0' +// CHECK: Visiting block ^bb3 from region 0 from operation 'regionOp0' +// CHECK: Visiting block ^bb0 from region 0 from operation 'regionOp0' +// CHECK: Visiting block ^bb0 from region 0 from operation 'func.func' + +// CHECK-LABEL: Region reverse dominance post-order visits +// CHECK: Visiting region 0 from operation 'regionOp0' +// CHECK: Visiting region 0 from operation 'func.func' + // CHECK-LABEL: Block pre-order erasures (skip) // CHECK: Erasing block ^bb0 from region 0 from operation 'regionOp0' // CHECK: Cannot erase block ^bb0 from region 0 from operation 'regionOp0', still has uses diff --git a/mlir/test/lib/IR/TestVisitors.cpp b/mlir/test/lib/IR/TestVisitors.cpp index 6ed4abc71b7db..a14347ed2ec30 100644 --- a/mlir/test/lib/IR/TestVisitors.cpp +++ b/mlir/test/lib/IR/TestVisitors.cpp @@ -92,6 +92,19 @@ static void testPureCallbacks(Operation *op) { << "\n"; funcOp->walk<WalkOrder::PostOrder, ForwardDominanceIterator<true>>(regionPure); +
The revision adds the handleArgument and handleResult handlers that allow users of the inlining interface to implement argument and result conversions that take argument and result attributes into account. The motivating use cases for this revision are taken from the LLVM dialect inliner, which has to copy arguments that are marked as byval and that also has to consider zeroext / signext when converting integers. All type conversions are currently handled by the materializeCallConversion hook. It runs before isLegalToInline and supports only the introduction of a single cast operation since it may have to rollback. The new handlers run shortly before and after inlining and cannot fail. As a result, they can introduce more complex ir such as copying a struct argument. At the moment, the new hooks cannot be used to perform type conversions since all type conversions have to be done using the materializeCallConversion. A follow up revision will either relax this constraint or drop materializeCallConversion in favor of the new and more flexible handlers. The revision also extends the CallableOpInterface to provide access to the argument and result attributes if available. Reviewed By: rriddle, Dinistro Differential Revision: https://reviews.llvm.org/D145582 --- mlir/docs/Interfaces.md | 2 + mlir/docs/Tutorials/Toy/Ch-4.md | 12 ++ mlir/examples/toy/Ch4/mlir/Dialect.cpp | 12 ++ mlir/examples/toy/Ch5/mlir/Dialect.cpp | 12 ++ mlir/examples/toy/Ch6/mlir/Dialect.cpp | 12 ++ mlir/examples/toy/Ch7/mlir/Dialect.cpp | 12 ++ .../include/mlir/Dialect/Async/IR/AsyncOps.td | 12 ++ mlir/include/mlir/Dialect/Func/IR/FuncOps.td | 12 ++ mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 15 +++ .../mlir/Dialect/MLProgram/IR/MLProgramOps.td | 24 ++++ .../include/mlir/Dialect/Shape/IR/ShapeOps.td | 12 ++ .../mlir/Dialect/Transform/IR/TransformOps.td | 6 + .../include/mlir/Interfaces/CallInterfaces.td | 12 ++ mlir/include/mlir/Transforms/InliningUtils.h | 44 ++++++++ mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp | 10 ++ mlir/lib/Transforms/Utils/InliningUtils.cpp | 105 +++++++++++++++++- mlir/test/Transforms/inlining.mlir | 37 ++++++ mlir/test/lib/Dialect/Test/TestDialect.cpp | 42 +++++++ mlir/test/lib/Dialect/Test/TestOps.td | 67 +++++++++++ 19 files changed, 459 insertions(+), 1 deletion(-) diff --git a/mlir/docs/Interfaces.md b/mlir/docs/Interfaces.md index 6bb5070138632..b51adec4fc4f3 100644 --- a/mlir/docs/Interfaces.md +++ b/mlir/docs/Interfaces.md @@ -731,6 +731,8 @@ interface section goes as follows: * `CallableOpInterface` - Used to represent the target callee of call. - `Region * getCallableRegion()` - `ArrayRef getCallableResults()` + - `ArrayAttr getCallableArgAttrs()` + - `ArrayAttr getCallableResAttrs()` ##### RegionKindInterfaces diff --git a/mlir/docs/Tutorials/Toy/Ch-4.md b/mlir/docs/Tutorials/Toy/Ch-4.md index 77a52163774f2..f462274fa592e 100644 --- a/mlir/docs/Tutorials/Toy/Ch-4.md +++ b/mlir/docs/Tutorials/Toy/Ch-4.md @@ -169,6 +169,18 @@ Region *FuncOp::getCallableRegion() { return &getBody(); } /// executed. ArrayRef FuncOp::getCallableResults() { return getType().getResults(); } +/// Returns the argument attributes for all callable region arguments or +/// null if there are none. +ArrayAttr FuncOp::getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); +} + +/// Returns the result attributes for all callable region results or +/// null if there are none. +ArrayAttr FuncOp::getCallableResAttrs() { + return getResAttrs().value_or(nullptr); +} + // .... 
/// Return the callee of the generic call operation, this is required by the diff --git a/mlir/examples/toy/Ch4/mlir/Dialect.cpp b/mlir/examples/toy/Ch4/mlir/Dialect.cpp index 17a42d69c8f4c..f5258eb5cff14 100644 --- a/mlir/examples/toy/Ch4/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch4/mlir/Dialect.cpp @@ -307,6 +307,18 @@ llvm::ArrayRef<Type> FuncOp::getCallableResults() { return getFunctionType().getResults(); } +/// Returns the argument attributes for all callable region arguments or +/// null if there are none. +ArrayAttr FuncOp::getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); +} + +/// Returns the result attributes for all callable region results or +/// null if there are none. +ArrayAttr FuncOp::getCallableResAttrs() { + return getResAttrs().value_or(nullptr); +} + //===----------------------------------------------------------------------===// // GenericCallOp //===----------------------------------------------------------------------===// diff --git a/mlir/examples/toy/Ch5/mlir/Dialect.cpp b/mlir/examples/toy/Ch5/mlir/Dialect.cpp index 77ceb636e17f2..a959969c0449a 100644 --- a/mlir/examples/toy/Ch5/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch5/mlir/Dialect.cpp @@ -307,6 +307,18 @@ llvm::ArrayRef<Type> FuncOp::getCallableResults() { return getFunctionType().getResults(); } +/// Returns the argument attributes for all callable region arguments or +/// null if there are none. +ArrayAttr FuncOp::getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); +} + +/// Returns the result attributes for all callable region results or +/// null if there are none. +ArrayAttr FuncOp::getCallableResAttrs() { + return getResAttrs().value_or(nullptr); +} + //===----------------------------------------------------------------------===// // GenericCallOp //===----------------------------------------------------------------------===// diff --git a/mlir/examples/toy/Ch6/mlir/Dialect.cpp b/mlir/examples/toy/Ch6/mlir/Dialect.cpp index 77ceb636e17f2..a959969c0449a 100644 --- a/mlir/examples/toy/Ch6/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch6/mlir/Dialect.cpp @@ -307,6 +307,18 @@ llvm::ArrayRef<Type> FuncOp::getCallableResults() { return getFunctionType().getResults(); } +/// Returns the argument attributes for all callable region arguments or +/// null if there are none. +ArrayAttr FuncOp::getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); +} + +/// Returns the result attributes for all callable region results or +/// null if there are none. +ArrayAttr FuncOp::getCallableResAttrs() { + return getResAttrs().value_or(nullptr); +} + //===----------------------------------------------------------------------===// // GenericCallOp //===----------------------------------------------------------------------===// diff --git a/mlir/examples/toy/Ch7/mlir/Dialect.cpp b/mlir/examples/toy/Ch7/mlir/Dialect.cpp index 188b94fc2dfeb..d332411b63bb3 100644 --- a/mlir/examples/toy/Ch7/mlir/Dialect.cpp +++ b/mlir/examples/toy/Ch7/mlir/Dialect.cpp @@ -336,6 +336,18 @@ llvm::ArrayRef<Type> FuncOp::getCallableResults() { return getFunctionType().getResults(); } +/// Returns the argument attributes for all callable region arguments or +/// null if there are none. +ArrayAttr FuncOp::getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); +} + +/// Returns the result attributes for all callable region results or +/// null if there are none.
+ArrayAttr FuncOp::getCallableResAttrs() { + return getResAttrs().value_or(nullptr); +} + //===----------------------------------------------------------------------===// // GenericCallOp //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td index 2cf5ee810b7a0..30147b8b6a309 100644 --- a/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td +++ b/mlir/include/mlir/Dialect/Async/IR/AsyncOps.td @@ -168,6 +168,18 @@ def Async_FuncOp : Async_Op<"func", ArrayRef getCallableResults() { return getFunctionType() .getResults(); } + /// Returns the argument attributes for all callable region arguments or + /// null if there are none. + ::mlir::ArrayAttr getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); + } + + /// Returns the result attributes for all callable region results or + /// null if there are none. + ::mlir::ArrayAttr getCallableResAttrs() { + return getResAttrs().value_or(nullptr); + } + //===------------------------------------------------------------------===// // FunctionOpInterface Methods //===------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Func/IR/FuncOps.td b/mlir/include/mlir/Dialect/Func/IR/FuncOps.td index 45ec8a9e0b7ee..1a06d6533b2d0 100644 --- a/mlir/include/mlir/Dialect/Func/IR/FuncOps.td +++ b/mlir/include/mlir/Dialect/Func/IR/FuncOps.td @@ -299,6 +299,18 @@ def FuncOp : Func_Op<"func", [ /// executed. ArrayRef getCallableResults() { return getFunctionType().getResults(); } + /// Returns the argument attributes for all callable region arguments or + /// null if there are none. + ::mlir::ArrayAttr getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); + } + + /// Returns the result attributes for all callable region results or + /// null if there are none. + ::mlir::ArrayAttr getCallableResAttrs() { + return getResAttrs().value_or(nullptr); + } + //===------------------------------------------------------------------===// // FunctionOpInterface Methods //===------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index c2bb2f34a463a..1bbc32f3d2917 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1583,6 +1583,10 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [ /// Returns the result types of this function. ArrayRef getResultTypes() { return getFunctionType().getReturnTypes(); } + //===------------------------------------------------------------------===// + // CallableOpInterface + //===------------------------------------------------------------------===// + /// Returns the callable region, which is the function body. If the function /// is external, returns null. Region *getCallableRegion(); @@ -1596,6 +1600,17 @@ def LLVM_LLVMFuncOp : LLVM_Op<"func", [ return getFunctionType().getReturnTypes(); } + /// Returns the argument attributes for all callable region arguments or + /// null if there are none. + ::mlir::ArrayAttr getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); + } + + /// Returns the result attributes for all callable region results or + /// null if there are none. 
+ ::mlir::ArrayAttr getCallableResAttrs() { + return getResAttrs().value_or(nullptr); + } }]; let hasCustomAssemblyFormat = 1; diff --git a/mlir/include/mlir/Dialect/MLProgram/IR/MLProgramOps.td b/mlir/include/mlir/Dialect/MLProgram/IR/MLProgramOps.td index db6c7733130cb..7984b9744513f 100644 --- a/mlir/include/mlir/Dialect/MLProgram/IR/MLProgramOps.td +++ b/mlir/include/mlir/Dialect/MLProgram/IR/MLProgramOps.td @@ -73,6 +73,18 @@ def MLProgram_FuncOp : MLProgram_Op<"func", [ /// executed. ArrayRef getCallableResults() { return getFunctionType().getResults(); } + /// Returns the argument attributes for all callable region arguments or + /// null if there are none. + ::mlir::ArrayAttr getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); + } + + /// Returns the result attributes for all callable region results or + /// null if there are none. + ::mlir::ArrayAttr getCallableResAttrs() { + return getResAttrs().value_or(nullptr); + } + //===------------------------------------------------------------------===// // FunctionOpInterface Methods //===------------------------------------------------------------------===// @@ -422,6 +434,18 @@ def MLProgram_SubgraphOp : MLProgram_Op<"subgraph", [ /// executed. ArrayRef getCallableResults() { return getFunctionType().getResults(); } + /// Returns the argument attributes for all callable region arguments or + /// null if there are none. + ::mlir::ArrayAttr getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); + } + + /// Returns the result attributes for all callable region results or + /// null if there are none. + ::mlir::ArrayAttr getCallableResAttrs() { + return getResAttrs().value_or(nullptr); + } + //===------------------------------------------------------------------===// // FunctionOpInterface Methods //===------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td index ae84b07acab2d..47918b46dddc9 100644 --- a/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td +++ b/mlir/include/mlir/Dialect/Shape/IR/ShapeOps.td @@ -1149,6 +1149,18 @@ def Shape_FuncOp : Shape_Op<"func", return getFunctionType().getResults(); } + /// Returns the argument attributes for all callable region arguments or + /// null if there are none. + ::mlir::ArrayAttr getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); + } + + /// Returns the result attributes for all callable region results or + /// null if there are none. 
+ ::mlir::ArrayAttr getCallableResAttrs() { + return getResAttrs().value_or(nullptr); + } + //===------------------------------------------------------------------===// // FunctionOpInterface Methods //===------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td index 3ffc3f71433cc..46dea74546351 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td @@ -394,6 +394,12 @@ def NamedSequenceOp : TransformDialectOp<"named_sequence", ::llvm::ArrayRef<::mlir::Type> getCallableResults() { return getFunctionType().getResults(); } + ::mlir::ArrayAttr getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); + } + ::mlir::ArrayAttr getCallableResAttrs() { + return getResAttrs().value_or(nullptr); + } }]; } diff --git a/mlir/include/mlir/Interfaces/CallInterfaces.td b/mlir/include/mlir/Interfaces/CallInterfaces.td index 96540675f833a..cd37222cbc270 100644 --- a/mlir/include/mlir/Interfaces/CallInterfaces.td +++ b/mlir/include/mlir/Interfaces/CallInterfaces.td @@ -84,6 +84,18 @@ def CallableOpInterface : OpInterface<"CallableOpInterface"> { }], "::llvm::ArrayRef<::mlir::Type>", "getCallableResults" >, + InterfaceMethod<[{ + Returns the argument attributes for all callable region arguments or + null if there are none. + }], + "::mlir::ArrayAttr", "getCallableArgAttrs" + >, + InterfaceMethod<[{ + Returns the result attributes for all callable region results or null + if there are none. + }], + "::mlir::ArrayAttr", "getCallableResAttrs" + > ]; } diff --git a/mlir/include/mlir/Transforms/InliningUtils.h b/mlir/include/mlir/Transforms/InliningUtils.h index 241983ef8c3dc..63aba6a08e397 100644 --- a/mlir/include/mlir/Transforms/InliningUtils.h +++ b/mlir/include/mlir/Transforms/InliningUtils.h @@ -13,6 +13,7 @@ #ifndef MLIR_TRANSFORMS_INLININGUTILS_H #define MLIR_TRANSFORMS_INLININGUTILS_H +#include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/DialectInterface.h" #include "mlir/IR/Location.h" #include "mlir/IR/Region.h" @@ -141,6 +142,40 @@ class DialectInlinerInterface return nullptr; } + /// Hook to transform the call arguments before using them to replace the + /// callee arguments. It returns the transformation result or `argument` + /// itself if the hook did not change anything. The type of the returned value + /// has to match `targetType`, and the `argumentAttrs` dictionary is non-null + /// even if no attribute is present. The hook is called after converting the + /// callsite argument types using the materializeCallConversion callback, and + /// right before inlining the callee region. Any operations created using the + /// provided `builder` are inserted right before the inlined callee region. + /// Example use cases are the insertion of copies for by value arguments, or + /// integer conversions that require signedness information. + virtual Value handleArgument(OpBuilder &builder, Operation *call, + Operation *callable, Value argument, + Type targetType, + DictionaryAttr argumentAttrs) const { + return argument; + } + + /// Hook to transform the callee results before using them to replace the call + /// results. It returns the transformation result or the `result` itself if + /// the hook did not change anything. The type of the returned values has to + /// match `targetType`, and the `resultAttrs` dictionary is non-null even if + /// no attribute is present. 
The hook is called right before handling + /// terminators, and obtains the callee result before converting its type + /// using the `materializeCallConversion` callback. Any operations created + /// using the provided `builder` are inserted right after the inlined callee + /// region. Example use cases are the insertion of copies for by value results + /// or integer conversions that require signedness information. + /// NOTE: This hook is invoked after inlining the `callable` region. + virtual Value handleResult(OpBuilder &builder, Operation *call, + Operation *callable, Value result, Type targetType, + DictionaryAttr resultAttrs) const { + return result; + } + /// Process a set of blocks that have been inlined for a call. This callback /// is invoked before inlined terminator operations have been processed. virtual void processInlinedCallBlocks( @@ -183,6 +218,15 @@ class InlinerInterface virtual void handleTerminator(Operation *op, Block *newDest) const; virtual void handleTerminator(Operation *op, ArrayRef valuesToRepl) const; + + virtual Value handleArgument(OpBuilder &builder, Operation *call, + Operation *callable, Value argument, + Type targetType, + DictionaryAttr argumentAttrs) const; + virtual Value handleResult(OpBuilder &builder, Operation *call, + Operation *callable, Value result, Type targetType, + DictionaryAttr resultAttrs) const; + virtual void processInlinedCallBlocks( Operation *call, iterator_range inlinedBlocks) const; }; diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp index f6865b4107098..bb3ad91ce620a 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp @@ -2469,6 +2469,16 @@ ArrayRef spirv::FuncOp::getCallableResults() { return getFunctionType().getResults(); } +// CallableOpInterface +::mlir::ArrayAttr spirv::FuncOp::getCallableArgAttrs() { + return getArgAttrs().value_or(nullptr); +} + +// CallableOpInterface +::mlir::ArrayAttr spirv::FuncOp::getCallableResAttrs() { + return getResAttrs().value_or(nullptr); +} + //===----------------------------------------------------------------------===// // spirv.FunctionCall //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Transforms/Utils/InliningUtils.cpp b/mlir/lib/Transforms/Utils/InliningUtils.cpp index f9dc69caea472..8856fd59abf99 100644 --- a/mlir/lib/Transforms/Utils/InliningUtils.cpp +++ b/mlir/lib/Transforms/Utils/InliningUtils.cpp @@ -103,6 +103,26 @@ void InlinerInterface::handleTerminator(Operation *op, handler->handleTerminator(op, valuesToRepl); } +Value InlinerInterface::handleArgument(OpBuilder &builder, Operation *call, + Operation *callable, Value argument, + Type targetType, + DictionaryAttr argumentAttrs) const { + auto *handler = getInterfaceFor(callable); + assert(handler && "expected valid dialect handler"); + return handler->handleArgument(builder, call, callable, argument, targetType, + argumentAttrs); +} + +Value InlinerInterface::handleResult(OpBuilder &builder, Operation *call, + Operation *callable, Value result, + Type targetType, + DictionaryAttr resultAttrs) const { + auto *handler = getInterfaceFor(callable); + assert(handler && "expected valid dialect handler"); + return handler->handleResult(builder, call, callable, result, targetType, + resultAttrs); +} + void InlinerInterface::processInlinedCallBlocks( Operation *call, iterator_range inlinedBlocks) const { auto *handler = getInterfaceFor(call); @@ -141,6 +161,71 @@ static bool 
isLegalToInline(InlinerInterface &interface, Region *src,
 // Inline Methods
 //===----------------------------------------------------------------------===//

+static void handleArgumentImpl(InlinerInterface &interface, OpBuilder &builder,
+                               CallOpInterface call,
+                               CallableOpInterface callable,
+                               IRMapping &mapper) {
+  // Unpack the argument attributes if there are any.
+  SmallVector<DictionaryAttr> argAttrs(
+      callable.getCallableRegion()->getNumArguments(),
+      builder.getDictionaryAttr({}));
+  if (ArrayAttr arrayAttr = callable.getCallableArgAttrs()) {
+    assert(arrayAttr.size() == argAttrs.size());
+    for (auto [idx, attr] : llvm::enumerate(arrayAttr))
+      argAttrs[idx] = cast<DictionaryAttr>(attr);
+  }
+
+  // Run the argument attribute handler for the given argument and attribute.
+  for (auto [blockArg, argAttr] :
+       llvm::zip(callable.getCallableRegion()->getArguments(), argAttrs)) {
+    Value newArgument = interface.handleArgument(builder, call, callable,
+                                                 mapper.lookup(blockArg),
+                                                 blockArg.getType(), argAttr);
+    assert(newArgument.getType() == blockArg.getType() &&
+           "expected the handled argument type to match the target type");
+
+    // Update the mapping to point to the new argument returned by the handler.
+    mapper.map(blockArg, newArgument);
+  }
+}
+
+static void handleResultImpl(InlinerInterface &interface, OpBuilder &builder,
+                             CallOpInterface call, CallableOpInterface callable,
+                             ValueRange results) {
+  // Unpack the result attributes if there are any.
+  SmallVector<DictionaryAttr> resAttrs(results.size(),
+                                       builder.getDictionaryAttr({}));
+  if (ArrayAttr arrayAttr = callable.getCallableResAttrs()) {
+    assert(arrayAttr.size() == resAttrs.size());
+    for (auto [idx, attr] : llvm::enumerate(arrayAttr))
+      resAttrs[idx] = cast<DictionaryAttr>(attr);
+  }
+
+  // Run the result attribute handler for the given result and attribute.
+  SmallVector<DictionaryAttr> resultAttributes;
+  for (auto [result, resAttr] : llvm::zip(results, resAttrs)) {
+    // Store the original result users before running the handler.
+    DenseSet<Operation *> resultUsers;
+    for (Operation *user : result.getUsers())
+      resultUsers.insert(user);
+
+    // TODO: Use the type of the call result to replace once the hook can be
+    // used for type conversions. At the moment, all type conversions have to
+    // be done using materializeCallConversion.
+    Type targetType = result.getType();
+
+    Value newResult = interface.handleResult(builder, call, callable, result,
+                                             targetType, resAttr);
+    assert(newResult.getType() == targetType &&
+           "expected the handled result type to match the target type");
+
+    // Replace the result uses except for the ones introduced by the handler.
+    result.replaceUsesWithIf(newResult, [&](OpOperand &operand) {
+      return resultUsers.count(operand.getOwner());
+    });
+  }
+}
+
 static LogicalResult
 inlineRegionImpl(InlinerInterface &interface, Region *src, Block *inlineBlock,
                  Block::iterator inlinePoint, IRMapping &mapper,
@@ -166,6 +251,12 @@ inlineRegionImpl(InlinerInterface &interface, Region *src, Block *inlineBlock,
                       mapper))
     return failure();

+  // Run the argument attribute handler before inlining the callable region.
+  OpBuilder builder(inlineBlock, inlinePoint);
+  auto callable = dyn_cast<CallableOpInterface>(src->getParentOp());
+  if (call && callable)
+    handleArgumentImpl(interface, builder, call, callable, mapper);
+
   // Check to see if the region is being cloned, or moved inline. In either
   // case, move the new blocks after the 'insertBlock' to improve IR
   // readability.
@@ -199,8 +290,14 @@ inlineRegionImpl(InlinerInterface &interface, Region *src, Block *inlineBlock, // Handle the case where only a single block was inlined. if (std::next(newBlocks.begin()) == newBlocks.end()) { + // Run the result attribute handler on the terminator operands. + Operation *firstBlockTerminator = firstNewBlock->getTerminator(); + builder.setInsertionPoint(firstBlockTerminator); + if (call && callable) + handleResultImpl(interface, builder, call, callable, + firstBlockTerminator->getOperands()); + // Have the interface handle the terminator of this block. - auto *firstBlockTerminator = firstNewBlock->getTerminator(); interface.handleTerminator(firstBlockTerminator, llvm::to_vector<6>(resultsToReplace)); firstBlockTerminator->erase(); @@ -218,6 +315,12 @@ inlineRegionImpl(InlinerInterface &interface, Region *src, Block *inlineBlock, resultToRepl.value().getLoc())); } + // Run the result attribute handler on the post insertion block arguments. + builder.setInsertionPointToStart(postInsertBlock); + if (call && callable) + handleResultImpl(interface, builder, call, callable, + postInsertBlock->getArguments()); + /// Handle the terminators for each of the new blocks. for (auto &newBlock : newBlocks) interface.handleTerminator(newBlock.getTerminator(), postInsertBlock); diff --git a/mlir/test/Transforms/inlining.mlir b/mlir/test/Transforms/inlining.mlir index b102c210f0560..f7eaa478cdbba 100644 --- a/mlir/test/Transforms/inlining.mlir +++ b/mlir/test/Transforms/inlining.mlir @@ -226,3 +226,40 @@ func.func @func_with_block_args_location_callee2(%arg0 : i32) { call @func_with_block_args_location(%arg0) : (i32) -> () return } + +// Check that we can handle argument and result attributes. +test.conversion_func_op @handle_attr_callee_fn_multi_arg(%arg0 : i16, %arg1 : i16 {"test.handle_argument"}) -> (i16 {"test.handle_result"}, i16) { + %0 = arith.addi %arg0, %arg1 : i16 + %1 = arith.subi %arg0, %arg1 : i16 + "test.return"(%0, %1) : (i16, i16) -> () +} +test.conversion_func_op @handle_attr_callee_fn(%arg0 : i32 {"test.handle_argument"}) -> (i32 {"test.handle_result"}) { + "test.return"(%arg0) : (i32) -> () +} + +// CHECK-LABEL: func @inline_handle_attr_call +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]] +func.func @inline_handle_attr_call(%arg0 : i16, %arg1 : i16) -> (i16, i16) { + + // CHECK: %[[CHANGE_INPUT:.*]] = "test.type_changer"(%[[ARG1]]) : (i16) -> i16 + // CHECK: %[[SUM:.*]] = arith.addi %[[ARG0]], %[[CHANGE_INPUT]] + // CHECK: %[[DIFF:.*]] = arith.subi %[[ARG0]], %[[CHANGE_INPUT]] + // CHECK: %[[CHANGE_RESULT:.*]] = "test.type_changer"(%[[SUM]]) : (i16) -> i16 + // CHECK-NEXT: return %[[CHANGE_RESULT]], %[[DIFF]] + %res0, %res1 = "test.conversion_call_op"(%arg0, %arg1) { callee=@handle_attr_callee_fn_multi_arg } : (i16, i16) -> (i16, i16) + return %res0, %res1 : i16, i16 +} + +// CHECK-LABEL: func @inline_convert_and_handle_attr_call +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]] +func.func @inline_convert_and_handle_attr_call(%arg0 : i16) -> (i16) { + + // CHECK: %[[CAST_INPUT:.*]] = "test.cast"(%[[ARG0]]) : (i16) -> i32 + // CHECK: %[[CHANGE_INPUT:.*]] = "test.type_changer"(%[[CAST_INPUT]]) : (i32) -> i32 + // CHECK: %[[CHANGE_RESULT:.*]] = "test.type_changer"(%[[CHANGE_INPUT]]) : (i32) -> i32 + // CHECK: %[[CAST_RESULT:.*]] = "test.cast"(%[[CHANGE_RESULT]]) : (i32) -> i16 + // CHECK: return %[[CAST_RESULT]] + %res = "test.conversion_call_op"(%arg0) { callee=@handle_attr_callee_fn } : (i16) -> (i16) + return %res : i16 +} diff --git 
a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp
index 97c77b0eb4898..36e2b9882be44 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.cpp
+++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp
@@ -19,6 +19,7 @@
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Diagnostics.h"
 #include "mlir/IR/ExtensibleDialect.h"
+#include "mlir/IR/FunctionImplementation.h"
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/IR/OperationSupport.h"
 #include "mlir/IR/PatternMatch.h"
@@ -354,6 +355,24 @@ struct TestInlinerInterface : public DialectInlinerInterface {
     return builder.create<TestCastOp>(conversionLoc, resultType, input);
   }

+  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
+                       Value argument, Type targetType,
+                       DictionaryAttr argumentAttrs) const final {
+    if (!argumentAttrs.contains("test.handle_argument"))
+      return argument;
+    return builder.create<TestTypeChangerOp>(call->getLoc(), targetType,
+                                             argument);
+  }
+
+  Value handleResult(OpBuilder &builder, Operation *call, Operation *callable,
+                     Value result, Type targetType,
+                     DictionaryAttr resultAttrs) const final {
+    if (!resultAttrs.contains("test.handle_result"))
+      return result;
+    return builder.create<TestTypeChangerOp>(call->getLoc(), targetType,
+                                             result);
+  }
+
   void processInlinedCallBlocks(
       Operation *call,
       iterator_range<Region::iterator> inlinedBlocks) const final {
@@ -650,6 +669,29 @@ LogicalResult TestCallOp::verifySymbolUses(SymbolTableCollection &symbolTable) {
   return success();
 }

+//===----------------------------------------------------------------------===//
+// ConversionFuncOp
+//===----------------------------------------------------------------------===//
+
+ParseResult ConversionFuncOp::parse(OpAsmParser &parser,
+                                    OperationState &result) {
+  auto buildFuncType =
+      [](Builder &builder, ArrayRef<Type> argTypes, ArrayRef<Type> results,
+         function_interface_impl::VariadicFlag,
+         std::string &) { return builder.getFunctionType(argTypes, results); };
+
+  return function_interface_impl::parseFunctionOp(
+      parser, result, /*allowVariadic=*/false,
+      getFunctionTypeAttrName(result.name), buildFuncType,
+      getArgAttrsAttrName(result.name), getResAttrsAttrName(result.name));
+}
+
+void ConversionFuncOp::print(OpAsmPrinter &p) {
+  function_interface_impl::printFunctionOp(
+      p, *this, /*isVariadic=*/false, getFunctionTypeAttrName(),
+      getArgAttrsAttrName(), getResAttrsAttrName());
+}
+
 //===----------------------------------------------------------------------===//
 // TestFoldToCallOp
 //===----------------------------------------------------------------------===//

diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index 3f642b8a87ea2..e747d4bddfd7a 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -14,6 +14,7 @@ include "TestInterfaces.td"
 include "mlir/Dialect/DLTI/DLTIBase.td"
 include "mlir/Dialect/Linalg/IR/LinalgInterfaces.td"
 include "mlir/IR/EnumAttr.td"
+include "mlir/IR/FunctionInterfaces.td"
 include "mlir/IR/OpBase.td"
 include "mlir/IR/OpAsmInterface.td"
 include "mlir/IR/PatternBase.td"
@@ -482,6 +483,66 @@ def ConversionCallOp : TEST_Op<"conversion_call_op",
   }];
 }

+def ConversionFuncOp : TEST_Op<"conversion_func_op", [CallableOpInterface,
+                                                      FunctionOpInterface]> {
+  let arguments = (ins SymbolNameAttr:$sym_name,
+                       TypeAttrOf<FunctionType>:$function_type,
+                       OptionalAttr<DictArrayAttr>:$arg_attrs,
+                       OptionalAttr<DictArrayAttr>:$res_attrs,
+                       OptionalAttr<StrAttr>:$sym_visibility);
+  let regions = (region AnyRegion:$body);
+
+  let extraClassDeclaration = [{
+    //===------------------------------------------------------------------===//
+    // CallableOpInterface
+    //===------------------------------------------------------------------===//
+
+    /// Returns the region on the current operation that is callable. This may
+    /// return null in the case of an external callable object, e.g. an
+    /// external function.
+    ::mlir::Region *getCallableRegion() {
+      return isExternal() ? nullptr : &getBody();
+    }
+
+    /// Returns the result types that the callable region produces when
+    /// executed.
+    ::mlir::ArrayRef<::mlir::Type> getCallableResults() {
+      return getFunctionType().getResults();
+    }
+
+    /// Returns the argument attributes for all callable region arguments or
+    /// null if there are none.
+    ::mlir::ArrayAttr getCallableArgAttrs() {
+      return getArgAttrs().value_or(nullptr);
+    }
+
+    /// Returns the result attributes for all callable region results or
+    /// null if there are none.
+    ::mlir::ArrayAttr getCallableResAttrs() {
+      return getResAttrs().value_or(nullptr);
+    }
+
+    //===------------------------------------------------------------------===//
+    // FunctionOpInterface Methods
+    //===------------------------------------------------------------------===//
+
+    /// Returns the argument types of this function.
+    ::mlir::ArrayRef<::mlir::Type> getArgumentTypes() {
+      return getFunctionType().getInputs();
+    }
+
+    /// Returns the result types of this function.
+    ::mlir::ArrayRef<::mlir::Type> getResultTypes() {
+      return getFunctionType().getResults();
+    }
+
+    /// Returns the number of results of this function.
+    unsigned getNumResults() { return getResultTypes().size(); }
+  }];
+
+  let hasCustomAssemblyFormat = 1;
+}
+
 def FunctionalRegionOp : TEST_Op<"functional_region_op",
     [CallableOpInterface]> {
   let regions = (region AnyRegion:$body);
@@ -492,6 +553,12 @@ def FunctionalRegionOp : TEST_Op<"functional_region_op",
     ::llvm::ArrayRef<::mlir::Type> getCallableResults() {
       return getType().cast<::mlir::FunctionType>().getResults();
     }
+    ::mlir::ArrayAttr getCallableArgAttrs() {
+      return nullptr;
+    }
+    ::mlir::ArrayAttr getCallableResAttrs() {
+      return nullptr;
+    }
   }];
 }

From 7b83a1438f9af064851bac3d74e05f794088d6d6 Mon Sep 17 00:00:00 2001
From: Max Kazantsev
Date: Wed, 22 Mar 2023 14:29:12 +0700
Subject: [PATCH 275/691] [GuardWidening] Improve analysis of potential
 widening into hotter block, try 2

The initial version was reverted because it could loop infinitely when the
likely successor is not properly dominated by the predecessor; in practice
this meant that the walk went up the CFG through a backedge and never
terminated. I added some paranoid assertion checks to make sure that every
other invariant holds. I also found a hypothetical situation where we may go
past the dominated block while following the likely successors (meaning that
the dominated block is in fact dynamically unreachable from the dominating
block) and explicitly prohibited this, though I don't have a motivating test
showing that it's a real problem.
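To illustrate the fixed walk, here is a minimal sketch in the spirit of the
patch below (an explanatory outline reusing its names under those
assumptions, not the verbatim committed code):

  // Follow likely successors starting from the dominating block, but refuse
  // any step that does not descend the dominator tree. A backedge targets a
  // block that dominates the backedge source, so properlyDominates() fails
  // there and the walk now terminates instead of cycling through the loop.
  const BasicBlock *Cur = DominatingBlock;
  while (Cur != DominatedBlock) {
    const BasicBlock *Succ = GetLikelySuccessor(Cur);
    if (!Succ || !DT.properlyDominates(Cur, Succ))
      break;
    Cur = Succ;
  }
  // Reaching DominatedBlock here means widening would not hoist the guard
  // into a considerably hotter block.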
Differential Revision: https://reviews.llvm.org/D146276
---
 llvm/lib/Transforms/Scalar/GuardWidening.cpp | 68 +++++++++++++++----
 .../two_forms_behavior_consistency.ll | 40 +++++------
 2 files changed, 70 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 064c7b1d0ad12..7100f538f38ad 100644
--- a/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -460,27 +460,67 @@ GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
   if (HoistingOutOfLoop)
     return WS_Positive;

-  // Returns true if we might be hoisting above explicit control flow. Note
-  // that this completely ignores implicit control flow (guards, calls which
-  // throw, etc...). That choice appears arbitrary.
-  auto MaybeHoistingOutOfIf = [&]() {
-    auto *DominatingBlock = DominatingGuard->getParent();
-    auto *DominatedBlock = DominatedInstr->getParent();
-    if (isGuardAsWidenableBranch(DominatingGuard))
-      DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0);
-
-    // Same Block?
+  // For a given basic block \p BB, return the successor that is guaranteed or
+  // highly likely to be taken.
+  auto GetLikelySuccessor = [](const BasicBlock *BB) -> const BasicBlock * {
+    if (auto *UniqueSucc = BB->getUniqueSuccessor())
+      return UniqueSucc;
+    auto *Term = BB->getTerminator();
+    Value *Cond = nullptr;
+    const BasicBlock *IfTrue = nullptr, *IfFalse = nullptr;
+    using namespace PatternMatch;
+    if (!match(Term, m_Br(m_Value(Cond), m_BasicBlock(IfTrue),
+                          m_BasicBlock(IfFalse))))
+      return nullptr;
+    // For constant conditions, only one dynamic successor is possible.
+    if (auto *ConstCond = dyn_cast<ConstantInt>(Cond))
+      return ConstCond->isAllOnesValue() ? IfTrue : IfFalse;
+    // If one of the successors ends with deopt, the other one is likely.
+    if (IfFalse->getPostdominatingDeoptimizeCall())
+      return IfTrue;
+    if (IfTrue->getPostdominatingDeoptimizeCall())
+      return IfFalse;
+    // TODO: Use branch frequency metadata to allow hoisting through non-deopt
+    // branches?
+    return nullptr;
+  };
+
+  // Returns true if we might be hoisting above explicit control flow into a
+  // considerably hotter block. Note that this completely ignores implicit
+  // control flow (guards, calls which throw, etc...). That choice appears
+  // arbitrary (we assume that implicit control flow exits are all rare).
+  auto MaybeHoistingToHotterBlock = [&]() {
+    const auto *DominatingBlock = DominatingGuard->getParent();
+    const auto *DominatedBlock = DominatedInstr->getParent();
+
+    // Descend as low as we can, always taking the likely successor.
+    assert(DT.isReachableFromEntry(DominatingBlock) && "Unreached code");
+    assert(DT.isReachableFromEntry(DominatedBlock) && "Unreached code");
+    assert(DT.dominates(DominatingBlock, DominatedBlock) && "No dominance");
+    while (DominatedBlock != DominatingBlock) {
+      auto *LikelySucc = GetLikelySuccessor(DominatingBlock);
+      // No likely successor?
+      if (!LikelySucc)
+        break;
+      // Only go down the dominator tree.
+      if (!DT.properlyDominates(DominatingBlock, LikelySucc))
+        break;
+      DominatingBlock = LikelySucc;
+    }
+
+    // Found?
     if (DominatedBlock == DominatingBlock)
       return false;
-    // Obvious successor (common loop header/preheader case)
-    if (DominatedBlock == DominatingBlock->getUniqueSuccessor())
-      return false;
+    // We followed the likely successor chain and went past the dominated
+    // block. It means that the dominated guard is in dead/very cold code.
+ if (!DT.dominates(DominatingBlock, DominatedBlock)) + return true; // TODO: diamond, triangle cases if (!PDT) return true; return !PDT->dominates(DominatedBlock, DominatingBlock); }; - return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral; + return MaybeHoistingToHotterBlock() ? WS_IllegalOrNegative : WS_Neutral; } bool GuardWideningImpl::canBeHoistedTo( diff --git a/llvm/test/Transforms/GuardWidening/two_forms_behavior_consistency.ll b/llvm/test/Transforms/GuardWidening/two_forms_behavior_consistency.ll index 69bddeea9fbb8..9b60f4e3e62b7 100644 --- a/llvm/test/Transforms/GuardWidening/two_forms_behavior_consistency.ll +++ b/llvm/test/Transforms/GuardWidening/two_forms_behavior_consistency.ll @@ -42,30 +42,26 @@ define void @test_01(i32 %a, i32 %b, i32 %c, i32 %d) { ; BRANCH_FORM-NEXT: entry: ; BRANCH_FORM-NEXT: br label [[LOOP:%.*]] ; BRANCH_FORM: loop: -; BRANCH_FORM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[GUARDED5:%.*]] ] +; BRANCH_FORM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[GUARDED:%.*]] ] ; BRANCH_FORM-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; BRANCH_FORM-NEXT: [[C1:%.*]] = icmp ult i32 [[IV]], [[A]] ; BRANCH_FORM-NEXT: [[C2:%.*]] = icmp ult i32 [[IV]], [[B]] ; BRANCH_FORM-NEXT: [[WIDE_CHK:%.*]] = and i1 [[C1]], [[C2]] +; BRANCH_FORM-NEXT: [[C3:%.*]] = icmp ult i32 [[IV]], [[C]] +; BRANCH_FORM-NEXT: [[WIDE_CHK13:%.*]] = and i1 [[WIDE_CHK]], [[C3]] +; BRANCH_FORM-NEXT: [[C4:%.*]] = icmp ult i32 [[IV]], [[D]] +; BRANCH_FORM-NEXT: [[WIDE_CHK14:%.*]] = and i1 [[WIDE_CHK13]], [[C4]] ; BRANCH_FORM-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] -; BRANCH_FORM-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] +; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK14]], [[WIDENABLE_COND]] +; BRANCH_FORM-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; BRANCH_FORM: deopt: ; BRANCH_FORM-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] ; BRANCH_FORM-NEXT: ret void ; BRANCH_FORM: guarded: ; BRANCH_FORM-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() ; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[C2]], [[WIDENABLE_COND3]] -; BRANCH_FORM-NEXT: [[C3:%.*]] = icmp ult i32 [[IV]], [[C]] -; BRANCH_FORM-NEXT: [[C4:%.*]] = icmp ult i32 [[IV]], [[D]] -; BRANCH_FORM-NEXT: [[WIDE_CHK13:%.*]] = and i1 [[C3]], [[C4]] ; BRANCH_FORM-NEXT: [[WIDENABLE_COND7:%.*]] = call i1 @llvm.experimental.widenable.condition() -; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND8:%.*]] = and i1 [[WIDE_CHK13]], [[WIDENABLE_COND7]] -; BRANCH_FORM-NEXT: br i1 [[EXIPLICIT_GUARD_COND8]], label [[GUARDED5]], label [[DEOPT6:%.*]], !prof [[PROF0]] -; BRANCH_FORM: deopt6: -; BRANCH_FORM-NEXT: call void (...) 
@llvm.experimental.deoptimize.isVoid() [ "deopt"() ] -; BRANCH_FORM-NEXT: ret void -; BRANCH_FORM: guarded5: +; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND8:%.*]] = and i1 [[C3]], [[WIDENABLE_COND7]] ; BRANCH_FORM-NEXT: [[WIDENABLE_COND11:%.*]] = call i1 @llvm.experimental.widenable.condition() ; BRANCH_FORM-NEXT: [[EXIPLICIT_GUARD_COND12:%.*]] = and i1 [[C4]], [[WIDENABLE_COND11]] ; BRANCH_FORM-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() @@ -78,30 +74,26 @@ define void @test_01(i32 %a, i32 %b, i32 %c, i32 %d) { ; BRANCH_FORM_LICM-NEXT: entry: ; BRANCH_FORM_LICM-NEXT: br label [[LOOP:%.*]] ; BRANCH_FORM_LICM: loop: -; BRANCH_FORM_LICM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[GUARDED5:%.*]] ] +; BRANCH_FORM_LICM-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[GUARDED:%.*]] ] ; BRANCH_FORM_LICM-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; BRANCH_FORM_LICM-NEXT: [[C1:%.*]] = icmp ult i32 [[IV]], [[A]] ; BRANCH_FORM_LICM-NEXT: [[C2:%.*]] = icmp ult i32 [[IV]], [[B]] ; BRANCH_FORM_LICM-NEXT: [[WIDE_CHK:%.*]] = and i1 [[C1]], [[C2]] +; BRANCH_FORM_LICM-NEXT: [[C3:%.*]] = icmp ult i32 [[IV]], [[C]] +; BRANCH_FORM_LICM-NEXT: [[WIDE_CHK13:%.*]] = and i1 [[WIDE_CHK]], [[C3]] +; BRANCH_FORM_LICM-NEXT: [[C4:%.*]] = icmp ult i32 [[IV]], [[D]] +; BRANCH_FORM_LICM-NEXT: [[WIDE_CHK14:%.*]] = and i1 [[WIDE_CHK13]], [[C4]] ; BRANCH_FORM_LICM-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() -; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK]], [[WIDENABLE_COND]] -; BRANCH_FORM_LICM-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED:%.*]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] +; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDE_CHK14]], [[WIDENABLE_COND]] +; BRANCH_FORM_LICM-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[GUARDED]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; BRANCH_FORM_LICM: deopt: ; BRANCH_FORM_LICM-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] ; BRANCH_FORM_LICM-NEXT: ret void ; BRANCH_FORM_LICM: guarded: ; BRANCH_FORM_LICM-NEXT: [[WIDENABLE_COND3:%.*]] = call i1 @llvm.experimental.widenable.condition() ; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND4:%.*]] = and i1 [[C2]], [[WIDENABLE_COND3]] -; BRANCH_FORM_LICM-NEXT: [[C3:%.*]] = icmp ult i32 [[IV]], [[C]] -; BRANCH_FORM_LICM-NEXT: [[C4:%.*]] = icmp ult i32 [[IV]], [[D]] -; BRANCH_FORM_LICM-NEXT: [[WIDE_CHK13:%.*]] = and i1 [[C3]], [[C4]] ; BRANCH_FORM_LICM-NEXT: [[WIDENABLE_COND7:%.*]] = call i1 @llvm.experimental.widenable.condition() -; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND8:%.*]] = and i1 [[WIDE_CHK13]], [[WIDENABLE_COND7]] -; BRANCH_FORM_LICM-NEXT: br i1 [[EXIPLICIT_GUARD_COND8]], label [[GUARDED5]], label [[DEOPT6:%.*]], !prof [[PROF0]] -; BRANCH_FORM_LICM: deopt6: -; BRANCH_FORM_LICM-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ] -; BRANCH_FORM_LICM-NEXT: ret void -; BRANCH_FORM_LICM: guarded5: +; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND8:%.*]] = and i1 [[C3]], [[WIDENABLE_COND7]] ; BRANCH_FORM_LICM-NEXT: [[WIDENABLE_COND11:%.*]] = call i1 @llvm.experimental.widenable.condition() ; BRANCH_FORM_LICM-NEXT: [[EXIPLICIT_GUARD_COND12:%.*]] = and i1 [[C4]], [[WIDENABLE_COND11]] ; BRANCH_FORM_LICM-NEXT: [[LOOP_COND:%.*]] = call i1 @cond() From 57c128e4c038a2fba0b0fc298712ba675f59203b Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 22 Mar 2023 09:20:35 +0100 Subject: [PATCH 276/691] [mlir][Bazel] Add missing dependency. 
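TestOps.td now includes mlir/IR/FunctionInterfaces.td for the new test
function op, so the test dialect td_library additionally needs
//mlir:FunctionInterfacesTdFiles.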
--- .../bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel index 5fbf4d994a624..944088589d3ed 100644 --- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel @@ -14,11 +14,11 @@ package( licenses(["notice"]) LLVM_LIT_PATH_FUNCTION = " " + \ - "# Allow generated file to be relocatable.\n" + \ - "from pathlib import Path\n" + \ - "def path(p):\n" + \ - " if not p: return ''\n" + \ - " return str((Path(__file__).parent / p).resolve())\n" + "# Allow generated file to be relocatable.\n" + \ + "from pathlib import Path\n" + \ + "def path(p):\n" + \ + " if not p: return ''\n" + \ + " return str((Path(__file__).parent / p).resolve())\n" LIT_SITE_CFG_IN_HEADER = "# Autogenerated, do not edit." + LLVM_LIT_PATH_FUNCTION @@ -107,6 +107,7 @@ td_library( "//mlir:DLTIDialectTdFiles", "//mlir:DataLayoutInterfacesTdFiles", "//mlir:DestinationStyleOpInterfaceTdFiles", + "//mlir:FunctionInterfacesTdFiles", "//mlir:InferIntRangeInterfaceTdFiles", "//mlir:InferTypeOpInterfaceTdFiles", "//mlir:LinalgStructuredOpsTdFiles", From 9297b9f8eeecc5ea6571cf45985ba77bc2960427 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 22 Mar 2023 09:04:01 +0100 Subject: [PATCH 277/691] [mlir][Transforms][NFC] Improve builder/listener API of OperationFolder The constructor of `OperationFolder` takes a listener. Therefore, the remaining API should not take any builder/rewriters. This could lead to double notifications in case a listener is attached to the builder/rewriter. As an internal cleanup, `OperationFolder` now has an `IRRewriter` instead of a `RewriterBase::Listener`. In most cases, `OperationFolder` no longer has to notify/deal with listeners. This is done by the rewriter. Differential Revision: https://reviews.llvm.org/D146134 --- mlir/include/mlir/IR/PatternMatch.h | 7 +- mlir/include/mlir/Transforms/FoldUtils.h | 33 ++++---- mlir/lib/Transforms/SCCP.cpp | 6 +- mlir/lib/Transforms/Utils/FoldUtils.cpp | 81 ++++++++----------- .../lib/Transforms/TestIntRangeInference.cpp | 5 +- 5 files changed, 60 insertions(+), 72 deletions(-) diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h index 9c4790c031201..600ace4882734 100644 --- a/mlir/include/mlir/IR/PatternMatch.h +++ b/mlir/include/mlir/IR/PatternMatch.h @@ -624,7 +624,9 @@ class RewriterBase : public OpBuilder { protected: /// Initialize the builder. - explicit RewriterBase(MLIRContext *ctx) : OpBuilder(ctx) {} + explicit RewriterBase(MLIRContext *ctx, + OpBuilder::Listener *listener = nullptr) + : OpBuilder(ctx, listener) {} explicit RewriterBase(const OpBuilder &otherBuilder) : OpBuilder(otherBuilder) {} virtual ~RewriterBase(); @@ -648,7 +650,8 @@ class RewriterBase : public OpBuilder { /// such as a `PatternRewriter`, is not available. 
class IRRewriter : public RewriterBase { public: - explicit IRRewriter(MLIRContext *ctx) : RewriterBase(ctx) {} + explicit IRRewriter(MLIRContext *ctx, OpBuilder::Listener *listener = nullptr) + : RewriterBase(ctx, listener) {} explicit IRRewriter(const OpBuilder &builder) : RewriterBase(builder) {} }; diff --git a/mlir/include/mlir/Transforms/FoldUtils.h b/mlir/include/mlir/Transforms/FoldUtils.h index a6dc18369e77a..2600da361496c 100644 --- a/mlir/include/mlir/Transforms/FoldUtils.h +++ b/mlir/include/mlir/Transforms/FoldUtils.h @@ -32,8 +32,8 @@ class Value; /// generated along the way. class OperationFolder { public: - OperationFolder(MLIRContext *ctx, RewriterBase::Listener *listener = nullptr) - : interfaces(ctx), listener(listener) {} + OperationFolder(MLIRContext *ctx, OpBuilder::Listener *listener = nullptr) + : interfaces(ctx), rewriter(ctx, listener) {} /// Tries to perform folding on the given `op`, including unifying /// deduplicated constants. If successful, replaces `op`'s uses with @@ -61,10 +61,11 @@ class OperationFolder { /// Clear out any constants cached inside of the folder. void clear(); - /// Get or create a constant using the given builder. On success this returns - /// the constant operation, nullptr otherwise. - Value getOrCreateConstant(OpBuilder &builder, Dialect *dialect, - Attribute value, Type type, Location loc); + /// Get or create a constant for use in the specified block. The constant may + /// be created in a parent block. On success this returns the constant + /// operation, nullptr otherwise. + Value getOrCreateConstant(Block *block, Dialect *dialect, Attribute value, + Type type, Location loc); private: /// This map keeps track of uniqued constants by dialect, attribute, and type. @@ -74,29 +75,25 @@ class OperationFolder { using ConstantMap = DenseMap, Operation *>; - /// Erase the given operation and notify the listener. - void eraseOp(Operation *op); - /// Returns true if the given operation is an already folded constant that is /// owned by this folder. bool isFolderOwnedConstant(Operation *op) const; /// Tries to perform folding on the given `op`. If successful, populates /// `results` with the results of the folding. - LogicalResult tryToFold(OpBuilder &builder, Operation *op, - SmallVectorImpl &results); + LogicalResult tryToFold(Operation *op, SmallVectorImpl &results); - /// Try to process a set of fold results, generating constants as necessary. - /// Populates `results` on success, otherwise leaves it unchanged. - LogicalResult processFoldResults(OpBuilder &builder, Operation *op, + /// Try to process a set of fold results. Populates `results` on success, + /// otherwise leaves it unchanged. + LogicalResult processFoldResults(Operation *op, SmallVectorImpl &results, ArrayRef foldResults); /// Try to get or create a new constant entry. On success this returns the /// constant operation, nullptr otherwise. Operation *tryGetOrCreateConstant(ConstantMap &uniquedConstants, - Dialect *dialect, OpBuilder &builder, - Attribute value, Type type, Location loc); + Dialect *dialect, Attribute value, + Type type, Location loc); /// A mapping between an insertion region and the constants that have been /// created within it. @@ -109,8 +106,8 @@ class OperationFolder { /// A collection of dialect folder interfaces. DialectInterfaceCollection interfaces; - /// An optional listener that is notified of all IR changes. - RewriterBase::Listener *listener = nullptr; + /// A rewriter that performs all IR modifications. 
+ IRRewriter rewriter; }; } // namespace mlir diff --git a/mlir/lib/Transforms/SCCP.cpp b/mlir/lib/Transforms/SCCP.cpp index b32173a3a9810..14435b37acc91 100644 --- a/mlir/lib/Transforms/SCCP.cpp +++ b/mlir/lib/Transforms/SCCP.cpp @@ -51,9 +51,9 @@ static LogicalResult replaceWithConstant(DataFlowSolver &solver, // Attempt to materialize a constant for the given value. Dialect *dialect = latticeValue.getConstantDialect(); - Value constant = folder.getOrCreateConstant(builder, dialect, - latticeValue.getConstantValue(), - value.getType(), value.getLoc()); + Value constant = folder.getOrCreateConstant( + builder.getInsertionBlock(), dialect, latticeValue.getConstantValue(), + value.getType(), value.getLoc()); if (!constant) return failure(); diff --git a/mlir/lib/Transforms/Utils/FoldUtils.cpp b/mlir/lib/Transforms/Utils/FoldUtils.cpp index 22a488efb37ea..827c0ad4290b7 100644 --- a/mlir/lib/Transforms/Utils/FoldUtils.cpp +++ b/mlir/lib/Transforms/Utils/FoldUtils.cpp @@ -84,26 +84,25 @@ LogicalResult OperationFolder::tryToFold(Operation *op, bool *inPlaceUpdate) { // Try to fold the operation. SmallVector results; - OpBuilder builder(op, listener); - if (failed(tryToFold(builder, op, results))) + if (failed(tryToFold(op, results))) return failure(); // Check to see if the operation was just updated in place. if (results.empty()) { if (inPlaceUpdate) *inPlaceUpdate = true; - if (listener) - listener->notifyOperationModified(op); + if (auto *rewriteListener = dyn_cast_if_present( + rewriter.getListener())) { + // Folding API does not notify listeners, so we have to notify manually. + rewriteListener->notifyOperationModified(op); + } return success(); } // Constant folding succeeded. Replace all of the result values and erase the // operation. - if (listener) - listener->notifyOperationReplaced(op, results); - for (unsigned i = 0, e = results.size(); i != e; ++i) - op->getResult(i).replaceAllUsesWith(results[i]); - eraseOp(op); + notifyRemoval(op); + rewriter.replaceOp(op, results); return success(); } @@ -141,10 +140,8 @@ bool OperationFolder::insertKnownConstant(Operation *op, Attribute constValue) { // If there is an existing constant, replace `op`. if (folderConstOp) { - if (listener) - listener->notifyOperationReplaced(op, folderConstOp->getResults()); - op->replaceAllUsesWith(folderConstOp); - eraseOp(op); + notifyRemoval(op); + rewriter.replaceOp(op, folderConstOp->getResults()); return false; } @@ -162,13 +159,6 @@ bool OperationFolder::insertKnownConstant(Operation *op, Attribute constValue) { return true; } -void OperationFolder::eraseOp(Operation *op) { - notifyRemoval(op); - if (listener) - listener->notifyOperationRemoved(op); - op->erase(); -} - /// Notifies that the given constant `op` should be remove from this /// OperationFolder's internal bookkeeping. void OperationFolder::notifyRemoval(Operation *op) { @@ -202,22 +192,18 @@ void OperationFolder::clear() { /// Get or create a constant using the given builder. On success this returns /// the constant operation, nullptr otherwise. -Value OperationFolder::getOrCreateConstant(OpBuilder &builder, Dialect *dialect, +Value OperationFolder::getOrCreateConstant(Block *block, Dialect *dialect, Attribute value, Type type, Location loc) { - OpBuilder::InsertionGuard foldGuard(builder); - - // Use the builder insertion block to find an insertion point for the - // constant. - auto *insertRegion = - getInsertionRegion(interfaces, builder.getInsertionBlock()); + // Find an insertion point for the constant. 
+ auto *insertRegion = getInsertionRegion(interfaces, block); auto &entry = insertRegion->front(); - builder.setInsertionPoint(&entry, entry.begin()); + rewriter.setInsertionPoint(&entry, entry.begin()); // Get the constant map for the insertion region of this operation. auto &uniquedConstants = foldScopes[insertRegion]; - Operation *constOp = tryGetOrCreateConstant(uniquedConstants, dialect, - builder, value, type, loc); + Operation *constOp = + tryGetOrCreateConstant(uniquedConstants, dialect, value, type, loc); return constOp ? constOp->getResult(0) : Value(); } @@ -227,7 +213,7 @@ bool OperationFolder::isFolderOwnedConstant(Operation *op) const { /// Tries to perform folding on the given `op`. If successful, populates /// `results` with the results of the folding. -LogicalResult OperationFolder::tryToFold(OpBuilder &builder, Operation *op, +LogicalResult OperationFolder::tryToFold(Operation *op, SmallVectorImpl &results) { SmallVector operandConstants; @@ -257,13 +243,13 @@ LogicalResult OperationFolder::tryToFold(OpBuilder &builder, Operation *op, // fold. SmallVector foldResults; if (failed(op->fold(operandConstants, foldResults)) || - failed(processFoldResults(builder, op, results, foldResults))) + failed(processFoldResults(op, results, foldResults))) return success(updatedOpOperands); return success(); } LogicalResult -OperationFolder::processFoldResults(OpBuilder &builder, Operation *op, +OperationFolder::processFoldResults(Operation *op, SmallVectorImpl &results, ArrayRef foldResults) { // Check to see if the operation was just updated in place. @@ -273,11 +259,9 @@ OperationFolder::processFoldResults(OpBuilder &builder, Operation *op, // Create a builder to insert new operations into the entry block of the // insertion region. - auto *insertRegion = - getInsertionRegion(interfaces, builder.getInsertionBlock()); + auto *insertRegion = getInsertionRegion(interfaces, op->getBlock()); auto &entry = insertRegion->front(); - OpBuilder::InsertionGuard foldGuard(builder); - builder.setInsertionPoint(&entry, entry.begin()); + rewriter.setInsertionPoint(&entry, entry.begin()); // Get the constant map for the insertion region of this operation. auto &uniquedConstants = foldScopes[insertRegion]; @@ -300,9 +284,8 @@ OperationFolder::processFoldResults(OpBuilder &builder, Operation *op, // Check to see if there is a canonicalized version of this constant. auto res = op->getResult(i); Attribute attrRepl = foldResults[i].get(); - if (auto *constOp = - tryGetOrCreateConstant(uniquedConstants, dialect, builder, attrRepl, - res.getType(), op->getLoc())) { + if (auto *constOp = tryGetOrCreateConstant( + uniquedConstants, dialect, attrRepl, res.getType(), op->getLoc())) { // Ensure that this constant dominates the operation we are replacing it // with. This may not automatically happen if the operation being folded // was inserted before the constant within the insertion block. @@ -316,8 +299,10 @@ OperationFolder::processFoldResults(OpBuilder &builder, Operation *op, // If materialization fails, cleanup any operations generated for the // previous results and return failure. for (Operation &op : llvm::make_early_inc_range( - llvm::make_range(entry.begin(), builder.getInsertionPoint()))) - eraseOp(&op); + llvm::make_range(entry.begin(), rewriter.getInsertionPoint()))) { + notifyRemoval(&op); + rewriter.eraseOp(&op); + } results.clear(); return failure(); @@ -328,9 +313,10 @@ OperationFolder::processFoldResults(OpBuilder &builder, Operation *op, /// Try to get or create a new constant entry. 
On success this returns the /// constant operation value, nullptr otherwise. -Operation *OperationFolder::tryGetOrCreateConstant( - ConstantMap &uniquedConstants, Dialect *dialect, OpBuilder &builder, - Attribute value, Type type, Location loc) { +Operation * +OperationFolder::tryGetOrCreateConstant(ConstantMap &uniquedConstants, + Dialect *dialect, Attribute value, + Type type, Location loc) { // Check if an existing mapping already exists. auto constKey = std::make_tuple(dialect, value, type); Operation *&constOp = uniquedConstants[constKey]; @@ -338,7 +324,7 @@ Operation *OperationFolder::tryGetOrCreateConstant( return constOp; // If one doesn't exist, try to materialize one. - if (!(constOp = materializeConstant(dialect, builder, value, type, loc))) + if (!(constOp = materializeConstant(dialect, rewriter, value, type, loc))) return nullptr; // Check to see if the generated constant is in the expected dialect. @@ -355,7 +341,8 @@ Operation *OperationFolder::tryGetOrCreateConstant( // If an existing operation in the new dialect already exists, delete the // materialized operation in favor of the existing one. if (auto *existingOp = uniquedConstants.lookup(newKey)) { - eraseOp(constOp); + notifyRemoval(constOp); + rewriter.eraseOp(constOp); referencedDialects[existingOp].push_back(dialect); return constOp = existingOp; } diff --git a/mlir/test/lib/Transforms/TestIntRangeInference.cpp b/mlir/test/lib/Transforms/TestIntRangeInference.cpp index 64ff4ce5b9e51..d1978b6099f04 100644 --- a/mlir/test/lib/Transforms/TestIntRangeInference.cpp +++ b/mlir/test/lib/Transforms/TestIntRangeInference.cpp @@ -39,8 +39,9 @@ static LogicalResult replaceWithConstant(DataFlowSolver &solver, OpBuilder &b, maybeDefiningOp ? maybeDefiningOp->getDialect() : value.getParentRegion()->getParentOp()->getDialect(); Attribute constAttr = b.getIntegerAttr(value.getType(), *maybeConstValue); - Value constant = folder.getOrCreateConstant(b, valueDialect, constAttr, - value.getType(), value.getLoc()); + Value constant = + folder.getOrCreateConstant(b.getInsertionBlock(), valueDialect, constAttr, + value.getType(), value.getLoc()); if (!constant) return failure(); From 56d94a90dbbf1845ec71cd749691c74c1dd8a3ef Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Wed, 22 Mar 2023 10:20:55 +0100 Subject: [PATCH 278/691] [mlir][llvm] Add experimental alias scope decl intrinsic. The revision adds the llvm.experimental.noalias.scope.decl intrinsic to the LLVM dialect and updates the import and export accordingly. 
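For reference, a hypothetical sketch of how the new operation could be built
programmatically (the builder variable, the location, and the symbol names
"metadata" and "scope" are illustrative assumptions, not part of the patch):

  // Reference an alias scope op nested inside a metadata op, then emit the
  // declaration. On export it lowers to a call of the LLVM IR intrinsic
  // carrying a one-element scope list.
  MLIRContext *ctx = builder.getContext();
  SymbolRefAttr scopeRef = SymbolRefAttr::get(
      ctx, "metadata", {FlatSymbolRefAttr::get(ctx, "scope")});
  builder.create<LLVM::NoAliasScopeDeclOp>(loc, scopeRef);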
Reviewed By: Dinistro

Differential Revision: https://reviews.llvm.org/D146504
---
 .../mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td | 25 +++++++
 .../include/mlir/Target/LLVMIR/ModuleImport.h | 5 ++
 .../mlir/Target/LLVMIR/ModuleTranslation.h | 7 +-
 mlir/lib/Target/LLVMIR/ModuleImport.cpp | 7 ++
 mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 20 ++++--
 mlir/test/Dialect/LLVMIR/roundtrip.mlir | 12 ++++
 .../Target/LLVMIR/Import/import-failure.ll | 15 +++++
 .../LLVMIR/Import/metadata-alias-scopes.ll | 37 ++++++-----
 mlir/test/Target/LLVMIR/llvmir.mlir | 66 ++++++++++---------
 9 files changed, 139 insertions(+), 55 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
index 1b62ce0ca3e6e..391de1ffaa5dc 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td
@@ -167,6 +167,31 @@ def LLVM_MemsetOp : LLVM_ZeroResultIntrOp<"memset", [0, 2], [],
   >];
 }

+def LLVM_NoAliasScopeDeclOp
+    : LLVM_ZeroResultIntrOp<"experimental.noalias.scope.decl"> {
+  let arguments = (ins SymbolRefAttr:$scope);
+  string llvmBuilder = [{
+    // Wrap the scope argument into a list since the LLVM IR intrinsic takes
+    // a list containing exactly one scope rather than a scope itself.
+    llvm::MDNode *node = moduleTranslation.getAliasScopes(op, {$scope});
+    builder.CreateNoAliasScopeDeclaration(node);
+  }];
+  string mlirBuilder = [{
+    FailureOr<SmallVector<SymbolRefAttr>> scopeAttrs =
+        moduleImport.matchAliasScopeAttrs(llvmOperands[0]);
+    // Drop the intrinsic if the alias scope translation fails since the scope
+    // is not used by an aliasing operation, such as a load or store, that is
+    // used to convert the alias scope metadata.
+    if (failed(scopeAttrs))
+      return success();
+    if (scopeAttrs->size() != 1)
+      return failure();
+    $_op = $_builder.create<LLVM::NoAliasScopeDeclOp>(
+        $_location, (*scopeAttrs)[0]);
+  }];
+  let assemblyFormat = "$scope attr-dict";
+}
+
 //===----------------------------------------------------------------------===//
 // Lifetime Markers
 //===----------------------------------------------------------------------===//

diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleImport.h b/mlir/include/mlir/Target/LLVMIR/ModuleImport.h
index be4f6e5717b11..e1a94d6b80cc9 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleImport.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleImport.h
@@ -135,6 +135,11 @@ class ModuleImport {
   /// fails.
   DILocalVariableAttr matchLocalVariableAttr(llvm::Value *value);

+  /// Converts `value` to an array of symbol references pointing to alias scope
+  /// operations, or returns failure if the conversion fails.
+  FailureOr<SmallVector<SymbolRefAttr>>
+  matchAliasScopeAttrs(llvm::Value *value);
+
   /// Translates the debug location.
   Location translateLoc(llvm::DILocation *loc);

diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
index 67091257a3cf9..a04e285af580e 100644
--- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
+++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h
@@ -122,9 +122,14 @@ class ModuleTranslation {
   void forgetMapping(Region &region);

   /// Returns the LLVM metadata corresponding to a symbol reference to an mlir
-  /// LLVM dialect alias scope operation
+  /// LLVM dialect alias scope operation.
   llvm::MDNode *getAliasScope(Operation *op, SymbolRefAttr aliasScopeRef) const;

+  /// Returns the LLVM metadata corresponding to an array of symbol references
+  /// to mlir LLVM dialect alias scope operations.
+ llvm::MDNode *getAliasScopes(Operation *op, + ArrayRef aliasScopeRefs) const; + // Sets LLVM metadata for memory operations that are in a parallel loop. void setAccessGroupsMetadata(AccessGroupOpInterface op, llvm::Instruction *inst); diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp index a45768726bc43..d3ac7dcc17554 100644 --- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp @@ -1215,6 +1215,13 @@ DILocalVariableAttr ModuleImport::matchLocalVariableAttr(llvm::Value *value) { return debugImporter->translate(node); } +FailureOr> +ModuleImport::matchAliasScopeAttrs(llvm::Value *value) { + auto *nodeAsVal = cast(value); + auto *node = cast(nodeAsVal->getMetadata()); + return lookupAliasScopeAttrs(node); +} + Location ModuleImport::translateLoc(llvm::DILocation *loc) { return debugImporter->translateLoc(loc); } diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index ca5fb7dda3841..7159aa49540ef 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -1065,16 +1065,22 @@ ModuleTranslation::getAliasScope(Operation *op, return aliasScopeMetadataMapping.lookup(aliasScopeOp); } +llvm::MDNode *ModuleTranslation::getAliasScopes( + Operation *op, ArrayRef aliasScopeRefs) const { + SmallVector nodes; + nodes.reserve(aliasScopeRefs.size()); + for (SymbolRefAttr aliasScopeRef : aliasScopeRefs) + nodes.push_back(getAliasScope(op, aliasScopeRef)); + return llvm::MDNode::get(getLLVMContext(), nodes); +} + void ModuleTranslation::setAliasScopeMetadata(AliasAnalysisOpInterface op, llvm::Instruction *inst) { - auto populateScopeMetadata = [&](ArrayAttr scopeRefs, unsigned kind) { - if (!scopeRefs || scopeRefs.empty()) + auto populateScopeMetadata = [&](ArrayAttr aliasScopeRefs, unsigned kind) { + if (!aliasScopeRefs || aliasScopeRefs.empty()) return; - llvm::Module *module = inst->getModule(); - SmallVector scopeMDs; - for (SymbolRefAttr scopeRef : scopeRefs.getAsRange()) - scopeMDs.push_back(getAliasScope(op, scopeRef)); - llvm::MDNode *node = llvm::MDNode::get(module->getContext(), scopeMDs); + llvm::MDNode *node = getAliasScopes( + op, llvm::to_vector(aliasScopeRefs.getAsRange())); inst->setMetadata(kind, node); }; diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index c9db19b409e25..9147027c9d4b2 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -596,3 +596,15 @@ llvm.func @stackrestore_opaque_pointers(%arg0: !llvm.ptr) { llvm.intr.stackrestore %arg0 : !llvm.ptr llvm.return } + +// CHECK-LABEL: @experimental_noalias_scope_decl +llvm.func @experimental_noalias_scope_decl() { + // CHECK: llvm.intr.experimental.noalias.scope.decl @metadata::@scope + llvm.intr.experimental.noalias.scope.decl @metadata::@scope + llvm.return +} + +llvm.metadata @metadata { + llvm.alias_scope_domain @domain {description = "The domain"} + llvm.alias_scope @scope {domain = @domain, description = "The first scope"} +} diff --git a/mlir/test/Target/LLVMIR/Import/import-failure.ll b/mlir/test/Target/LLVMIR/Import/import-failure.ll index 16f2cf2b6283c..b330f654d3d51 100644 --- a/mlir/test/Target/LLVMIR/Import/import-failure.ll +++ b/mlir/test/Target/LLVMIR/Import/import-failure.ll @@ -322,3 +322,18 @@ bb2: } !0 = !{!"function_entry_count", i64 42} + +; // ----- + +; CHECK: import-failure.ll +; CHECK-SAME: warning: dropped instruction: call void 
@llvm.experimental.noalias.scope.decl(metadata !0) +define void @unused_scope() { + call void @llvm.experimental.noalias.scope.decl(metadata !0) + ret void +} + +declare void @llvm.experimental.noalias.scope.decl(metadata) + +!0 = !{!1} +!1 = !{!1, !2} +!2 = distinct !{!2, !"The domain"} diff --git a/mlir/test/Target/LLVMIR/Import/metadata-alias-scopes.ll b/mlir/test/Target/LLVMIR/Import/metadata-alias-scopes.ll index a9ed8e9ee556f..eb74b0ab880bb 100644 --- a/mlir/test/Target/LLVMIR/Import/metadata-alias-scopes.ll +++ b/mlir/test/Target/LLVMIR/Import/metadata-alias-scopes.ll @@ -62,27 +62,34 @@ define void @two_domains(ptr %arg1) { ; // ----- +; CHECK: llvm.metadata @__llvm_global_metadata { +; CHECK: llvm.alias_scope_domain @[[DOMAIN:.*]] {description = "The domain"} +; CHECK: llvm.alias_scope @[[$SCOPE:.*]] {domain = @[[DOMAIN]]} +; CHECK: } + ; CHECK-LABEL: llvm.func @supported_ops define void @supported_ops(ptr %arg1, float %arg2, i32 %arg3, i32 %arg4) { - ; CHECK: llvm.load {{.*}}alias_scopes = - %1 = load i32, ptr %arg1, !alias.scope !3 - ; CHECK: llvm.store {{.*}}alias_scopes = - store i32 %1, ptr %arg1, !alias.scope !3 - ; CHECK: llvm.atomicrmw {{.*}}alias_scopes = - %2 = atomicrmw fmax ptr %arg1, float %arg2 acquire, !alias.scope !3 - ; CHECK: llvm.cmpxchg {{.*}}alias_scopes = - %3 = cmpxchg ptr %arg1, i32 %arg3, i32 %arg4 monotonic seq_cst, !alias.scope !3 - ; CHECK: "llvm.intr.memcpy"{{.*}}alias_scopes = - call void @llvm.memcpy.p0.p0.i32(ptr %arg1, ptr %arg1, i32 4, i1 false), !alias.scope !3 - ; CHECK: "llvm.intr.memset"{{.*}}alias_scopes = - call void @llvm.memset.p0.i32(ptr %arg1, i8 42, i32 4, i1 false), !alias.scope !3 + ; CHECK: llvm.intr.experimental.noalias.scope.decl @__llvm_global_metadata::@[[$SCOPE]] + call void @llvm.experimental.noalias.scope.decl(metadata !2) + ; CHECK: llvm.load {{.*}}alias_scopes = [@__llvm_global_metadata::@[[$SCOPE]]] + %1 = load i32, ptr %arg1, !alias.scope !2 + ; CHECK: llvm.store {{.*}}alias_scopes = [@__llvm_global_metadata::@[[$SCOPE]]] + store i32 %1, ptr %arg1, !alias.scope !2 + ; CHECK: llvm.atomicrmw {{.*}}alias_scopes = [@__llvm_global_metadata::@[[$SCOPE]]] + %2 = atomicrmw fmax ptr %arg1, float %arg2 acquire, !alias.scope !2 + ; CHECK: llvm.cmpxchg {{.*}}alias_scopes = [@__llvm_global_metadata::@[[$SCOPE]]] + %3 = cmpxchg ptr %arg1, i32 %arg3, i32 %arg4 monotonic seq_cst, !alias.scope !2 + ; CHECK: "llvm.intr.memcpy"{{.*}}alias_scopes = [@__llvm_global_metadata::@[[$SCOPE]]] + call void @llvm.memcpy.p0.p0.i32(ptr %arg1, ptr %arg1, i32 4, i1 false), !alias.scope !2 + ; CHECK: "llvm.intr.memset"{{.*}}alias_scopes = [@__llvm_global_metadata::@[[$SCOPE]]] + call void @llvm.memset.p0.i32(ptr %arg1, i8 42, i32 4, i1 false), !alias.scope !2 ret void } +declare void @llvm.experimental.noalias.scope.decl(metadata) declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg) !0 = distinct !{!0, !"The domain"} -!1 = distinct !{!1} -!2 = !{!2, !0} -!3 = !{!2} +!1 = !{!1, !0} +!2 = !{!1} diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index ce65ff995709c..46120cb348296 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -2016,38 +2016,41 @@ llvm.func @switch_weights(%arg0: i32) -> i32 { // ----- -module { - llvm.func @aliasScope(%arg1 : !llvm.ptr) { - %0 = llvm.mlir.constant(0 : i32) : i32 - llvm.store %0, %arg1 {alias_scopes = 
[@metadata::@scope1], noalias_scopes = [@metadata::@scope2, @metadata::@scope3]} : i32, !llvm.ptr - %1 = llvm.load %arg1 {alias_scopes = [@metadata::@scope2], noalias_scopes = [@metadata::@scope1, @metadata::@scope3]} : !llvm.ptr -> i32 - %2 = llvm.atomicrmw add %arg1, %0 monotonic {alias_scopes = [@metadata::@scope3], noalias_scopes = [@metadata::@scope1, @metadata::@scope2]} : !llvm.ptr, i32 - %3 = llvm.cmpxchg %arg1, %1, %2 acq_rel monotonic {alias_scopes = [@metadata::@scope3]} : !llvm.ptr, i32 - %4 = llvm.mlir.constant(0 : i1) : i1 - %5 = llvm.mlir.constant(42 : i8) : i8 - "llvm.intr.memcpy"(%arg1, %arg1, %0, %4) {alias_scopes = [@metadata::@scope3]} : (!llvm.ptr, !llvm.ptr, i32, i1) -> () - "llvm.intr.memset"(%arg1, %5, %0, %4) {noalias_scopes = [@metadata::@scope3]} : (!llvm.ptr, i8, i32, i1) -> () - llvm.return - } - - llvm.metadata @metadata { - llvm.alias_scope_domain @domain {description = "The domain"} - llvm.alias_scope @scope1 {domain = @domain, description = "The first scope"} - llvm.alias_scope @scope2 {domain = @domain} - llvm.alias_scope @scope3 {domain = @domain} - } -} - -// Function // CHECK-LABEL: aliasScope -// CHECK: store {{.*}}, !alias.scope ![[SCOPES1:[0-9]+]], !noalias ![[SCOPES23:[0-9]+]] -// CHECK: load {{.*}}, !alias.scope ![[SCOPES2:[0-9]+]], !noalias ![[SCOPES13:[0-9]+]] -// CHECK: atomicrmw {{.*}}, !alias.scope ![[SCOPES3:[0-9]+]], !noalias ![[SCOPES12:[0-9]+]] -// CHECK: cmpxchg {{.*}}, !alias.scope ![[SCOPES3]] -// CHECK: llvm.memcpy{{.*}}, !alias.scope ![[SCOPES3]] -// CHECK: llvm.memset{{.*}}, !noalias ![[SCOPES3]] - -// Metadata +llvm.func @aliasScope(%arg1 : !llvm.ptr) { + %0 = llvm.mlir.constant(0 : i32) : i32 + // CHECK: call void @llvm.experimental.noalias.scope.decl(metadata ![[SCOPES1:[0-9]+]]) + llvm.intr.experimental.noalias.scope.decl @metadata::@scope1 + // CHECK: store {{.*}}, !alias.scope ![[SCOPES1]], !noalias ![[SCOPES23:[0-9]+]] + llvm.store %0, %arg1 {alias_scopes = [@metadata::@scope1], noalias_scopes = [@metadata::@scope2, @metadata::@scope3]} : i32, !llvm.ptr + // CHECK: load {{.*}}, !alias.scope ![[SCOPES2:[0-9]+]], !noalias ![[SCOPES13:[0-9]+]] + %1 = llvm.load %arg1 {alias_scopes = [@metadata::@scope2], noalias_scopes = [@metadata::@scope1, @metadata::@scope3]} : !llvm.ptr -> i32 + // CHECK: atomicrmw {{.*}}, !alias.scope ![[SCOPES3:[0-9]+]], !noalias ![[SCOPES12:[0-9]+]] + %2 = llvm.atomicrmw add %arg1, %0 monotonic {alias_scopes = [@metadata::@scope3], noalias_scopes = [@metadata::@scope1, @metadata::@scope2]} : !llvm.ptr, i32 + // CHECK: cmpxchg {{.*}}, !alias.scope ![[SCOPES3]] + %3 = llvm.cmpxchg %arg1, %1, %2 acq_rel monotonic {alias_scopes = [@metadata::@scope3]} : !llvm.ptr, i32 + %4 = llvm.mlir.constant(0 : i1) : i1 + %5 = llvm.mlir.constant(42 : i8) : i8 + // CHECK: llvm.memcpy{{.*}}, !alias.scope ![[SCOPES3]] + "llvm.intr.memcpy"(%arg1, %arg1, %0, %4) {alias_scopes = [@metadata::@scope3]} : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + // CHECK: llvm.memset{{.*}}, !noalias ![[SCOPES3]] + "llvm.intr.memset"(%arg1, %5, %0, %4) {noalias_scopes = [@metadata::@scope3]} : (!llvm.ptr, i8, i32, i1) -> () + llvm.return +} + +llvm.metadata @metadata { + llvm.alias_scope_domain @domain {description = "The domain"} + llvm.alias_scope @scope1 {domain = @domain, description = "The first scope"} + llvm.alias_scope @scope2 {domain = @domain} + llvm.alias_scope @scope3 {domain = @domain} +} + +// Check the intrinsic declarations. 
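+// (CHECK-DAG rather than CHECK is used for the declarations below because
+// their relative order in the emitted module is not guaranteed to be stable.)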
+// CHECK-DAG: declare void @llvm.experimental.noalias.scope.decl(metadata) +// CHECK-DAG: declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) +// CHECK-DAG: declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg) + +// Check the translated metadata. // CHECK-DAG: ![[DOMAIN:[0-9]+]] = distinct !{![[DOMAIN]], !"The domain"} // CHECK-DAG: ![[SCOPE1:[0-9]+]] = distinct !{![[SCOPE1]], ![[DOMAIN]], !"The first scope"} // CHECK-DAG: ![[SCOPE2:[0-9]+]] = distinct !{![[SCOPE2]], ![[DOMAIN]]} @@ -2059,7 +2062,6 @@ module { // CHECK-DAG: ![[SCOPES13]] = !{![[SCOPE1]], ![[SCOPE3]]} // CHECK-DAG: ![[SCOPES23]] = !{![[SCOPE2]], ![[SCOPE3]]} - // ----- // It is okay to have repeated successors if they have no arguments. From eb78886246da610bd78e4a8be82d3b820e12ee2b Mon Sep 17 00:00:00 2001 From: David Spickett Date: Mon, 13 Mar 2023 14:28:39 +0000 Subject: [PATCH 279/691] [lldb] Add test for unavailable registers Prior to this the only check was that we did not print this message when reading registers that should exist. I thought there was an indentation bug here so I wrote a test for it. There is not, but we could do with the coverage anyway. Reviewed By: rupprecht Differential Revision: https://reviews.llvm.org/D145940 --- .../register/TestRegistersUnavailable.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 lldb/test/API/commands/register/register/TestRegistersUnavailable.py diff --git a/lldb/test/API/commands/register/register/TestRegistersUnavailable.py b/lldb/test/API/commands/register/register/TestRegistersUnavailable.py new file mode 100644 index 0000000000000..006be849bdbb2 --- /dev/null +++ b/lldb/test/API/commands/register/register/TestRegistersUnavailable.py @@ -0,0 +1,53 @@ +""" Check that unavailable registers are reported when reading register sets.""" + +from textwrap import dedent +import lldb +from lldbsuite.test.lldbtest import * +from lldbsuite.test.decorators import * +from lldbsuite.test.gdbclientutils import * +from lldbsuite.test.lldbgdbclient import GDBRemoteTestBase + +class MyResponder(MockGDBServerResponder): + def readRegisters(self): + return "E01" + + def readRegister(self, regnum): + if regnum in [0, 1, 2]: + return "E01" + return "5555555555555555" + + def qHostInfo(self): + # The triple is hex encoded ASCII "x86_64-linux-gnu". + return "triple:7838365F36342D6C696E75782D676E75;" + +class TestRegistersUnavailable(GDBRemoteTestBase): + @skipIfRemote + # So that we have multiple register sets. + @skipIfLLVMTargetMissing("X86") + def test_unavailable_registers(self): + self.server.responder = MyResponder() + target = self.dbg.CreateTarget('') + + if self.TraceOn(): + self.runCmd("log enable gdb-remote packets process") + self.addTearDownHook( + lambda: self.runCmd("log disable gdb-remote packets process")) + + process = self.connect(target) + lldbutil.expect_state_changes(self, self.dbg.GetListener(), process, + [lldb.eStateStopped]) + + # We are using a long regex pattern here to check that the indentation + # is correct when you have multiple register sets and they all have + # some missing registers. + self.expect("register read --all", patterns=[ + "(?sm)^general purpose registers:\n" + "^\s+rdx = 0x5555555555555555\n" + ".*" + "^3 registers were unavailable.\n" + "\n" + "^supplementary registers:\n" + "^\s+edx = 0x55555555\n" + ".*" + "^12 registers were unavailable." 
+ ]) From b7165fdc7ecaa5fa77849066d4307d45ac026a9d Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 22 Mar 2023 09:49:02 +0000 Subject: [PATCH 280/691] [llvm][ARM] Fix bounds check after use in ARMAsmParser Fixes #61605 --- llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 7ce9e04657c7d..e0bc431fd4bd8 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -11007,7 +11007,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { // Find the optional-def operand (cc_out). unsigned OpNo; for (OpNo = 0; - !MCID.operands()[OpNo].isOptionalDef() && OpNo < MCID.NumOperands; + OpNo < MCID.NumOperands && !MCID.operands()[OpNo].isOptionalDef(); ++OpNo) ; // If we're parsing Thumb1, reject it completely. From ee2cd606abd98380bc71974863354a0d54ccfab3 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 21 Mar 2023 15:34:31 +0100 Subject: [PATCH 281/691] [dataflow] Log flow condition to the correct stream. Differential Revision: https://reviews.llvm.org/D146527 --- .../clang/Analysis/FlowSensitive/DataflowAnalysisContext.h | 3 ++- clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp | 5 +++-- clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h index 970b17be224db..702aaff9c7e71 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h @@ -264,7 +264,8 @@ class DataflowAnalysisContext { /// `Val2` imposed by the flow condition. bool equivalentBoolValues(BoolValue &Val1, BoolValue &Val2); - LLVM_DUMP_METHOD void dumpFlowCondition(AtomicBoolValue &Token); + LLVM_DUMP_METHOD void dumpFlowCondition(AtomicBoolValue &Token, + llvm::raw_ostream &OS = llvm::dbgs()); /// Returns the `ControlFlowContext` registered for `F`, if any. Otherwise, /// returns null. 
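A note on the ARMAsmParser fix above: the loop condition evaluated `MCID.operands()[OpNo].isOptionalDef()` before checking `OpNo < MCID.NumOperands`, so when no operand is an optional def the final iteration indexes one element past the end. Reordering the conjuncts lets `&&` short-circuit on the bounds test first. Below is a self-contained sketch of the same bug shape with invented names; it is an illustration, not the LLVM code:

#include <cstddef>
#include <vector>

// Return the index of the first "optional" entry, or size() if there is none.
size_t findOptional(const std::vector<bool> &isOptional) {
  size_t i;
  // Buggy order: the element access runs before the bounds test, so when no
  // entry matches, the last iteration reads isOptional[size()]:
  //   for (i = 0; !isOptional[i] && i < isOptional.size(); ++i);
  // Fixed order: bounds first; && short-circuits before the access.
  for (i = 0; i < isOptional.size() && !isOptional[i]; ++i)
    ;
  return i;
}

The fixed ordering also gives the loop a clean postcondition: if nothing matched, the index simply equals the container size.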
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index 44053246bb744..a1b813982502b 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -342,7 +342,8 @@ BoolValue &DataflowAnalysisContext::buildAndSubstituteFlowConditionWithCache( return substituteBoolValue(*ConstraintsIt->second, SubstitutionsCache); } -void DataflowAnalysisContext::dumpFlowCondition(AtomicBoolValue &Token) { +void DataflowAnalysisContext::dumpFlowCondition(AtomicBoolValue &Token, + llvm::raw_ostream &OS) { llvm::DenseSet Constraints = {&Token}; llvm::DenseSet VisitedTokens; addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); @@ -350,7 +351,7 @@ void DataflowAnalysisContext::dumpFlowCondition(AtomicBoolValue &Token) { llvm::DenseMap AtomNames = { {&getBoolLiteralValue(false), "False"}, {&getBoolLiteralValue(true), "True"}}; - llvm::dbgs() << debugString(Constraints, AtomNames); + OS << debugString(Constraints, AtomNames); } const ControlFlowContext * diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index 46fb7bd2fd5e9..e3bde37ea68f7 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -812,7 +812,7 @@ void Environment::dump(raw_ostream &OS) const { } OS << "FlowConditionToken:\n"; - DACtx->dumpFlowCondition(*FlowConditionToken); + DACtx->dumpFlowCondition(*FlowConditionToken, OS); } void Environment::dump() const { From a5788836b92c3e1721eaef668bf5fb6920497d5d Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Mar 2023 10:48:26 +0100 Subject: [PATCH 282/691] [IR] Rename dropUndefImplying to dropUBImplying (NFC) Clarify that this is only about immediate undefined behavior, not about undef or poison. --- llvm/include/llvm/IR/Instruction.h | 5 ++--- llvm/lib/IR/Instruction.cpp | 2 +- llvm/lib/Transforms/Scalar/LICM.cpp | 2 +- llvm/lib/Transforms/Utils/Local.cpp | 2 +- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 4 ++-- 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h index 03873eb4094cc..8818925f78610 100644 --- a/llvm/include/llvm/IR/Instruction.h +++ b/llvm/include/llvm/IR/Instruction.h @@ -320,7 +320,7 @@ class Instruction : public User, /// @{ /// Passes are required to drop metadata they don't understand. This is a /// convenience method for passes to do so. - /// dropUndefImplyingAttrsAndUnknownMetadata should be used instead of + /// dropUBImplyingAttrsAndUnknownMetadata should be used instead of /// this API if the Instruction being modified is a call. void dropUnknownNonDebugMetadata(ArrayRef KnownIDs); void dropUnknownNonDebugMetadata() { @@ -404,8 +404,7 @@ class Instruction : public User, /// dropUnknownNonDebugMetadata). For calls, it also drops parameter and /// return attributes that can cause undefined behaviour. Both of these should /// be done by passes which move instructions in IR. - void - dropUndefImplyingAttrsAndUnknownMetadata(ArrayRef KnownIDs = {}); + void dropUBImplyingAttrsAndUnknownMetadata(ArrayRef KnownIDs = {}); /// Determine whether the exact flag is set. 
bool isExact() const LLVM_READONLY; diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp index 38cce828b9baf..dd79fbe4dbdfc 100644 --- a/llvm/lib/IR/Instruction.cpp +++ b/llvm/lib/IR/Instruction.cpp @@ -224,7 +224,7 @@ void Instruction::dropPoisonGeneratingMetadata() { eraseMetadata(LLVMContext::MD_align); } -void Instruction::dropUndefImplyingAttrsAndUnknownMetadata( +void Instruction::dropUBImplyingAttrsAndUnknownMetadata( ArrayRef KnownIDs) { dropUnknownNonDebugMetadata(KnownIDs); auto *CB = dyn_cast(this); diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index d74e8fb4fa24a..dfd17ff3d760c 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1744,7 +1744,7 @@ static void hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop, // time in isGuaranteedToExecute if we don't actually have anything to // drop. It is a compile time optimization, not required for correctness. !SafetyInfo->isGuaranteedToExecute(I, DT, CurLoop)) - I.dropUndefImplyingAttrsAndUnknownMetadata(); + I.dropUBImplyingAttrsAndUnknownMetadata(); if (isa(I)) // Move the new node to the end of the phi list in the destination block. diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 3f486147564ee..5c1fd6e9ae4a4 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2994,7 +2994,7 @@ void llvm::hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { Instruction *I = &*II; - I->dropUndefImplyingAttrsAndUnknownMetadata(); + I->dropUBImplyingAttrsAndUnknownMetadata(); if (I->isUsedByMetadata()) dropDebugUsers(*I); if (I->isDebugOrPseudoInst()) { diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 187f11a0a0897..10d7f96c56f14 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1125,7 +1125,7 @@ static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( // it is tied to the instruction itself, not the value or position. // Similarly strip attributes on call parameters that may cause UB in // location the call is moved to. - NewBonusInst->dropUndefImplyingAttrsAndUnknownMetadata( + NewBonusInst->dropUBImplyingAttrsAndUnknownMetadata( LLVMContext::MD_annotation); NewBonusInst->insertInto(PredBlock, PTI->getIterator()); @@ -3021,7 +3021,7 @@ bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, if (!isa(&I)) I.setDebugLoc(DebugLoc()); } - I.dropUndefImplyingAttrsAndUnknownMetadata(); + I.dropUBImplyingAttrsAndUnknownMetadata(); // Drop ephemeral values. 
if (EphTracker.contains(&I)) { From 1a9d49524a5243df669bce152870beca2b6d7aeb Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Mar 2023 11:40:07 +0100 Subject: [PATCH 283/691] [GVN] Regenerate test checks (NFC) --- llvm/test/Transforms/GVN/range.ll | 86 ++++++++++++++++++---------- llvm/test/Transforms/NewGVN/range.ll | 86 ++++++++++++++++++---------- 2 files changed, 112 insertions(+), 60 deletions(-) diff --git a/llvm/test/Transforms/GVN/range.ll b/llvm/test/Transforms/GVN/range.ll index 8705d521ae05c..ae0801ee59da1 100644 --- a/llvm/test/Transforms/GVN/range.ll +++ b/llvm/test/Transforms/GVN/range.ll @@ -1,9 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 ; RUN: opt -passes=gvn -S < %s | FileCheck %s define i32 @test1(ptr %p) { -; CHECK-LABEL: @test1(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE0:[0-9]+]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test1 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !0 %b = load i32, ptr %p, !range !0 %c = add i32 %a, %b @@ -11,9 +15,12 @@ define i32 @test1(ptr %p) { } define i32 @test2(ptr %p) { -; CHECK-LABEL: @test2(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE0]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test2 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !0 %b = load i32, ptr %p %c = add i32 %a, %b @@ -21,9 +28,12 @@ define i32 @test2(ptr %p) { } define i32 @test3(ptr %p) { -; CHECK-LABEL: @test3(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE0]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test3 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !0 %b = load i32, ptr %p, !range !1 %c = add i32 %a, %b @@ -31,9 +41,12 @@ define i32 @test3(ptr %p) { } define i32 @test4(ptr %p) { -; CHECK-LABEL: @test4(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE0]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test4 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !0 %b = load i32, ptr %p, !range !2 %c = add i32 %a, %b @@ -41,9 +54,12 @@ define i32 @test4(ptr %p) { } define i32 @test5(ptr %p) { -; CHECK-LABEL: @test5(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE3:[0-9]+]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test5 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG1:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !3 %b = load i32, ptr %p, !range !4 %c = add i32 %a, %b @@ -51,9 +67,12 @@ define i32 @test5(ptr %p) { } define i32 @test6(ptr %p) { -; CHECK-LABEL: @test6(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE5:[0-9]+]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test6 +; CHECK-SAME: (ptr 
[[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG2:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !5 %b = load i32, ptr %p, !range !6 %c = add i32 %a, %b @@ -61,9 +80,12 @@ define i32 @test6(ptr %p) { } define i32 @test7(ptr %p) { -; CHECK-LABEL: @test7(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE7:[0-9]+]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test7 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG3:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !7 %b = load i32, ptr %p, !range !8 %c = add i32 %a, %b @@ -71,21 +93,18 @@ define i32 @test7(ptr %p) { } define i32 @test8(ptr %p) { -; CHECK-LABEL: @test8(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE9:[0-9]+]] -; CHECK-NOT: range -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test8 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG4:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !9 %b = load i32, ptr %p, !range !10 %c = add i32 %a, %b ret i32 %c } -; CHECK: ![[RANGE0]] = !{i32 0, i32 2} -; CHECK: ![[RANGE3]] = !{i32 -5, i32 -2} -; CHECK: ![[RANGE5]] = !{i32 10, i32 1} -; CHECK: ![[RANGE7]] = !{i32 1, i32 2, i32 3, i32 4} -; CHECK: ![[RANGE9]] = !{i32 1, i32 5} !0 = !{i32 0, i32 2} !1 = !{i32 3, i32 5} @@ -98,3 +117,10 @@ define i32 @test8(ptr %p) { !8 = !{i32 5, i32 1} !9 = !{i32 1, i32 5} !10 = !{i32 5, i32 1} +;. +; CHECK: [[RNG0]] = !{i32 0, i32 2} +; CHECK: [[RNG1]] = !{i32 -5, i32 -2} +; CHECK: [[RNG2]] = !{i32 10, i32 1} +; CHECK: [[RNG3]] = !{i32 1, i32 2, i32 3, i32 4} +; CHECK: [[RNG4]] = !{i32 1, i32 5} +;. 
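The NewGVN copy of the test below gets the same treatment. In both files the regenerated assertions still pin down the interesting behavior: each pair of loads collapses to a single load feeding `add i32 [[A]], [[A]]`, and the `--check-globals` mode additionally verifies the `!range` metadata nodes listed at the bottom of the file. When the output of `opt` changes, such checks are meant to be regenerated rather than hand-edited, with something like `llvm/utils/update_test_checks.py --opt-binary=<build>/bin/opt llvm/test/Transforms/GVN/range.ll`; the `UTC_ARGS` recorded in the NOTE line on the first line of the test supplies the extra flags to reuse, and the exact invocation depends on the local setup.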
diff --git a/llvm/test/Transforms/NewGVN/range.ll b/llvm/test/Transforms/NewGVN/range.ll index 4adfa3da7c1f9..8803737c7bd4a 100644 --- a/llvm/test/Transforms/NewGVN/range.ll +++ b/llvm/test/Transforms/NewGVN/range.ll @@ -1,9 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 ; RUN: opt -passes=newgvn -S < %s | FileCheck %s define i32 @test1(ptr %p) { -; CHECK-LABEL: @test1(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE0:[0-9]+]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test1 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !0 %b = load i32, ptr %p, !range !0 %c = add i32 %a, %b @@ -11,9 +15,12 @@ define i32 @test1(ptr %p) { } define i32 @test2(ptr %p) { -; CHECK-LABEL: @test2(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE0]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test2 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !0 %b = load i32, ptr %p %c = add i32 %a, %b @@ -21,9 +28,12 @@ define i32 @test2(ptr %p) { } define i32 @test3(ptr %p) { -; CHECK-LABEL: @test3(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE0]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test3 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !0 %b = load i32, ptr %p, !range !1 %c = add i32 %a, %b @@ -31,9 +41,12 @@ define i32 @test3(ptr %p) { } define i32 @test4(ptr %p) { -; CHECK-LABEL: @test4(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE0]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test4 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !0 %b = load i32, ptr %p, !range !2 %c = add i32 %a, %b @@ -41,9 +54,12 @@ define i32 @test4(ptr %p) { } define i32 @test5(ptr %p) { -; CHECK-LABEL: @test5(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE3:[0-9]+]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test5 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG1:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !3 %b = load i32, ptr %p, !range !4 %c = add i32 %a, %b @@ -51,9 +67,12 @@ define i32 @test5(ptr %p) { } define i32 @test6(ptr %p) { -; CHECK-LABEL: @test6(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE5:[0-9]+]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test6 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG2:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !5 %b = load i32, ptr %p, !range !6 %c = add i32 %a, %b @@ -61,9 +80,12 @@ define i32 @test6(ptr %p) { } define i32 @test7(ptr %p) { -; CHECK-LABEL: @test7(ptr %p) -; CHECK: %a = load i32, ptr %p, 
align 4, !range ![[RANGE7:[0-9]+]] -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test7 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG3:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !7 %b = load i32, ptr %p, !range !8 %c = add i32 %a, %b @@ -71,21 +93,18 @@ define i32 @test7(ptr %p) { } define i32 @test8(ptr %p) { -; CHECK-LABEL: @test8(ptr %p) -; CHECK: %a = load i32, ptr %p, align 4, !range ![[RANGE9:[0-9]+]] -; CHECK-NOT: range -; CHECK: %c = add i32 %a, %a +; CHECK-LABEL: define i32 @test8 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG4:![0-9]+]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; %a = load i32, ptr %p, !range !9 %b = load i32, ptr %p, !range !10 %c = add i32 %a, %b ret i32 %c } -; CHECK: ![[RANGE0]] = !{i32 0, i32 2} -; CHECK: ![[RANGE3]] = !{i32 -5, i32 -2} -; CHECK: ![[RANGE5]] = !{i32 10, i32 1} -; CHECK: ![[RANGE7]] = !{i32 1, i32 2, i32 3, i32 4} -; CHECK: ![[RANGE9]] = !{i32 1, i32 5} !0 = !{i32 0, i32 2} !1 = !{i32 3, i32 5} @@ -98,3 +117,10 @@ define i32 @test8(ptr %p) { !8 = !{i32 5, i32 1} !9 = !{i32 1, i32 5} !10 = !{i32 5, i32 1} +;. +; CHECK: [[RNG0]] = !{i32 0, i32 2} +; CHECK: [[RNG1]] = !{i32 -5, i32 -2} +; CHECK: [[RNG2]] = !{i32 10, i32 1} +; CHECK: [[RNG3]] = !{i32 1, i32 2, i32 3, i32 4} +; CHECK: [[RNG4]] = !{i32 1, i32 5} +;. From d25e022cd19b83c22a6022edb78c4b97a5fc1b49 Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Wed, 22 Mar 2023 07:16:35 +0530 Subject: [PATCH 284/691] [MLIR][Affine] Fix assumption on int type in memref elt size method Fix assumption on memref element type being int/float in memref elt size related method and affine data copy generate. Fixes https://github.com/llvm/llvm-project/issues/61310 Differential Revision: https://reviews.llvm.org/D146495 --- .../mlir/Dialect/Affine/Analysis/Utils.h | 10 ++++--- mlir/include/mlir/Dialect/Affine/LoopUtils.h | 4 ++- mlir/lib/Dialect/Affine/Analysis/Utils.cpp | 27 +++++++++++++------ .../Dialect/Affine/Transforms/LoopFusion.cpp | 19 +++---------- mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp | 6 ++++- .../test/Dialect/Affine/affine-data-copy.mlir | 23 ++++++++++++++++ 6 files changed, 60 insertions(+), 29 deletions(-) diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h index 7f6eced071b15..99e511f152618 100644 --- a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h +++ b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h @@ -359,9 +359,9 @@ struct MemRefRegion { FlatAffineValueConstraints cst; }; -/// Returns the size of memref data in bytes if it's statically shaped, -/// std::nullopt otherwise. -std::optional getMemRefSizeInBytes(MemRefType memRefType); +/// Returns the size of a memref with element type int or float in bytes if it's +/// statically shaped, std::nullopt otherwise. +std::optional getIntOrFloatMemRefSizeInBytes(MemRefType memRefType); /// Checks a load or store op for an out of bound access; returns failure if the /// access is out of bounds along any of the dimensions, success otherwise. @@ -378,6 +378,10 @@ unsigned getNumCommonSurroundingLoops(Operation &a, Operation &b); std::optional getMemoryFootprintBytes(AffineForOp forOp, int memorySpace = -1); +/// Returns the memref's element type's size in bytes where the elemental type +/// is an int or float or a vector of such types. 
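+/// Returns std::nullopt for any other element type (e.g. `index`), whose size
+/// cannot be computed without target data.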
+std::optional<int64_t> getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType);
+
 /// Simplify the integer set by simplifying the underlying affine expressions by
 /// flattening and some simple inference. Also, drop any duplicate constraints.
 /// Returns the simplified integer set. This method runs in time linear in the
diff --git a/mlir/include/mlir/Dialect/Affine/LoopUtils.h b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
index 828f06129167c..8bab83bc0d992 100644
--- a/mlir/include/mlir/Dialect/Affine/LoopUtils.h
+++ b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
@@ -184,7 +184,9 @@ struct AffineCopyOptions {
 /// available for processing this block range. When 'filterMemRef' is specified,
 /// copies are only generated for the provided MemRef. Returns success if the
 /// explicit copying succeeded for all memrefs on which affine load/stores were
-/// encountered.
+/// encountered. For memrefs whose element type has no computable size in bytes
+/// (the `index` type), the capacity is not accounted for and the
+/// `fastMemCapacityBytes` copy option is non-functional in such cases.
 LogicalResult affineDataCopyGenerate(Block::iterator begin, Block::iterator end,
                                      const AffineCopyOptions &copyOptions,
                                      std::optional<Value> filterMemRef,
diff --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
index 8ab219af98c9d..db4fa354d4c2d 100644
--- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
@@ -594,16 +594,21 @@ LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth,
   return success();
 }
 
-static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) {
+std::optional<int64_t>
+mlir::getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType) {
   auto elementType = memRefType.getElementType();
 
   unsigned sizeInBits;
   if (elementType.isIntOrFloat()) {
     sizeInBits = elementType.getIntOrFloatBitWidth();
+  } else if (auto vectorType = elementType.dyn_cast<VectorType>()) {
+    if (vectorType.getElementType().isIntOrFloat())
+      sizeInBits =
+          vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
+    else
+      return std::nullopt;
   } else {
-    auto vectorType = elementType.cast<VectorType>();
-    sizeInBits =
-        vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
+    return std::nullopt;
   }
   return llvm::divideCeil(sizeInBits, 8);
 }
@@ -629,23 +634,29 @@ std::optional<int64_t> MemRefRegion::getRegionSize() {
     LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
     return std::nullopt;
   }
-  return getMemRefEltSizeInBytes(memRefType) * *numElements;
+  auto eltSize = getMemRefIntOrFloatEltSizeInBytes(memRefType);
+  if (!eltSize)
+    return std::nullopt;
+  return *eltSize * *numElements;
 }
 
 /// Returns the size of memref data in bytes if it's statically shaped,
 /// std::nullopt otherwise. If the element of the memref has vector type, takes
 /// into account size of the vector as well.
 // TODO: improve/complete this when we have target data.
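 // Note: llvm::divideCeil rounds sub-byte widths up, so an `i1` element, or a
 // vector whose total bit width is not a multiple of 8, is counted as occupying
 // whole bytes; the sizes computed here are therefore upper bounds.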
-std::optional mlir::getMemRefSizeInBytes(MemRefType memRefType) { +std::optional +mlir::getIntOrFloatMemRefSizeInBytes(MemRefType memRefType) { if (!memRefType.hasStaticShape()) return std::nullopt; auto elementType = memRefType.getElementType(); if (!elementType.isIntOrFloat() && !elementType.isa()) return std::nullopt; - uint64_t sizeInBytes = getMemRefEltSizeInBytes(memRefType); + auto sizeInBytes = getMemRefIntOrFloatEltSizeInBytes(memRefType); + if (!sizeInBytes) + return std::nullopt; for (unsigned i = 0, e = memRefType.getRank(); i < e; i++) { - sizeInBytes = sizeInBytes * memRefType.getDimSize(i); + sizeInBytes = *sizeInBytes * memRefType.getDimSize(i); } return sizeInBytes; } diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp index 79e8949c92a56..f398526da34b5 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp @@ -901,21 +901,6 @@ static void sinkSequentialLoops(MemRefDependenceGraph::Node *node) { node->op = newRootForOp; } -// TODO: improve/complete this when we have target data. -static unsigned getMemRefEltSizeInBytes(MemRefType memRefType) { - auto elementType = memRefType.getElementType(); - - unsigned sizeInBits; - if (elementType.isIntOrFloat()) { - sizeInBits = elementType.getIntOrFloatBitWidth(); - } else { - auto vectorType = elementType.cast(); - sizeInBits = - vectorType.getElementTypeBitWidth() * vectorType.getNumElements(); - } - return llvm::divideCeil(sizeInBits, 8); -} - // Creates and returns a private (single-user) memref for fused loop rooted // at 'forOp', with (potentially reduced) memref size based on the // MemRefRegion written to by 'srcStoreOpInst' at depth 'dstLoopDepth'. @@ -976,7 +961,9 @@ static Value createPrivateMemRef(AffineForOp forOp, Operation *srcStoreOpInst, // Create 'newMemRefType' using 'newShape' from MemRefRegion accessed // by 'srcStoreOpInst'. - uint64_t bufSize = getMemRefEltSizeInBytes(oldMemRefType) * *numElements; + auto eltSize = getMemRefIntOrFloatEltSizeInBytes(oldMemRefType); + assert(eltSize && "memrefs with size elt types expected"); + uint64_t bufSize = *eltSize * *numElements; unsigned newMemSpace; if (bufSize <= localBufSizeThreshold && fastMemorySpace.has_value()) { newMemSpace = *fastMemorySpace; diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index bcd87fcc570a3..38d660d4ff90b 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -2181,7 +2181,11 @@ static LogicalResult generateCopy( // Record it. fastBufferMap[memref] = fastMemRef; // fastMemRefType is a constant shaped memref. - *sizeInBytes = *getMemRefSizeInBytes(fastMemRefType); + auto maySizeInBytes = getIntOrFloatMemRefSizeInBytes(fastMemRefType); + // We don't account for things of unknown size. 
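+    // Treating the size as 0 means fast buffers for such memrefs never count
+    // against the `fastMemCapacityBytes` budget (see the doc comment added to
+    // affineDataCopyGenerate above).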
+ if (!maySizeInBytes) + maySizeInBytes = 0; + LLVM_DEBUG(emitRemarkForBlock(*block) << "Creating fast buffer of type " << fastMemRefType << " and size " << llvm::divideCeil(*sizeInBytes, 1024) diff --git a/mlir/test/Dialect/Affine/affine-data-copy.mlir b/mlir/test/Dialect/Affine/affine-data-copy.mlir index 22fbd7306d253..fe3b4a206e2b9 100644 --- a/mlir/test/Dialect/Affine/affine-data-copy.mlir +++ b/mlir/test/Dialect/Affine/affine-data-copy.mlir @@ -310,3 +310,26 @@ func.func @affine_parallel(%85:memref<2x5x4x2xi64>) { // CHECK-NEXT: affine.parallel return } + +// CHECK-LABEL: func @index_elt_type +func.func @index_elt_type(%arg0: memref<1x2x4x8xindex>) { + affine.for %arg1 = 0 to 1 { + affine.for %arg2 = 0 to 2 { + affine.for %arg3 = 0 to 4 { + affine.for %arg4 = 0 to 8 { + affine.store %arg4, %arg0[%arg1, %arg2, %arg3, %arg4] : memref<1x2x4x8xindex> + } + } + } + } + + // CHECK: affine.for %{{.*}} = 0 to 1 + // CHECK-NEXT: affine.for %{{.*}} = 0 to 2 + // CHECK-NEXT: affine.for %{{.*}} = 0 to 4 + // CHECK-NEXT: affine.for %{{.*}} = 0 to 8 + + // CHECK: affine.for %{{.*}} = 0 to 2 + // CHECK-NEXT: affine.for %{{.*}} = 0 to 4 + // CHECK-NEXT: affine.for %{{.*}} = 0 to 8 + return +} From c96269055f374f2bc354aedba5570102c38cbb11 Mon Sep 17 00:00:00 2001 From: Serguei Katkov Date: Wed, 22 Mar 2023 17:19:48 +0700 Subject: [PATCH 285/691] [LoopPredication] Add a test demonstrating bug. LoopPredication may introduce undefined behavior. --- .../test/Transforms/LoopPredication/poison.ll | 111 ++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 llvm/test/Transforms/LoopPredication/poison.ll diff --git a/llvm/test/Transforms/LoopPredication/poison.ll b/llvm/test/Transforms/LoopPredication/poison.ll new file mode 100644 index 0000000000000..886ec090a0216 --- /dev/null +++ b/llvm/test/Transforms/LoopPredication/poison.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=loop-predication < %s 2>&1 | FileCheck %s +; RUN: opt -S -passes='require,loop-mssa(loop-predication)' -verify-memoryssa < %s 2>&1 | FileCheck %s + +declare void @llvm.experimental.guard(i1, ...) + +; Check the case when N on latch exit might be poison. +define i32 @test1(i32 %length, i32 %nlimit) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[M:%.*]] = add nuw nsw i32 [[NLIMIT:%.*]], 2 +; CHECK-NEXT: [[N:%.*]] = xor i32 [[M]], 1 +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[N]], [[LENGTH:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LENGTH]] +; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[OK:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[J:%.*]] = phi i32 [ [[J_NEXT:%.*]], [[OK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[J]], [[LENGTH]] +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[TMP2]], i32 9) [ "deopt"() ] +; CHECK-NEXT: call void @llvm.assume(i1 [[WITHIN_BOUNDS]]) +; CHECK-NEXT: [[TMP:%.*]] = icmp ult i32 [[NLIMIT]], 1000 +; CHECK-NEXT: br i1 [[TMP]], label [[OK]], label [[EXIT:%.*]] +; CHECK: ok: +; CHECK-NEXT: [[I_NEXT]] = add i32 [[I]], 1 +; CHECK-NEXT: [[J_NEXT]] = add i32 [[J]], 1 +; CHECK-NEXT: [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; +entry: + ; %n might be a poison. 
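+  ; (With the nuw/nsw flags, %m is poison whenever %nlimit + 2 wraps, and the
+  ;  xor below propagates that poison into %n.)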
+  %m = add nuw nsw i32 %nlimit, 2
+  %n = xor i32 %m, 1
+  br label %loop
+
+loop:
+  %i = phi i32 [ %i.next, %ok ], [ 0, %entry ]
+  %j = phi i32 [ %j.next, %ok ], [ 0, %entry ]
+  %within.bounds = icmp ult i32 %j, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+
+  ; This check guarantees %n is not poison.
+  %tmp = icmp ult i32 %nlimit, 1000
+  br i1 %tmp, label %ok, label %exit
+
+ok:
+  %i.next = add i32 %i, 1
+  %j.next = add i32 %j, 1
+  %continue = icmp ult i32 %i.next, %n
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  ret i32 0
+}
+
+
+; Check the case when start value of IV might be poison.
+define i32 @test2(i32 noundef %length, i32 noundef %nlimit, i32 %istart) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[M:%.*]] = add nuw nsw i32 [[ISTART:%.*]], 2
+; CHECK-NEXT:    [[ISTART2:%.*]] = xor i32 [[M]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[ISTART2]], [[LENGTH:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[NLIMIT:%.*]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ult i32 1, [[LENGTH]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i1 [[TMP3]], [[TMP2]]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[OK:%.*]] ], [ [[ISTART2]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i32 [ [[J_NEXT:%.*]], [[OK]] ], [ 1, [[ENTRY]] ]
+; CHECK-NEXT:    [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[J]], [[LENGTH]]
+; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[TMP4]], i32 9) [ "deopt"() ]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[WITHIN_BOUNDS]])
+; CHECK-NEXT:    [[TMP:%.*]] = icmp ult i32 [[ISTART]], 1000
+; CHECK-NEXT:    br i1 [[TMP]], label [[OK]], label [[EXIT:%.*]]
+; CHECK:       ok:
+; CHECK-NEXT:    [[I_NEXT]] = add i32 [[I]], 1
+; CHECK-NEXT:    [[J_NEXT]] = add i32 [[J]], 1
+; CHECK-NEXT:    [[CONTINUE:%.*]] = icmp ult i32 [[I_NEXT]], [[NLIMIT]]
+; CHECK-NEXT:    br i1 [[CONTINUE]], label [[LOOP]], label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  ; %m might be poison.
+  %m = add nuw nsw i32 %istart, 2
+  %istart2 = xor i32 %m, 1
+  br label %loop
+
+loop:
+  %i = phi i32 [ %i.next, %ok ], [ %istart2, %entry ]
+  %j = phi i32 [ %j.next, %ok ], [ 1, %entry ]
+  %within.bounds = icmp ult i32 %j, %length
+  call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ]
+  ; This check guarantees %istart2 is not poison.
+  %tmp = icmp ult i32 %istart, 1000
+  br i1 %tmp, label %ok, label %exit
+
+ok:
+  %i.next = add i32 %i, 1
+  %j.next = add i32 %j, 1
+  %continue = icmp ult i32 %i.next, %nlimit
+  br i1 %continue, label %loop, label %exit
+
+exit:
+  ret i32 0
+}

From ea929a07b677406ae7bb42ae055a27b8606f088b Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Wed, 22 Mar 2023 11:28:49 +0000
Subject: [PATCH 286/691] [LV] Set inbounds flag using CreateGEP in
 vectorizeInterleaveGroup(NFC).

This avoids having to cast the result of the builder to
GetElementPtrInst.
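For reference, `IRBuilderBase::CreateGEP` accepts the inbounds flag as a trailing parameter, which is what the one-line change below relies on. A hedged, standalone sketch of the before and after shapes follows; the helper name `emitOffsetGEP` is invented, and this is not the vectorizer code itself:

#include "llvm/IR/IRBuilder.h"

using namespace llvm;

// Emit Base[Offset], propagating whether the original access was inbounds.
Value *emitOffsetGEP(IRBuilder<> &Builder, Type *ScalarTy, Value *Base,
                     Value *Offset, bool InBounds) {
  // Before: create the GEP, then cast the result to set the flag afterwards.
  //   Value *GEP = Builder.CreateGEP(ScalarTy, Base, Offset);
  //   cast<GetElementPtrInst>(GEP)->setIsInBounds(InBounds);
  // After: hand the flag to the builder directly; no cast is needed, and this
  // stays well-formed even if the builder constant-folds the GEP to a
  // ConstantExpr rather than returning a GetElementPtrInst.
  return Builder.CreateGEP(ScalarTy, Base, Offset, "", InBounds);
}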
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0b71d309c2975..58a80aa8025ec 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2659,8 +2659,8 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( bool InBounds = false; if (auto *gep = dyn_cast(AddrPart->stripPointerCasts())) InBounds = gep->isInBounds(); - AddrPart = Builder.CreateGEP(ScalarTy, AddrPart, Builder.getInt32(-Index)); - cast(AddrPart)->setIsInBounds(InBounds); + AddrPart = Builder.CreateGEP(ScalarTy, AddrPart, Builder.getInt32(-Index), + "", InBounds); // Cast to the vector pointer type. unsigned AddressSpace = AddrPart->getType()->getPointerAddressSpace(); From e2f6ebe8ca1563711ffecfb8102b0611f99e1cbb Mon Sep 17 00:00:00 2001 From: Zhongyunde Date: Wed, 22 Mar 2023 19:47:35 +0800 Subject: [PATCH 287/691] [tests] precommit tests for D145846 --- llvm/test/Transforms/InstCombine/icmp-sub.ll | 85 ++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/icmp-sub.ll b/llvm/test/Transforms/InstCombine/icmp-sub.ll index ab64934fcb554..68f158508cc94 100644 --- a/llvm/test/Transforms/InstCombine/icmp-sub.ll +++ b/llvm/test/Transforms/InstCombine/icmp-sub.ll @@ -561,3 +561,88 @@ bb_loop: bb_exit: ret void } + +; https://alive2.llvm.org/ce/z/D2Aph4 +define i1 @PR60818_ne(i32 %a) { +; CHECK-LABEL: @PR60818_ne( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[SUB]], [[A]] +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %sub = sub i32 0, %a + %cmp = icmp ne i32 %sub, %a + ret i1 %cmp +} + +define i1 @PR60818_eq(i32 %a) { +; CHECK-LABEL: @PR60818_eq( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SUB]], [[A]] +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %sub = sub i32 0, %a + %cmp = icmp eq i32 %sub, %a + ret i1 %cmp +} + +define i1 @PR60818_eq_commuted(i32 %x) { +; CHECK-LABEL: @PR60818_eq_commuted( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], 43 +; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], [[SUB]] +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %a = mul i32 %x, 43 ; thwart complexity-based canonicalization + %sub = sub i32 0, %a + %cmp = icmp eq i32 %a, %sub ; negation on RHS + ret i1 %cmp +} + +define <2 x i1> @PR60818_ne_vector(<2 x i32> %a) { +; CHECK-LABEL: @PR60818_ne_vector( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> zeroinitializer, [[A:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[SUB]], [[A]] +; CHECK-NEXT: ret <2 x i1> [[CMP]] +; +entry: + %sub = sub <2 x i32> zeroinitializer, %a + %cmp = icmp ne <2 x i32> %a, %sub + ret <2 x i1> %cmp +} + +; Negative as multi-use +define i1 @PR60818_eq_multi_use(i32 %a) { +; CHECK-LABEL: @PR60818_eq_multi_use( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]] +; CHECK-NEXT: call void @use(i32 [[SUB]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SUB]], [[A]] +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %sub = sub i32 0, %a + call void @use(i32 %sub) ; add new user + %cmp = icmp eq i32 %sub, %a + ret i1 %cmp +} + +; Negative as non-equality predicate +define i1 @PR60818_sgt(i32 %a) { +; CHECK-LABEL: @PR60818_sgt( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 
0, [[A:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[SUB]], [[A]] +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %sub = sub i32 0, %a + %cmp = icmp sgt i32 %sub, %a + ret i1 %cmp +} From 7949a2a802f0791eaf7c6ecbdd6ed5daa2a278cf Mon Sep 17 00:00:00 2001 From: Zhongyunde Date: Wed, 22 Mar 2023 19:52:41 +0800 Subject: [PATCH 288/691] [InstCombine] enhance icmp with sub folds The new IR with And removes a use of the input variable, which is better for analysis. Fix https://github.com/llvm/llvm-project/issues/60818 Reviewed By: nikic, spatel Differential Revision: https://reviews.llvm.org/D145846 --- .../InstCombine/InstCombineCompares.cpp | 38 +++++++++++++------ llvm/test/Transforms/InstCombine/icmp-sub.ll | 16 ++++---- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 421b1824b965d..1666c06ed6991 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -4065,20 +4065,36 @@ Value *InstCombinerImpl::foldMultiplicationOverflowCheck(ICmpInst &I) { return Res; } -static Instruction *foldICmpXNegX(ICmpInst &I) { +static Instruction *foldICmpXNegX(ICmpInst &I, + InstCombiner::BuilderTy &Builder) { CmpInst::Predicate Pred; Value *X; - if (!match(&I, m_c_ICmp(Pred, m_NSWNeg(m_Value(X)), m_Deferred(X)))) - return nullptr; + if (match(&I, m_c_ICmp(Pred, m_NSWNeg(m_Value(X)), m_Deferred(X)))) { - if (ICmpInst::isSigned(Pred)) - Pred = ICmpInst::getSwappedPredicate(Pred); - else if (ICmpInst::isUnsigned(Pred)) - Pred = ICmpInst::getSignedPredicate(Pred); - // else for equality-comparisons just keep the predicate. + if (ICmpInst::isSigned(Pred)) + Pred = ICmpInst::getSwappedPredicate(Pred); + else if (ICmpInst::isUnsigned(Pred)) + Pred = ICmpInst::getSignedPredicate(Pred); + // else for equality-comparisons just keep the predicate. + + return ICmpInst::Create(Instruction::ICmp, Pred, X, + Constant::getNullValue(X->getType()), I.getName()); + } - return ICmpInst::Create(Instruction::ICmp, Pred, X, - Constant::getNullValue(X->getType()), I.getName()); + // A value is not equal to its negation unless that value is 0 or + // MinSignedValue, ie: a != -a --> (a & MaxSignedVal) != 0 + if (match(&I, m_c_ICmp(Pred, m_OneUse(m_Neg(m_Value(X))), m_Deferred(X))) && + ICmpInst::isEquality(Pred)) { + Type *Ty = X->getType(); + uint32_t BitWidth = Ty->getScalarSizeInBits(); + Constant *MaxSignedVal = + ConstantInt::get(Ty, APInt::getSignedMaxValue(BitWidth)); + Value *And = Builder.CreateAnd(X, MaxSignedVal); + Constant *Zero = Constant::getNullValue(Ty); + return CmpInst::Create(Instruction::ICmp, Pred, And, Zero); + } + + return nullptr; } /// Try to fold icmp (binop), X or icmp X, (binop). 
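The fold implemented in the next commit rests on a small two's-complement fact: `a == -a` holds exactly when `2a` is congruent to 0 modulo `2^n`, i.e. when the low `n - 1` bits of `a` are all zero, and that is precisely `(a & SignedMax) == 0` (only `0` and the minimum signed value qualify). A throwaway exhaustive check over 8-bit values, purely illustrative and not part of the patch:

#include <cassert>
#include <cstdint>

int main() {
  for (int v = 0; v < 256; ++v) {
    uint8_t a = static_cast<uint8_t>(v);
    bool eqNeg = static_cast<uint8_t>(-a) == a; // a == -a in 8-bit arithmetic
    bool lowBitsZero = (a & 0x7f) == 0;         // (a & SignedMax) == 0
    assert(eqNeg == lowBitsZero);               // holds only for 0x00 and 0x80
  }
  return 0;
}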
@@ -4096,7 +4112,7 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I, if (!BO0 && !BO1) return nullptr; - if (Instruction *NewICmp = foldICmpXNegX(I)) + if (Instruction *NewICmp = foldICmpXNegX(I, Builder)) return NewICmp; const CmpInst::Predicate Pred = I.getPredicate(); diff --git a/llvm/test/Transforms/InstCombine/icmp-sub.ll b/llvm/test/Transforms/InstCombine/icmp-sub.ll index 68f158508cc94..2dad575fede83 100644 --- a/llvm/test/Transforms/InstCombine/icmp-sub.ll +++ b/llvm/test/Transforms/InstCombine/icmp-sub.ll @@ -566,8 +566,8 @@ bb_exit: define i1 @PR60818_ne(i32 %a) { ; CHECK-LABEL: @PR60818_ne( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[SUB]], [[A]] +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[A:%.*]], 2147483647 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP0]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; entry: @@ -579,8 +579,8 @@ entry: define i1 @PR60818_eq(i32 %a) { ; CHECK-LABEL: @PR60818_eq( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SUB]], [[A]] +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[A:%.*]], 2147483647 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; entry: @@ -593,8 +593,8 @@ define i1 @PR60818_eq_commuted(i32 %x) { ; CHECK-LABEL: @PR60818_eq_commuted( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = mul i32 [[X:%.*]], 43 -; CHECK-NEXT: [[SUB:%.*]] = sub i32 0, [[A]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], [[SUB]] +; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[A]], 2147483647 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; CHECK-NEXT: ret i1 [[CMP]] ; entry: @@ -607,8 +607,8 @@ entry: define <2 x i1> @PR60818_ne_vector(<2 x i32> %a) { ; CHECK-LABEL: @PR60818_ne_vector( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i32> zeroinitializer, [[A:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[SUB]], [[A]] +; CHECK-NEXT: [[TMP0:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP0]], zeroinitializer ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; entry: From c8117eb96423fcc9d31bb3fc8d70941686b33b83 Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Wed, 22 Mar 2023 12:50:24 +0100 Subject: [PATCH 289/691] [mlir] Add a pattern to fold tensor.cast into scf.forall. 
Differential revision: https://reviews.llvm.org/D146558 --- mlir/lib/Dialect/SCF/IR/SCF.cpp | 83 ++++++++++++++++++++++++- mlir/test/Dialect/SCF/canonicalize.mlir | 49 +++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp index e212159442844..3eda0d68f5fbb 100644 --- a/mlir/lib/Dialect/SCF/IR/SCF.cpp +++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp @@ -21,6 +21,7 @@ #include "mlir/IR/PatternMatch.h" #include "mlir/Support/MathExtras.h" #include "mlir/Transforms/InliningUtils.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/TypeSwitch.h" using namespace mlir; @@ -1594,11 +1595,91 @@ struct ForallOpSingleOrZeroIterationDimsFolder } }; +struct FoldTensorCastOfOutputIntoForallOp + : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + struct TypeCast { + Type srcType; + Type dstType; + }; + + LogicalResult matchAndRewrite(scf::ForallOp forallOp, + PatternRewriter &rewriter) const final { + llvm::SmallMapVector tensorCastProducers; + llvm::SmallVector newOutputTensors = forallOp.getOutputs(); + for (auto en : llvm::enumerate(newOutputTensors)) { + auto castOp = en.value().getDefiningOp(); + if (!castOp) + continue; + + // Only casts that that preserve static information, i.e. will make the + // loop result type "more" static than before, will be folded. + if (!tensor::preservesStaticInformation(castOp.getDest().getType(), + castOp.getSource().getType())) { + continue; + } + + tensorCastProducers[en.index()] = + TypeCast{castOp.getSource().getType(), castOp.getType()}; + newOutputTensors[en.index()] = castOp.getSource(); + } + + if (tensorCastProducers.empty()) + return failure(); + + // Create new loop. + Location loc = forallOp.getLoc(); + auto newForallOp = rewriter.create( + loc, forallOp.getMixedLowerBound(), forallOp.getMixedUpperBound(), + forallOp.getMixedStep(), newOutputTensors, forallOp.getMapping(), + [&](OpBuilder nestedBuilder, Location nestedLoc, ValueRange bbArgs) { + auto castBlockArgs = + llvm::to_vector(bbArgs.take_back(forallOp->getNumResults())); + for (auto [index, cast] : tensorCastProducers) { + Value &oldTypeBBArg = castBlockArgs[index]; + oldTypeBBArg = nestedBuilder.create( + nestedLoc, cast.dstType, oldTypeBBArg); + } + + // Move old body into new parallel loop. + SmallVector ivsBlockArgs = + llvm::to_vector(bbArgs.take_front(forallOp.getRank())); + ivsBlockArgs.append(castBlockArgs); + rewriter.mergeBlocks(forallOp.getBody(), + bbArgs.front().getParentBlock(), ivsBlockArgs); + }); + + // After `mergeBlocks` happened, the destinations in the terminator were + // mapped to the tensor.cast old-typed results of the output bbArgs. The + // destination have to be updated to point to the output bbArgs directly. + auto terminator = newForallOp.getTerminator(); + for (auto [yieldingOp, outputBlockArg] : + llvm::zip(terminator.getYieldingOps(), + newForallOp.getOutputBlockArguments())) { + auto insertSliceOp = cast(yieldingOp); + insertSliceOp.getDestMutable().assign(outputBlockArg); + } + + // Cast results back to the original types. 
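+    // (The rebuilt loop yields the more static source types of the folded
+    // casts, so each affected result is cast back to the type the original
+    // loop produced, keeping all existing users type-correct.)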
+ rewriter.setInsertionPointAfter(newForallOp); + SmallVector castResults = newForallOp.getResults(); + for (auto &item : tensorCastProducers) { + Value &oldTypeResult = castResults[item.first]; + oldTypeResult = rewriter.create(loc, item.second.dstType, + oldTypeResult); + } + rewriter.replaceOp(forallOp, castResults); + return success(); + } +}; + } // namespace void ForallOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); } diff --git a/mlir/test/Dialect/SCF/canonicalize.mlir b/mlir/test/Dialect/SCF/canonicalize.mlir index f69cf196597e2..ec6e35a200075 100644 --- a/mlir/test/Dialect/SCF/canonicalize.mlir +++ b/mlir/test/Dialect/SCF/canonicalize.mlir @@ -1651,3 +1651,52 @@ func.func @remove_empty_forall(%in: tensor<8x8xf32>) -> tensor<8x8xf32> { // CHECK: %[[EMPTY:.*]] = tensor.empty // CHECK: return %[[EMPTY]] +// ----- + +func.func @fold_tensor_cast_into_forall( + %in: tensor<2xi32>, %out: tensor<2xi32>) -> tensor<2xi32> { + %cst = arith.constant dense<[100500]> : tensor<1xi32> + + + %out_cast = tensor.cast %out : tensor<2xi32> to tensor + %result = scf.forall (%i) = (0) to (2) step (1) + shared_outs (%out_ = %out_cast) -> tensor { + + scf.forall.in_parallel { + tensor.parallel_insert_slice %cst into %out_[%i] [1] [1] + : tensor<1xi32> into tensor + } + } + %result_cast = tensor.cast %result : tensor to tensor<2xi32> + func.return %result_cast : tensor<2xi32> +} +// CHECK-LABEL: @fold_tensor_cast_into_forall +// CHECK-NOT: tensor.cast +// CHECK: parallel_insert_slice +// CHECK-SAME: : tensor<1xi32> into tensor<2xi32> +// CHECK-NOT: tensor.cast + +// ----- + +func.func @do_not_fold_tensor_cast_from_dynamic_to_static_type_into_forall( + %in: tensor, %out: tensor) -> tensor { + %cst = arith.constant dense<[100500]> : tensor<1xi32> + + + %out_cast = tensor.cast %out : tensor to tensor<2xi32> + %result = scf.forall (%i) = (0) to (2) step (1) + shared_outs (%out_ = %out_cast) -> tensor<2xi32> { + + scf.forall.in_parallel { + tensor.parallel_insert_slice %cst into %out_[%i] [1] [1] + : tensor<1xi32> into tensor<2xi32> + } + } + %result_cast = tensor.cast %result : tensor<2xi32> to tensor + func.return %result_cast : tensor +} +// CHECK-LABEL: @do_not_fold_tensor_cast_ +// CHECK: tensor.cast +// CHECK: parallel_insert_slice +// CHECK-SAME: : tensor<1xi32> into tensor<2xi32> +// CHECK: tensor.cast From 255ba1c334b86792054c152ce8533dca5b452b41 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Wed, 22 Mar 2023 02:19:20 -0700 Subject: [PATCH 290/691] [mlir][AffineMap] NFC - Refactor getProjectedMap and split into projectDims and projectSymbols The default behavior of getProjectedMap may be surprising as it implicitly compresses the dims and the unused symbols. Make these explicit in the API and refactor to more idiomatic implementations with better reuse. Differential Revision: https://reviews.llvm.org/D146611 --- mlir/include/mlir/IR/AffineMap.h | 58 +++++++---- mlir/lib/IR/AffineMap.cpp | 160 ++++++++++++++++++++----------- 2 files changed, 144 insertions(+), 74 deletions(-) diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h index 0f4c7469497a1..cc7c794f1f933 100644 --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -403,6 +403,9 @@ struct MutableAffineMap { /// Simplifies an affine map by simplifying its underlying AffineExpr results. AffineMap simplifyAffineMap(AffineMap map); +/// Drop the dims that are listed in `unusedDims`. 
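+/// The surviving dims are renumbered so the resulting map uses a contiguous
+/// range of dim identifiers starting at 0.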
+AffineMap compressDims(AffineMap map, const llvm::SmallBitVector &unusedDims); + /// Drop the dims that are not used. AffineMap compressUnusedDims(AffineMap map); @@ -411,8 +414,9 @@ AffineMap compressUnusedDims(AffineMap map); /// dims and symbols. SmallVector compressUnusedDims(ArrayRef maps); -/// Drop the dims that are not listed in `unusedDims`. -AffineMap compressDims(AffineMap map, const llvm::SmallBitVector &unusedDims); +/// Drop the symbols that are listed in `unusedSymbols`. +AffineMap compressSymbols(AffineMap map, + const llvm::SmallBitVector &unusedSymbols); /// Drop the symbols that are not used. AffineMap compressUnusedSymbols(AffineMap map); @@ -422,10 +426,6 @@ AffineMap compressUnusedSymbols(AffineMap map); /// dims and symbols. SmallVector compressUnusedSymbols(ArrayRef maps); -/// Drop the symbols that are not listed in `unusedSymbols`. -AffineMap compressSymbols(AffineMap map, - const llvm::SmallBitVector &unusedSymbols); - /// Returns a map with the same dimension and symbol count as `map`, but whose /// results are the unique affine expressions of `map`. AffineMap removeDuplicateExprs(AffineMap map); @@ -469,7 +469,7 @@ AffineMap inversePermutation(AffineMap map); /// Return the reverse map of a projected permutation where the projected /// dimensions are transformed into 0s. /// -/// Prerequisites: `map` must be a projected permuation. +/// Prerequisites: `map` must be a projected permutation. /// /// Example 1: /// @@ -559,9 +559,38 @@ AffineMap concatAffineMaps(ArrayRef maps); /// projected_dimensions : {1} /// result : affine_map<(d0, d1) -> (d0, 0)> /// -/// This function also compresses unused symbols away. +/// This function also compresses the dims when the boolean flag is true. +AffineMap projectDims(AffineMap map, + const llvm::SmallBitVector &projectedDimensions, + bool compressDimsFlag = false); +/// Symbol counterpart of `projectDims`. +/// This function also compresses the symbols when the boolean flag is true. +AffineMap projectSymbols(AffineMap map, + const llvm::SmallBitVector &projectedSymbols, + bool compressSymbolsFlag = false); +/// Calls `projectDims(map, projectedDimensions, compressDimsFlag)`. +/// If `compressSymbolsFlag` is true, additionally call `compressUnusedSymbols`. AffineMap getProjectedMap(AffineMap map, - const llvm::SmallBitVector &projectedDimensions); + const llvm::SmallBitVector &projectedDimensions, + bool compressDimsFlag = true, + bool compressSymbolsFlag = true); + +// Return a bitvector where each bit set indicates a dimension that is not used +// by any of the maps in the input array `maps`. +llvm::SmallBitVector getUnusedDimsBitVector(ArrayRef maps); + +// Return a bitvector where each bit set indicates a symbol that is not used +// by any of the maps in the input array `maps`. +llvm::SmallBitVector getUnusedSymbolsBitVector(ArrayRef maps); + +inline raw_ostream &operator<<(raw_ostream &os, AffineMap map) { + map.print(os); + return os; +} + +//===----------------------------------------------------------------------===// +// Templated helper functions. +//===----------------------------------------------------------------------===// /// Apply a permutation from `map` to `source` and return the result. 
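 /// E.g. applying "(d0, d1, d2) -> (d2, d0)" to the array [a, b, c] yields
 /// [c, a]: each result dim selects the source element at that position.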
template @@ -584,7 +613,7 @@ SmallVector applyPermutationMap(AffineMap map, llvm::ArrayRef source) { return result; } -/// Calculates maxmimum dimension and symbol positions from the expressions +/// Calculates maximum dimension and symbol positions from the expressions /// in `exprsLists` and stores them in `maxDim` and `maxSym` respectively. template static void getMaxDimAndSymbol(ArrayRef exprsList, @@ -601,15 +630,6 @@ static void getMaxDimAndSymbol(ArrayRef exprsList, } } -inline raw_ostream &operator<<(raw_ostream &os, AffineMap map) { - map.print(os); - return os; -} - -// Return a bitvector where each bit set indicates a dimension that is not used -// by any of the maps in the input array `maps`. -llvm::SmallBitVector getUnusedDimsBitVector(ArrayRef maps); - } // namespace mlir namespace llvm { diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp index 90c546656ec84..c924d2bcde556 100644 --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -12,12 +12,14 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Support/MathExtras.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" #include #include +#include using namespace mlir; @@ -569,32 +571,13 @@ AffineMap AffineMap::getMinorSubMap(unsigned numResults) const { return getSliceMap(getNumResults() - numResults, numResults); } -AffineMap mlir::compressDims(AffineMap map, - const llvm::SmallBitVector &unusedDims) { - unsigned numDims = 0; - SmallVector dimReplacements; - dimReplacements.reserve(map.getNumDims()); - MLIRContext *context = map.getContext(); - for (unsigned dim = 0, e = map.getNumDims(); dim < e; ++dim) { - if (unusedDims.test(dim)) - dimReplacements.push_back(getAffineConstantExpr(0, context)); - else - dimReplacements.push_back(getAffineDimExpr(numDims++, context)); - } - SmallVector resultExprs; - resultExprs.reserve(map.getNumResults()); - for (auto e : map.getResults()) - resultExprs.push_back(e.replaceDims(dimReplacements)); - return AffineMap::get(numDims, map.getNumSymbols(), resultExprs, context); -} - -AffineMap mlir::compressUnusedDims(AffineMap map) { - return compressDims(map, getUnusedDimsBitVector({map})); -} - -static SmallVector -compressUnusedImpl(ArrayRef maps, - llvm::function_ref compressionFun) { +/// Implementation detail to compress multiple affine maps with a compressionFun +/// that is expected to be either compressUnusedDims or compressUnusedSymbols. +/// The implementation keeps track of num dims and symbols across the different +/// affine maps. 
+static SmallVector compressUnusedListImpl( + ArrayRef maps, + llvm::function_ref compressionFun) { if (maps.empty()) return SmallVector(); SmallVector allExprs; @@ -622,41 +605,31 @@ compressUnusedImpl(ArrayRef maps, return res; } +AffineMap mlir::compressDims(AffineMap map, + const llvm::SmallBitVector &unusedDims) { + return projectDims(map, unusedDims, /*compressDimsFlag=*/true); +} + +AffineMap mlir::compressUnusedDims(AffineMap map) { + return compressDims(map, getUnusedDimsBitVector({map})); +} + SmallVector mlir::compressUnusedDims(ArrayRef maps) { - return compressUnusedImpl(maps, - [](AffineMap m) { return compressUnusedDims(m); }); + return compressUnusedListImpl( + maps, [](AffineMap m) { return compressUnusedDims(m); }); } AffineMap mlir::compressSymbols(AffineMap map, const llvm::SmallBitVector &unusedSymbols) { - unsigned numSymbols = 0; - SmallVector symReplacements; - symReplacements.reserve(map.getNumSymbols()); - MLIRContext *context = map.getContext(); - for (unsigned sym = 0, e = map.getNumSymbols(); sym < e; ++sym) { - if (unusedSymbols.test(sym)) - symReplacements.push_back(getAffineConstantExpr(0, context)); - else - symReplacements.push_back(getAffineSymbolExpr(numSymbols++, context)); - } - SmallVector resultExprs; - resultExprs.reserve(map.getNumResults()); - for (auto e : map.getResults()) - resultExprs.push_back(e.replaceSymbols(symReplacements)); - return AffineMap::get(map.getNumDims(), numSymbols, resultExprs, context); + return projectSymbols(map, unusedSymbols, /*compressSymbolsFlag=*/true); } AffineMap mlir::compressUnusedSymbols(AffineMap map) { - llvm::SmallBitVector unusedSymbols(map.getNumSymbols(), true); - map.walkExprs([&](AffineExpr expr) { - if (auto symExpr = expr.dyn_cast()) - unusedSymbols.reset(symExpr.getPosition()); - }); - return compressSymbols(map, unusedSymbols); + return compressSymbols(map, getUnusedSymbolsBitVector({map})); } SmallVector mlir::compressUnusedSymbols(ArrayRef maps) { - return compressUnusedImpl( + return compressUnusedListImpl( maps, [](AffineMap m) { return compressUnusedSymbols(m); }); } @@ -741,15 +714,80 @@ AffineMap mlir::concatAffineMaps(ArrayRef maps) { maps.front().getContext()); } +/// Common implementation to project out dimensions or symbols from an affine +/// map based on the template type. +/// Additionally, if 'compress' is true, the projected out dimensions or symbols +/// are also dropped from the resulting map. +template +static AffineMap projectCommonImpl(AffineMap map, + const llvm::SmallBitVector &toProject, + bool compress) { + static_assert(llvm::is_one_of::value, + "expected AffineDimExpr or AffineSymbolExpr"); + + constexpr bool isDim = std::is_same::value; + int64_t numDimOrSym = (isDim) ? map.getNumDims() : map.getNumSymbols(); + SmallVector replacements; + replacements.reserve(numDimOrSym); + + auto createNewDimOrSym = (isDim) ? getAffineDimExpr : getAffineSymbolExpr; + auto replaceDims = [](AffineExpr e, ArrayRef replacements) { + return e.replaceDims(replacements); + }; + auto replaceSymbols = [](AffineExpr e, ArrayRef replacements) { + return e.replaceSymbols(replacements); + }; + auto replaceNewDimOrSym = (isDim) ? replaceDims : replaceSymbols; + + MLIRContext *context = map.getContext(); + int64_t newNumDimOrSym = 0; + for (unsigned dimOrSym = 0; dimOrSym < numDimOrSym; ++dimOrSym) { + if (toProject.test(dimOrSym)) { + replacements.push_back(getAffineConstantExpr(0, context)); + continue; + } + int64_t newPos = compress ? 
newNumDimOrSym++ : dimOrSym; + replacements.push_back(createNewDimOrSym(newPos, context)); + } + SmallVector resultExprs; + resultExprs.reserve(map.getNumResults()); + for (auto e : map.getResults()) + resultExprs.push_back(replaceNewDimOrSym(e, replacements)); + + int64_t numDims = (compress && isDim) ? newNumDimOrSym : map.getNumDims(); + int64_t numSyms = (compress && !isDim) ? newNumDimOrSym : map.getNumSymbols(); + return AffineMap::get(numDims, numSyms, resultExprs, context); +} + +AffineMap mlir::projectDims(AffineMap map, + const llvm::SmallBitVector &projectedDimensions, + bool compressDimsFlag) { + return projectCommonImpl(map, projectedDimensions, + compressDimsFlag); +} + +AffineMap mlir::projectSymbols(AffineMap map, + const llvm::SmallBitVector &projectedSymbols, + bool compressSymbolsFlag) { + return projectCommonImpl(map, projectedSymbols, + compressSymbolsFlag); +} + AffineMap mlir::getProjectedMap(AffineMap map, - const llvm::SmallBitVector &unusedDims) { - return compressUnusedSymbols(compressDims(map, unusedDims)); + const llvm::SmallBitVector &projectedDimensions, + bool compressDimsFlag, + bool compressSymbolsFlag) { + map = projectDims(map, projectedDimensions, compressDimsFlag); + if (compressSymbolsFlag) + map = compressUnusedSymbols(map); + return map; } llvm::SmallBitVector mlir::getUnusedDimsBitVector(ArrayRef maps) { unsigned numDims = maps[0].getNumDims(); llvm::SmallBitVector numDimsBitVector(numDims, true); - for (const auto &m : maps) { + for (AffineMap m : maps) { for (unsigned i = 0; i < numDims; ++i) { if (m.isFunctionOfDim(i)) numDimsBitVector.reset(i); @@ -758,6 +796,18 @@ llvm::SmallBitVector mlir::getUnusedDimsBitVector(ArrayRef maps) { return numDimsBitVector; } +llvm::SmallBitVector mlir::getUnusedSymbolsBitVector(ArrayRef maps) { + unsigned numSymbols = maps[0].getNumSymbols(); + llvm::SmallBitVector numSymbolsBitVector(numSymbols, true); + for (AffineMap m : maps) { + for (unsigned i = 0; i < numSymbols; ++i) { + if (m.isFunctionOfSymbol(i)) + numSymbolsBitVector.reset(i); + } + } + return numSymbolsBitVector; +} + //===----------------------------------------------------------------------===// // MutableAffineMap. //===----------------------------------------------------------------------===// @@ -784,8 +834,8 @@ bool MutableAffineMap::isMultipleOf(unsigned idx, int64_t factor) const { return false; } -// Simplifies the result affine expressions of this map. The expressions have to -// be pure for the simplification implemented. +// Simplifies the result affine expressions of this map. The expressions +// have to be pure for the simplification implemented. void MutableAffineMap::simplify() { // Simplify each of the results if possible. 
// TODO: functional-style map From f0111f20399ecb5d188a0e59417f801d524e905f Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Wed, 22 Mar 2023 12:50:22 +0000 Subject: [PATCH 291/691] [libc][Bazel] Bazel fix for 494734b06d62b --- utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 5e563fe71a1dd..e74c0e64ab10d 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -219,6 +219,7 @@ libc_support_library( hdrs = ["src/__support/arg_list.h"], deps = [ ":libc_root", + ":__support_common", ], ) From 256c6729d371722ea558411957c4e2ef362c54a2 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 22 Mar 2023 13:12:49 +0000 Subject: [PATCH 292/691] [ConstraintElim] Add tests with GEP constant expressions. --- .../ConstraintElimination/gep-constexpr.ll | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 llvm/test/Transforms/ConstraintElimination/gep-constexpr.ll diff --git a/llvm/test/Transforms/ConstraintElimination/gep-constexpr.ll b/llvm/test/Transforms/ConstraintElimination/gep-constexpr.ll new file mode 100644 index 0000000000000..38dbfc4aa4dc1 --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/gep-constexpr.ll @@ -0,0 +1,44 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=constraint-elimination -S %s | FileCheck %s + +declare void @llvm.assume(i1 noundef) #0 + +@arr1 = common global [3 x i16] zeroinitializer, align 2 + +define i1 @gep_constexpr_index_lt_upper(i32 noundef %i) { +; CHECK-LABEL: @gep_constexpr_index_lt_upper( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I:%.*]], 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[UPPER:%.*]] = getelementptr inbounds i16, ptr @arr1, i64 [[IDXPROM]] +; CHECK-NEXT: [[C_1:%.*]] = icmp ult ptr [[UPPER]], getelementptr inbounds ([3 x i16], ptr @arr1, i64 1, i64 0) +; CHECK-NEXT: ret i1 [[C_1]] +; +entry: + %cmp = icmp ult i32 %i, 3 + call void @llvm.assume(i1 %cmp) + %idxprom = zext i32 %i to i64 + %upper = getelementptr inbounds i16, ptr @arr1, i64 %idxprom + %c.1 = icmp ult ptr %upper, getelementptr inbounds ([3 x i16], ptr @arr1, i64 1, i64 0) + ret i1 %c.1 +} + +define i1 @gep_constexpr_index_may_be_gt_upper(i32 noundef %i) { +; CHECK-LABEL: @gep_constexpr_index_may_be_gt_upper( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[I:%.*]], 3 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[IDXPROM:%.*]] = zext i32 [[I]] to i64 +; CHECK-NEXT: [[UPPER:%.*]] = getelementptr inbounds i16, ptr @arr1, i64 [[IDXPROM]] +; CHECK-NEXT: [[C_1:%.*]] = icmp ult ptr [[UPPER]], getelementptr inbounds ([3 x i16], ptr @arr1, i64 0, i64 2) +; CHECK-NEXT: ret i1 [[C_1]] +; +entry: + %cmp = icmp ult i32 %i, 3 + call void @llvm.assume(i1 %cmp) + %idxprom = zext i32 %i to i64 + %upper = getelementptr inbounds i16, ptr @arr1, i64 %idxprom + %c.1 = icmp ult ptr %upper, getelementptr inbounds ([3 x i16], ptr @arr1, i64 0, i64 2) + ret i1 %c.1 +} From 3a3ad9fe1811ebd7ff865b77f471d155d9a05ec8 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 22 Mar 2023 14:21:51 +0100 Subject: [PATCH 293/691] [SimplifyCFG][LICM] Add metadata speculation tests (NFC) --- llvm/test/Transforms/LICM/hoist-metadata.ll | 73 +++++++++++++++++++ 
.../SimplifyCFG/hoist-with-metadata.ll | 58 +++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 llvm/test/Transforms/LICM/hoist-metadata.ll diff --git a/llvm/test/Transforms/LICM/hoist-metadata.ll b/llvm/test/Transforms/LICM/hoist-metadata.ll new file mode 100644 index 0000000000000..7c5ed1f7ae6fb --- /dev/null +++ b/llvm/test/Transforms/LICM/hoist-metadata.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 +; RUN: opt -S -passes=licm < %s| FileCheck %s + +declare void @foo(...) memory(none) + +define void @test_unconditional(i1 %c, ptr dereferenceable(8) align 8 %p) { +; CHECK-LABEL: define void @test_unconditional +; CHECK-SAME: (i1 [[C:%.*]], ptr align 8 dereferenceable(8) [[P:%.*]]) { +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT: [[V2:%.*]] = load ptr, ptr [[P]], align 8, !nonnull !1, !noundef !1 +; CHECK-NEXT: [[V3:%.*]] = load ptr, ptr [[P]], align 8, !dereferenceable !2, !align !2 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: call void @foo(i32 [[V1]], ptr [[V2]], ptr [[V3]]) +; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; + br label %loop + +loop: + %v1 = load i32, ptr %p, !range !{i32 0, i32 10} + %v2 = load ptr, ptr %p, !nonnull !{}, !noundef !{} + %v3 = load ptr, ptr %p, !align !{i64 4}, !dereferenceable !{i64 4} + call void @foo(i32 %v1, ptr %v2, ptr %v3) + br i1 %c, label %loop, label %exit + +exit: + ret void +} + +define void @test_conditional(i1 %c, i1 %c2, ptr dereferenceable(8) align 8 %p) { +; CHECK-LABEL: define void @test_conditional +; CHECK-SAME: (i1 [[C:%.*]], i1 [[C2:%.*]], ptr align 8 dereferenceable(8) [[P:%.*]]) { +; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[P]], align 4 +; CHECK-NEXT: [[V2:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: [[V3:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: br i1 [[C]], label [[IF:%.*]], label [[LATCH:%.*]] +; CHECK: if: +; CHECK-NEXT: call void @foo(i32 [[V1]], ptr [[V2]], ptr [[V3]]) +; CHECK-NEXT: br label [[LATCH]] +; CHECK: latch: +; CHECK-NEXT: br i1 [[C2]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: ret void +; + br label %loop + +loop: + br i1 %c, label %if, label %latch + +if: + %v1 = load i32, ptr %p, !range !{i32 0, i32 10} + %v2 = load ptr, ptr %p, !nonnull !{}, !noundef !{} + %v3 = load ptr, ptr %p, !align !{i64 4}, !dereferenceable !{i64 4} + call void @foo(i32 %v1, ptr %v2, ptr %v3) + br label %latch + +latch: + br i1 %c2, label %loop, label %exit + +exit: + ret void +} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { memory(none) } +;. +; CHECK: [[RNG0]] = !{i32 0, i32 10} +; CHECK: [[META1:![0-9]+]] = !{} +; CHECK: [[META2:![0-9]+]] = !{i64 4} +;. 
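+;
+; The contrast between the two tests above is the point of this file: in the
+; unconditional case the load is known to execute, so !range, !nonnull and
+; !noundef survive hoisting; in the conditional case the load is speculated,
+; and keeping that metadata would assert facts that were only guaranteed on
+; the guarded path, so the hoisted loads appear without it.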
diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll index 63ffdbb5a90d3..72f9f5b2f6fde 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-with-metadata.ll @@ -61,6 +61,64 @@ out: ret void } +define i32 @speculate_range(i1 %c, ptr dereferenceable(8) align 8 %p) { +; CHECK-LABEL: @speculate_range( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], i32 [[V]], i32 0 +; CHECK-NEXT: ret i32 [[SPEC_SELECT]] +; +entry: + br i1 %c, label %if, label %join + +if: + %v = load i32, ptr %p, !range !{i32 0, i32 10} + br label %join + +join: + %phi = phi i32 [ %v, %if ], [ 0, %entry ] + ret i32 %phi +} + +define ptr @speculate_nonnull(i1 %c, ptr dereferenceable(8) align 8 %p) { +; CHECK-LABEL: @speculate_nonnull( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], ptr [[V]], ptr null +; CHECK-NEXT: ret ptr [[SPEC_SELECT]] +; +entry: + br i1 %c, label %if, label %join + +if: + %v = load ptr, ptr %p, !nonnull !{}, !noundef !{} + br label %join + +join: + %phi = phi ptr [ %v, %if ], [ null, %entry ] + ret ptr %phi +} + + +define ptr @speculate_align(i1 %c, ptr dereferenceable(8) align 8 %p) { +; CHECK-LABEL: @speculate_align( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[V:%.*]] = load ptr, ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C:%.*]], ptr [[V]], ptr null +; CHECK-NEXT: ret ptr [[SPEC_SELECT]] +; +entry: + br i1 %c, label %if, label %join + +if: + %v = load ptr, ptr %p, !align !{i64 4}, !dereferenceable !{i64 4} + br label %join + +join: + %phi = phi ptr [ %v, %if ], [ null, %entry ] + ret ptr %phi +} + !0 = !{ i8 0, i8 1 } !1 = !{ i8 3, i8 5 } !2 = !{} From acf6a3224955779724a35a383d63c48af2163171 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Tue, 14 Mar 2023 13:27:30 +0000 Subject: [PATCH 294/691] [flang] add -flang-experimental-hlfir flag to flang-new This flag instructs flang-new to use the new HLFIR lowering. It is marked as experimental and not included in --help. This was added to make it more convenient to test the performance of code generated by the HLFIR lowering. Extra diffs are from running clang-format on CLOptions.inc (which was being forced by CI). 
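
An example invocation (the input file name is hypothetical; the flag is
deliberately not shown in the regular --help output):

  flang-new -flang-experimental-hlfir -c test.f90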
Differential Revision: https://reviews.llvm.org/D146278 --- clang/include/clang/Driver/Options.td | 4 ++++ clang/lib/Driver/ToolChains/Flang.cpp | 3 +++ flang/include/flang/Tools/CLOptions.inc | 23 ++++++++++++++++--- flang/lib/Frontend/CompilerInvocation.cpp | 5 ++++ flang/test/Driver/driver-help-hidden.f90 | 2 ++ flang/test/Driver/mlir-pass-pipeline.f90 | 4 ++++ flang/test/Fir/basic-program.fir | 6 ++++- .../HLFIR/flang-experimental-hlfir-flag.f90 | 20 ++++++++++++++++ 8 files changed, 63 insertions(+), 4 deletions(-) create mode 100644 flang/test/HLFIR/flang-experimental-hlfir-flag.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index a05e61ac0e92f..b50dfd6f35510 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5080,6 +5080,10 @@ def flang_experimental_exec : Flag<["-"], "flang-experimental-exec">, Flags<[FlangOption, FlangOnlyOption, NoXarchOption, HelpHidden]>, HelpText<"Enable support for generating executables (experimental)">; +def flang_experimental_hlfir : Flag<["-"], "flang-experimental-hlfir">, + Flags<[FlangOption, FC1Option, FlangOnlyOption, NoXarchOption, HelpHidden]>, + HelpText<"Use HLFIR lowering (experimental)">; + //===----------------------------------------------------------------------===// // FLangOption + CoreOption + NoXarchOption //===----------------------------------------------------------------------===// diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 0a4a0de99b89f..23083ff3795b5 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -65,6 +65,9 @@ void Flang::addOtherOptions(const ArgList &Args, ArgStringList &CmdArgs) const { if (stackArrays && !stackArrays->getOption().matches(options::OPT_fno_stack_arrays)) CmdArgs.push_back("-fstack-arrays"); + + if (Args.hasArg(options::OPT_flang_experimental_hlfir)) + CmdArgs.push_back("-flang-experimental-hlfir"); } void Flang::addPicOptions(const ArgList &Args, ArgStringList &CmdArgs) const { diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 9324686138717..30581624d0dc5 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -14,6 +14,7 @@ #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" #include "flang/Optimizer/CodeGen/CodeGen.h" +#include "flang/Optimizer/HLFIR/Passes.h" #include "flang/Optimizer/Transforms/Passes.h" #include "llvm/Passes/OptimizationLevel.h" #include "llvm/Support/CommandLine.h" @@ -72,7 +73,8 @@ DisableOption(BoxedProcedureRewrite, "boxed-procedure-rewrite", "rewrite boxed procedures"); #endif -DisableOption(ExternalNameConversion, "external-name-interop", "convert names with external convention"); +DisableOption(ExternalNameConversion, "external-name-interop", + "convert names with external convention"); /// Generic for adding a pass to the pass manager if it is not disabled. 
template @@ -211,6 +213,20 @@ inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm, pm.addPass(mlir::createCSEPass()); } +/// Create a pass pipeline for lowering from HLFIR to FIR +/// +/// \param pm - MLIR pass manager that will hold the pipeline definition +/// \param optLevel - optimization level used for creating FIR optimization +/// passes pipeline +inline void createHLFIRToFIRPassPipeline( + mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel) { + if (optLevel.isOptimizingForSpeed()) + pm.addPass(mlir::createCanonicalizerPass()); + pm.addPass(hlfir::createLowerHLFIRIntrinsicsPass()); + pm.addPass(hlfir::createBufferizeHLFIRPass()); + pm.addPass(hlfir::createConvertHLFIRtoFIRPass()); +} + #if !defined(FLANG_EXCLUDE_CODEGEN) inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel, @@ -218,8 +234,7 @@ inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm, fir::addBoxedProcedurePass(pm); pm.addNestedPass( fir::createAbstractResultOnFuncOptPass()); - pm.addNestedPass( - fir::createAbstractResultOnGlobalOptPass()); + pm.addNestedPass(fir::createAbstractResultOnGlobalOptPass()); fir::addCodeGenRewritePass(pm); fir::addTargetRewritePass(pm); fir::addExternalNameConversionPass(pm, underscoring); @@ -234,6 +249,8 @@ inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm, inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm, llvm::OptimizationLevel optLevel = defaultOptLevel, bool stackArrays = false, bool underscoring = true) { + fir::createHLFIRToFIRPassPipeline(pm, optLevel); + // Add default optimizer pass pipeline. fir::createDefaultFIROptimizerPassPipeline(pm, optLevel, stackArrays); diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index 6e963e2528101..a4183a52115b6 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -818,6 +818,11 @@ bool CompilerInvocation::createFromArgs( success = false; } + // -flang-experimental-hlfir + if (args.hasArg(clang::driver::options::OPT_flang_experimental_hlfir)) { + res.loweringOpts.setLowerToHighLevelFIR(true); + } + success &= parseFrontendArgs(res.getFrontendOpts(), args, diags); parseTargetArgs(res.getTargetOpts(), args); parsePreprocessorArgs(res.getPreprocessorOpts(), args); diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 index bcb77c9f8bccb..535bb82b023c6 100644 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -41,6 +41,8 @@ ! CHECK-NEXT: Specify where to find the compiled intrinsic modules ! CHECK-NEXT: -flang-experimental-exec ! CHECK-NEXT: Enable support for generating executables (experimental) +! CHECK-NEXT: -flang-experimental-hlfir +! CHECK-NEXT: Use HLFIR lowering (experimental) ! CHECK-NEXT: -flarge-sizes Use INTEGER(KIND=8) for the result type in size-related intrinsics ! CHECK-NEXT: -flogical-abbreviations Enable logical abbreviations ! CHECK-NEXT: -flto= Set LTO mode diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90 index f569ddac8a397..3e8df1615adaa 100644 --- a/flang/test/Driver/mlir-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-pass-pipeline.f90 @@ -12,6 +12,10 @@ ! ALL: Pass statistics report ! ALL: Fortran::lower::VerifierPass +! O2-NEXT: Canonicalizer +! ALL-NEXT: LowerHLFIRIntrinsics +! ALL-NEXT: BufferizeHLFIR +! 
ALL-NEXT: ConvertHLFIRtoFIR ! ALL-NEXT: CSE ! Ideally, we need an output with only the pass names, but ! there is currently no way to get that, so in order to diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir index 78c1ab080db11..e096fb62591e5 100644 --- a/flang/test/Fir/basic-program.fir +++ b/flang/test/Fir/basic-program.fir @@ -16,7 +16,11 @@ func.func @_QQmain() { // PASSES: Pass statistics report -// PASSES: CSE +// PASSES: Canonicalizer +// PASSES-NEXT: LowerHLFIRIntrinsics +// PASSES-NEXT: BufferizeHLFIR +// PASSES-NEXT: ConvertHLFIRtoFIR +// PASSES-NEXT: CSE // PASSES-NEXT: (S) 0 num-cse'd - Number of operations CSE'd // PASSES-NEXT: (S) 0 num-dce'd - Number of operations DCE'd diff --git a/flang/test/HLFIR/flang-experimental-hlfir-flag.f90 b/flang/test/HLFIR/flang-experimental-hlfir-flag.f90 new file mode 100644 index 0000000000000..a375d2726104e --- /dev/null +++ b/flang/test/HLFIR/flang-experimental-hlfir-flag.f90 @@ -0,0 +1,20 @@ +! Test -flang-experimental-hlfir flag +! RUN: %flang_fc1 -flang-experimental-hlfir -emit-fir -o - %s | FileCheck %s +! RUN: %flang_fc1 -emit-fir -o - %s | FileCheck %s --check-prefix NO-HLFIR + +subroutine test(a, res) + real :: a(:), res + res = SUM(a) +end subroutine +! CHECK-LABEL: func.func @_QPtest +! CHECK: %[[A:.*]]: !fir.box> +! CHECK: %[[RES:.*]]: !fir.ref +! CHECK-DAG: %[[A_VAR:.*]]:2 = hlfir.declare %[[A]] +! CHECK-DAG: %[[RES_VAR:.*]]:2 = hlfir.declare %[[RES]] +! CHECK-NEXT: %[[SUM_RES:.*]] = hlfir.sum %[[A_VAR]]#0 +! CHECK-NEXT: hlfir.assign %[[SUM_RES]] to %[[RES_VAR]]#0 +! CHECK-NEXT: hlfir.destroy %[[SUM_RES]] +! CHECK-NEXT: return +! CHECK-NEXT: } + +! NO-HLFIR-NOT: hlfir. From 893ce5759fe2e450dc637d4c76e779f883535882 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Wed, 15 Mar 2023 13:06:56 -0700 Subject: [PATCH 295/691] docs: add some documentation on Windows SDK search Add some documentation on the flags and the process by which clang identifies the headers and libraries for the Windows environment. It should identify the flags and their interactions as well as the order in which the various sources of information are consulted. Differential Revision: https://reviews.llvm.org/D146165 Reviewed By: hans, mstorjo --- clang/docs/UsersManual.rst | 123 +++++++++++++++++++++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 77b1c938c6ad4..031d8a9b624d5 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -4487,3 +4487,126 @@ If the user is using the static CRT (``/MT``), then different runtimes are used to produce DLLs and EXEs. To link a DLL, pass ``clang_rt.asan_dll_thunk-x86_64.lib``. To link an EXE, pass ``-wholearchive:clang_rt.asan-x86_64.lib``. + +Windows System Headers and Library Lookup +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +clang-cl uses a set of different approaches to locate the right system libraries +to link against when building code. The Windows environment uses libraries from +three distinct sources: + +1. Windows SDK +2. UCRT (Universal C Runtime) +3. Visual C++ Tools (VCRuntime) + +The Windows SDK provides the import libraries and headers required to build +programs against the Windows system packages. Underlying the Windows SDK is the +UCRT, the universal C runtime. + +This difference is best illustrated by the various headers that one would find +in the different categories. 
The WinSDK would contain headers such as
+`WinSock2.h`, which is part of the Windows
+socketing interfaces for networking. UCRT provides the C library headers,
+including e.g. `stdio.h`. Finally, the Visual C++ tools provide the underlying
+Visual C++ Runtime headers such as `stdint.h` or `crtdefs.h`.
+
+There are various controls that give the user control over where clang-cl
+will locate these headers. The default behaviour for the Windows SDK and UCRT
+is as follows:
+
+1. Consult the command line.
+
+   Anything the user specifies is always given precedence. The following
+   extensions are part of the clang-cl toolset:
+
+   - `/winsysroot:`
+
+     The `/winsysroot:` flag is used as an equivalent to `--sysroot` on Unix
+     environments. It allows an alternate location to be treated as the
+     system root. When specified, it will be used as the root under which the
+     `Windows Kits` directory is located.
+
+   - `/winsdkversion:`
+   - `/winsdkdir:`
+
+     If `/winsysroot:` is not specified, the `/winsdkdir:` argument is
+     consulted as a location to identify where the Windows SDK is located.
+     Unlike `/winsysroot:`, `/winsdkdir:` is expected to be the complete path
+     rather than a root under which to locate `Windows Kits`.
+
+     The `/winsdkversion:` flag allows the user to specify a version
+     identifier for the SDK to prefer. When this is specified, no additional
+     validation is performed and this version is preferred. If the version is
+     not specified, the highest detected version number will be used.
+
+2. Consult the environment.
+
+   TODO: This is not yet implemented.
+
+   This will consult the environment variables:
+
+   - `WindowsSdkDir`
+   - `UCRTVersion`
+
+3. Fallback to the registry.
+
+   If no arguments are used to indicate where the SDK is present, and the
+   compiler is running on Windows, the registry is consulted to locate the
+   installation.
+
+The Visual C++ Toolset has a slightly more elaborate mechanism for detection.
+
+1. Consult the command line.
+
+   - `/winsysroot:`
+
+     The `/winsysroot:` flag is used as an equivalent to `--sysroot` on Unix
+     environments. It allows an alternate location to be treated as the
+     system root. When specified, it will be used as the root under which the
+     `VC` directory is located.
+
+   - `/vctoolsdir:`
+   - `/vctoolsversion:`
+
+     If `/winsysroot:` is not specified, the `/vctoolsdir:` argument is
+     consulted as a location to identify where the Visual C++ Tools are
+     located. If `/vctoolsversion:` is specified, that version is preferred,
+     otherwise the highest detected version is used.
+
+2. Consult the environment.
+
+   - `/external:[VARIABLE]`
+
+     This specifies a user-named environment variable which is treated as a
+     path delimiter (`;`) separated list of paths to map into `-imsvc`
+     arguments, which are treated as `-isystem`.
+
+   - `INCLUDE` and `EXTERNAL_INCLUDE`
+
+     The path delimiter (`;`) separated list of paths will be mapped to
+     `-imsvc` arguments, which are treated as `-isystem`.
+
+   - `LIB` (indirectly)
+
+     The linker `link.exe` or `lld-link.exe` will honour the environment
+     variable `LIB`, which is a path delimiter (`;`) separated set of paths
+     to consult for the import libraries to use when linking the final
+     target.
+
+   The following environment variables will be consulted and used to form
+   paths to validate and load content from as appropriate:
+
+   - `VCToolsInstallDir`
+   - `VCINSTALLDIR`
+   - `Path`
+
+3. Consult `ISetupConfiguration` [Windows Only]
+
+   Assuming that the toolchain is built with `USE_MSVC_SETUP_API` defined and
+   is running on Windows, the Visual Studio COM interface
+   `ISetupConfiguration` will be used to locate the installation of the MSVC
+   toolset.
+
+4. Fallback to the registry [DEPRECATED]
+
+   The registry information is used to help locate the installation as a
+   final fallback. This is only possible for pre-VS2017 installations and is
+   considered deprecated.

From ee5617dc71b311d69fc048d4a62f59c1e37d1434 Mon Sep 17 00:00:00 2001
From: Stephen Tozer
Date: Wed, 1 Mar 2023 13:05:11 +0000
Subject: [PATCH 296/691] [Dexter] Add timeout options

Adds a pair of options for Dexter that allow the user to specify a
timeout duration. These options are:

* --timeout-total: Times out if the total run-time of the debugger
  session exceeds <n> seconds.
* --timeout-breakpoint: Times out if the time without hitting a
  breakpoint exceeds <n> seconds.

Reviewed By: Orlando

Differential Revision: https://reviews.llvm.org/D145063
---
 .../ConditionalController.py                  | 21 +++++++++++--
 .../DebuggerControllers/DefaultController.py  | 20 ++++++++++--
 .../dexter/dex/debugger/Debuggers.py          | 16 +++++++++-
 .../dexter/dex/utils/Timeout.py               | 31 +++++++++++++++++++
 4 files changed, 82 insertions(+), 6 deletions(-)
 create mode 100644 cross-project-tests/debuginfo-tests/dexter/dex/utils/Timeout.py

diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ConditionalController.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ConditionalController.py
index ccff7419d6822..c2e97e375d979 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ConditionalController.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/ConditionalController.py
@@ -16,6 +16,7 @@
 from dex.debugger.DebuggerControllers.DebuggerControllerBase import DebuggerControllerBase
 from dex.debugger.DebuggerBase import DebuggerBase
 from dex.utils.Exceptions import DebuggerException
+from dex.utils.Timeout import Timeout


 class BreakpointRange:
@@ -140,10 +141,26 @@ def _run_debugger_custom(self, cmdline):
             time.sleep(self._pause_between_steps)

         exit_desired = False
+        timed_out = False
+        total_timeout = Timeout(self.context.options.timeout_total)

         while not self.debugger.is_finished:
-            while self.debugger.is_running:
-                pass
+
+            breakpoint_timeout = Timeout(self.context.options.timeout_breakpoint)
+            while self.debugger.is_running and not timed_out:
+                # Check to see whether we've timed out while we're waiting.
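+                # (total_timeout is constructed once per session, while
+                # breakpoint_timeout is re-constructed after every stop, so
+                # it bounds the gap between consecutive debugger stops.)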
+                if total_timeout.timed_out():
+                    self.context.logger.error('Debugger session has been '
+                        f'running for {total_timeout.elapsed}s, timeout reached!')
+                    timed_out = True
+                if breakpoint_timeout.timed_out():
+                    self.context.logger.error(f'Debugger session has not '
+                        f'hit a breakpoint for {breakpoint_timeout.elapsed}s, timeout '
+                        'reached!')
+                    timed_out = True
+
+            if timed_out:
+                break

             step_info = self.debugger.get_step_info(self._watches,
                                                     self._step_index)

             if step_info.current_frame:
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DefaultController.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DefaultController.py
index 8315bb64c04c6..1cc2c4410768c 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DefaultController.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/DebuggerControllers/DefaultController.py
@@ -13,6 +13,7 @@
 from dex.debugger.DebuggerControllers.DebuggerControllerBase import DebuggerControllerBase
 from dex.debugger.DebuggerControllers.ControllerHelpers import in_source_file, update_step_watches
 from dex.utils.Exceptions import DebuggerException, LoadDebuggerException
+from dex.utils.Timeout import Timeout


 class EarlyExitCondition(object):
     def __init__(self, on_line, hit_count, expression, values):
@@ -81,12 +82,25 @@ def _run_debugger_custom(self, cmdline):
                     self.watches.update(command_obj.get_watches())
         early_exit_conditions = self._get_early_exit_conditions()

+        timed_out = False
+        total_timeout = Timeout(self.context.options.timeout_total)
         max_steps = self.context.options.max_steps
         for _ in range(max_steps):
-            while self.debugger.is_running:
-                pass
-
-            if self.debugger.is_finished:
+            breakpoint_timeout = Timeout(self.context.options.timeout_breakpoint)
+            while self.debugger.is_running and not timed_out:
+                # Check to see whether we've timed out while we're waiting.
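+                # (timing out is handled like normal termination: as soon as
+                # timed_out is set, the loop exits via the shared
+                # "timed_out or is_finished" check below.)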
+                if total_timeout.timed_out():
+                    self.context.logger.error('Debugger session has been '
+                        f'running for {total_timeout.elapsed}s, timeout reached!')
+                    timed_out = True
+                if breakpoint_timeout.timed_out():
+                    self.context.logger.error(f'Debugger session has not '
+                        f'hit a breakpoint for {breakpoint_timeout.elapsed}s, timeout '
+                        'reached!')
+                    timed_out = True
+
+            if timed_out or self.debugger.is_finished:
                 break

             self.step_index += 1
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py
index 48cb7e1a07898..c85310b37a8f4 100644
--- a/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/debugger/Debuggers.py
@@ -121,7 +121,21 @@ def add_debugger_tool_arguments(parser, context, defaults):
         default='',
         help='command line arguments for the test program, in addition to any '
         'provided by DexCommandLine')
-
+    parser.add_argument(
+        '--timeout-total',
+        metavar='<n>',
+        type=float,
+        default=0.0,
+        help='if >0, debugger session will automatically exit after '
+        'running for <n> seconds')
+    parser.add_argument(
+        '--timeout-breakpoint',
+        metavar='<n>',
+        type=float,
+        default=0.0,
+        help='if >0, debugger session will automatically exit after '
+        'waiting <n> seconds without hitting a '
+        'breakpoint')

 def handle_debugger_tool_base_options(context, defaults):  # noqa
     options = context.options
diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/utils/Timeout.py b/cross-project-tests/debuginfo-tests/dexter/dex/utils/Timeout.py
new file mode 100644
index 0000000000000..d59d7d7693a6f
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/dex/utils/Timeout.py
@@ -0,0 +1,31 @@
+# DExTer : Debugging Experience Tester
+# ~~~~~~   ~         ~~         ~   ~~
+#
+# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+"""Utility class to check for timeouts. Timer starts when the object is
+initialized, and can be checked by calling timed_out(). Passing a timeout
+value of 0.0 or less means a timeout will never be triggered, i.e.
+timed_out() will always return False.
+"""
+
+import time
+
+
+class Timeout(object):
+
+    def __init__(self, duration: float):
+        self.start = self.now
+        self.duration = duration
+
+    def timed_out(self):
+        if self.duration <= 0.0:
+            return False
+        return self.elapsed > self.duration
+
+    @property
+    def elapsed(self):
+        return self.now - self.start
+
+    @property
+    def now(self):
+        return time.time()

From d6ad4f01c3dafcab335bca66dac6e36d9eac8421 Mon Sep 17 00:00:00 2001
From: Teresa Johnson
Date: Thu, 29 Dec 2022 12:11:38 -0800
Subject: [PATCH 297/691] [MemProf] Context disambiguation cloning pass [patch
 1a/3]

Support for building, printing, and displaying CallsiteContextGraph
which represents the MemProf metadata contexts. Uses CRTP to enable
support for both IR (regular LTO) and summary (ThinLTO). This patch
includes the support for building it in regular LTO mode (from memprof
and callsite metadata), and the next patch will add the handling for
building it from ThinLTO summaries.

Also includes support for dumping the graph to text and to dot files.
Follow-on patches will contain the support for cloning on the graph and
in the IR.

The graph represents the call contexts in all memprof metadata on
allocation calls, with nodes for the allocations themselves, as well as
for the calls in each context.
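For a concrete picture of that input, here is a sketch of the metadata
shape the build consumes (stack ids invented for illustration; real
profiles carry 64-bit stack frame hashes):

  %call = call ptr @malloc(i64 %size), !memprof !0, !callsite !5
  !0 = !{!1, !3}            ; one MIB per profiled context
  !1 = !{!2, !"notcold"}    ; MIB = (call stack, allocation behavior)
  !2 = !{i64 1, i64 2}      ; stack ids of the context
  !3 = !{!4, !"cold"}
  !4 = !{i64 1, i64 3}
  !5 = !{i64 1}             ; stack id(s) of the allocation call itself
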
The graph is initially built from the allocation memprof metadata (or summary) MIBs. It is then updated to match calls with callsite metadata onto the nodes, updating it to reflect any inlining performed on those calls. Each MIB (representing an allocation's call context with allocation behavior) is assigned a unique context id during the graph build. The edges and nodes in the graph are decorated with the context ids they carry. This is used to correctly update the graph when cloning is performed so that we can uniquify the context for a single (possibly cloned) allocation. Depends on D140786. Differential Revision: https://reviews.llvm.org/D140908 --- .../IPO/MemProfContextDisambiguation.h | 38 + llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassBuilderPipelines.cpp | 11 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Transforms/IPO/CMakeLists.txt | 1 + .../IPO/MemProfContextDisambiguation.cpp | 1583 +++++++++++++++++ llvm/test/ThinLTO/X86/memprof-summary.ll | 184 -- .../MemProfContextDisambiguation/basic.ll | 158 ++ .../duplicate-context-ids.ll | 232 +++ .../duplicate-context-ids2.ll | 386 ++++ .../indirectcall.ll | 261 +++ .../MemProfContextDisambiguation/inlined.ll | 189 ++ .../MemProfContextDisambiguation/inlined2.ll | 135 ++ .../pass-pipeline.ll | 41 + 14 files changed, 3037 insertions(+), 184 deletions(-) create mode 100644 llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h create mode 100644 llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp delete mode 100644 llvm/test/ThinLTO/X86/memprof-summary.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/basic.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h new file mode 100644 index 0000000000000..56e56ed67f7df --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h @@ -0,0 +1,38 @@ +//==- MemProfContextDisambiguation.h - Context Disambiguation ----*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements support for context disambiguation of allocation calls for profile +// guided heap optimization using memprof metadata. See implementation file for +// details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H +#define LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +class Module; + +class MemProfContextDisambiguation + : public PassInfoMixin { + /// Run the context disambiguator on \p M, returns true if any changes made. 
+ bool processModule(Module &M); + +public: + MemProfContextDisambiguation() {} + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 89d2e6a4b2d1a..a04f8bbaa5dc0 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -117,6 +117,7 @@ #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/LoopExtractor.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" +#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" #include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/Transforms/IPO/ModuleInliner.h" #include "llvm/Transforms/IPO/OpenMPOpt.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 1d386139d9e6c..aaabe23049288 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -57,6 +57,7 @@ #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" +#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" #include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/Transforms/IPO/ModuleInliner.h" #include "llvm/Transforms/IPO/OpenMPOpt.h" @@ -271,6 +272,10 @@ static cl::opt AttributorRun( clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs"))); +cl::opt EnableMemProfContextDisambiguation( + "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation")); + PipelineTuningOptions::PipelineTuningOptions() { LoopInterleaving = true; LoopVectorization = true; @@ -1709,6 +1714,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, InlineContext{ThinOrFullLTOPhase::FullLTOPostLink, InlinePass::CGSCCInliner})); + // Perform context disambiguation after inlining, since that would reduce the + // amount of additional cloning required to distinguish the allocation + // contexts. + if (EnableMemProfContextDisambiguation) + MPM.addPass(MemProfContextDisambiguation()); + // Optimize globals again after we ran the inliner. 
MPM.addPass(GlobalOptPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 04d648580a8c5..82592a1ee9b55 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -87,6 +87,7 @@ MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass()) MODULE_PASS("partial-inliner", PartialInlinerPass()) +MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation()) MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion()) MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen()) MODULE_PASS("pgo-instr-use", PGOInstrumentationUse()) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 063a9a60d0cb5..e03aff0f65d7a 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_component_library(LLVMipo Internalize.cpp LoopExtractor.cpp LowerTypeTests.cpp + MemProfContextDisambiguation.cpp MergeFunctions.cpp ModuleInliner.cpp OpenMPOpt.cpp diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp new file mode 100644 index 0000000000000..fc8b12df67822 --- /dev/null +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -0,0 +1,1583 @@ +//==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements support for context disambiguation of allocation +// calls for profile guided heap optimization. Specifically, it uses Memprof +// profiles which indicate context specific allocation behavior (currently +// distinguishing cold vs hot memory allocations). Cloning is performed to +// expose the cold allocation call contexts, and the allocation calls are +// subsequently annotated with an attribute for later transformation. +// +// The transformations can be performed either directly on IR (regular LTO), or +// (eventually) on a ThinLTO index (later applied to the IR during the ThinLTO +// backend). Both types of LTO operate on a the same base graph representation, +// which uses CRTP to support either IR or Index formats. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/MemoryProfileInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" +#include +#include +using namespace llvm; +using namespace llvm::memprof; + +#define DEBUG_TYPE "memprof-context-disambiguation" + +static cl::opt DotFilePathPrefix( + "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, + cl::value_desc("filename"), + cl::desc("Specify the path prefix of the MemProf dot files.")); + +static cl::opt ExportToDot("memprof-export-to-dot", cl::init(false), + cl::Hidden, + cl::desc("Export graph to dot files.")); + +static cl::opt + DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, + cl::desc("Dump CallingContextGraph to stdout after each stage.")); + +static cl::opt + VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, + cl::desc("Perform verification checks on CallingContextGraph.")); + +static cl::opt + VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, + cl::desc("Perform frequent verification checks on nodes.")); + +inline bool hasSingleAllocType(uint8_t AllocTypes) { + switch (AllocTypes) { + case (uint8_t)AllocationType::Cold: + case (uint8_t)AllocationType::NotCold: + return true; + break; + case (uint8_t)AllocationType::None: + assert(false); + break; + default: + return false; + break; + } + llvm_unreachable("invalid alloc type"); +} + +/// CRTP base for graphs built from either IR or ThinLTO summary index. +/// +/// The graph represents the call contexts in all memprof metadata on allocation +/// calls, with nodes for the allocations themselves, as well as for the calls +/// in each context. The graph is initially built from the allocation memprof +/// metadata (or summary) MIBs. It is then updated to match calls with callsite +/// metadata onto the nodes, updating it to reflect any inlining performed on +/// those calls. +/// +/// Each MIB (representing an allocation's call context with allocation +/// behavior) is assigned a unique context id during the graph build. The edges +/// and nodes in the graph are decorated with the context ids they carry. This +/// is used to correctly update the graph when cloning is performed so that we +/// can uniquify the context for a single (possibly cloned) allocation. +template +class CallsiteContextGraph { +public: + CallsiteContextGraph() = default; + CallsiteContextGraph(const CallsiteContextGraph &) = default; + CallsiteContextGraph(CallsiteContextGraph &&) = default; + + /// Main entry point to perform analysis and transformations on graph. 
+ bool process(); + + void dump() const; + void print(raw_ostream &OS) const; + + friend raw_ostream &operator<<(raw_ostream &OS, + const CallsiteContextGraph &CCG) { + CCG.print(OS); + return OS; + } + + friend struct GraphTraits< + const CallsiteContextGraph *>; + friend struct DOTGraphTraits< + const CallsiteContextGraph *>; + + void exportToDot(std::string Label) const; + + /// Represents a function clone via FuncTy pointer and clone number pair. + struct FuncInfo final + : public std::pair { + using Base = std::pair; + FuncInfo(const Base &B) : Base(B) {} + FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {} + explicit operator bool() const { return this->first != nullptr; } + FuncTy *func() const { return this->first; } + unsigned cloneNo() const { return this->second; } + }; + + /// Represents a callsite clone via CallTy and clone number pair. + struct CallInfo final : public std::pair { + using Base = std::pair; + CallInfo(const Base &B) : Base(B) {} + CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0) + : Base(Call, CloneNo) {} + explicit operator bool() const { return (bool)this->first; } + CallTy call() const { return this->first; } + unsigned cloneNo() const { return this->second; } + void setCloneNo(unsigned N) { this->second = N; } + void print(raw_ostream &OS) const { + if (!operator bool()) { + assert(!cloneNo()); + OS << "null Call"; + return; + } + call()->print(OS); + OS << "\t(clone " << cloneNo() << ")"; + } + void dump() const { + print(dbgs()); + dbgs() << "\n"; + } + friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) { + Call.print(OS); + return OS; + } + }; + + struct ContextEdge; + + /// Node in the Callsite Context Graph + struct ContextNode { + // Keep this for now since in the IR case where we have an Instruction* it + // is not as immediately discoverable. Used for printing richer information + // when dumping graph. + bool IsAllocation; + + // Keeps track of when the Call was reset to null because there was + // recursion. + bool Recursive = false; + + // The corresponding allocation or interior call. + CallInfo Call; + + // For alloc nodes this is a unique id assigned when constructed, and for + // callsite stack nodes it is the original stack id when the node is + // constructed from the memprof MIB metadata on the alloc nodes. Note that + // this is only used when matching callsite metadata onto the stack nodes + // created when processing the allocation memprof MIBs, and for labeling + // nodes in the dot graph. Therefore we don't bother to assign a value for + // clones. + uint64_t OrigStackOrAllocId = 0; + + // This will be formed by ORing together the AllocationType enum values + // for contexts including this node. + uint8_t AllocTypes = 0; + + // Edges to all callees in the profiled call stacks. + // TODO: Should this be a map (from Callee node) for more efficient lookup? + std::vector> CalleeEdges; + + // Edges to all callers in the profiled call stacks. + // TODO: Should this be a map (from Caller node) for more efficient lookup? + std::vector> CallerEdges; + + // The set of IDs for contexts including this node. + DenseSet ContextIds; + + // List of clones of this ContextNode, initially empty. + std::vector Clones; + + // If a clone, points to the original uncloned node. 
+ ContextNode *CloneOf = nullptr; + + ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {} + + ContextNode(bool IsAllocation, CallInfo C) + : IsAllocation(IsAllocation), Call(C) {} + + std::unique_ptr clone() { + auto Clone = std::make_unique(IsAllocation, Call); + if (CloneOf) { + CloneOf->Clones.push_back(Clone.get()); + Clone->CloneOf = CloneOf; + } else { + Clones.push_back(Clone.get()); + Clone->CloneOf = this; + } + return Clone; + } + + ContextNode *getOrigNode() { + if (!CloneOf) + return this; + return CloneOf; + } + + void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, + unsigned int ContextId); + + ContextEdge *findEdgeFromCallee(const ContextNode *Callee); + ContextEdge *findEdgeFromCaller(const ContextNode *Caller); + void eraseCalleeEdge(const ContextEdge *Edge); + void eraseCallerEdge(const ContextEdge *Edge); + + void setCall(CallInfo C) { Call = C; } + + bool hasCall() const { return (bool)Call.call(); } + + void printCall(raw_ostream &OS) const { Call.print(OS); } + + // True if this node was effectively removed from the graph, in which case + // its context id set, caller edges, and callee edges should all be empty. + bool isRemoved() const { + assert(ContextIds.empty() == + (CalleeEdges.empty() && CallerEdges.empty())); + return ContextIds.empty(); + } + + void dump() const; + void print(raw_ostream &OS) const; + + friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) { + Node.print(OS); + return OS; + } + }; + + /// Edge in the Callsite Context Graph from a ContextNode N to a caller or + /// callee. + struct ContextEdge { + ContextNode *Callee; + ContextNode *Caller; + + // This will be formed by ORing together the AllocationType enum values + // for contexts including this edge. + uint8_t AllocTypes = 0; + + // The set of IDs for contexts including this edge. + DenseSet ContextIds; + + ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType, + DenseSet ContextIds) + : Callee(Callee), Caller(Caller), AllocTypes(AllocType), + ContextIds(ContextIds) {} + + DenseSet &getContextIds() { return ContextIds; } + + void dump() const; + void print(raw_ostream &OS) const; + + friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) { + Edge.print(OS); + return OS; + } + }; + +protected: + /// Get a list of nodes corresponding to the stack ids in the given callsite + /// context. + template + std::vector + getStackIdsWithContextNodes(CallStack &CallsiteContext); + + /// Adds nodes for the given allocation and any stack ids on its memprof MIB + /// metadata (or summary). + ContextNode *addAllocNode(CallInfo Call, const FuncTy *F); + + /// Adds nodes for the given MIB stack ids. + template + void addStackNodesForMIB(ContextNode *AllocNode, + CallStack &StackContext, + CallStack &CallsiteContext, + AllocationType AllocType); + + /// Matches all callsite metadata (or summary) to the nodes created for + /// allocation memprof MIB metadata, synthesizing new nodes to reflect any + /// inlining performed on those callsite instructions. + void updateStackNodes(); + + /// Update graph to conservatively handle any callsite stack nodes that target + /// multiple different callee target functions. + void handleCallsitesWithMultipleTargets(); + + /// Save lists of calls with MemProf metadata in each function, for faster + /// iteration. + std::vector>> + FuncToCallsWithMetadata; + + /// Map from callsite node to the enclosing caller function. 
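+  /// Nodes do not themselves record the function they belong to, so this
+  /// side map is consulted wherever that is needed, e.g. when emitting dot
+  /// labels or (in the IR case) comparing a callee against its target
+  /// function.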
+  std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
+
+private:
+  using EdgeIter =
+      typename std::vector<std::shared_ptr<ContextEdge>>::iterator;
+
+  using CallContextInfo = std::tuple<CallTy, std::vector<uint64_t>,
+                                     const FuncTy *, DenseSet<uint32_t>>;
+
+  /// Assigns the given Node to calls at or inlined into the location with
+  /// the Node's stack id, after post order traversing and processing its
+  /// caller nodes. Uses the call information recorded in the given
+  /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences
+  /// as needed. Called by updateStackNodes which sets up the given
+  /// StackIdToMatchingCalls map.
+  void assignStackNodesPostOrder(
+      ContextNode *Node, DenseSet<const ContextNode *> &Visited,
+      DenseMap<uint64_t, std::vector<CallContextInfo>>
+          &StackIdToMatchingCalls);
+
+  /// Duplicates the given set of context ids, updating the provided
+  /// map from each original id with the newly generated context ids,
+  /// and returning the new duplicated id set.
+  DenseSet<uint32_t> duplicateContextIds(
+      const DenseSet<uint32_t> &StackSequenceContextIds,
+      DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
+
+  /// Propagates all duplicated context ids across the graph.
+  void propagateDuplicateContextIds(
+      const DenseMap<uint32_t, DenseSet<uint32_t>> &OldToNewContextIds);
+
+  /// Connect the NewNode to OrigNode's callees if TowardsCallee is true,
+  /// else to its callers. Also updates OrigNode's edges to remove any context
+  /// ids moved to the newly created edge.
+  void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode,
+                      bool TowardsCallee);
+
+  /// Get the stack id corresponding to the given Id or Index (for IR this
+  /// will return itself, for a summary index this will return the id
+  /// recorded in the index for that stack id index value).
+  uint64_t getStackId(uint64_t IdOrIndex) const {
+    return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
+  }
+
+  /// Returns true if the given call targets the given function.
+  bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) {
+    return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(Call, Func);
+  }
+
+  /// Get a list of nodes corresponding to the stack ids in the given
+  /// callsite's context.
+  std::vector<uint64_t> getStackIdsWithContextNodesForCall(CallTy Call) {
+    return static_cast<DerivedCCG *>(this)->getStackIdsWithContextNodesForCall(
+        Call);
+  }
+
+  /// Get the last stack id in the context for callsite.
+  uint64_t getLastStackId(CallTy Call) {
+    return static_cast<DerivedCCG *>(this)->getLastStackId(Call);
+  }
+
+  /// Gets a label to use in the dot graph for the given call clone in the
+  /// given function.
+  std::string getLabel(const FuncTy *Func, const CallTy Call,
+                       unsigned CloneNo) const {
+    return static_cast<const DerivedCCG *>(this)->getLabel(Func, Call,
+                                                           CloneNo);
+  }
+
+  /// Helpers to find the node corresponding to the given call or stackid.
+  ContextNode *getNodeForInst(const CallInfo &C);
+  ContextNode *getNodeForAlloc(const CallInfo &C);
+  ContextNode *getNodeForStackId(uint64_t StackId);
+
+  /// Removes the node information recorded for the given call.
+  void unsetNodeForInst(const CallInfo &C);
+
+  /// Computes the alloc type corresponding to the given context ids, by
+  /// unioning their recorded alloc types.
+  uint8_t computeAllocType(DenseSet<uint32_t> &ContextIds);
+
+  /// Map from each context ID to the AllocationType assigned to that context.
+  std::map<uint32_t, AllocationType> ContextIdToAllocationType;
+
+  /// Identifies the context node created for a stack id when adding the MIB
+  /// contexts to the graph. This is used to locate the context nodes when
+  /// trying to assign the corresponding callsites with those stack ids to
+  /// these nodes.
+  std::map<uint64_t, ContextNode *> StackEntryIdToContextNodeMap;
+
+  /// Maps to track the calls to their corresponding nodes in the graph.
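+  /// Allocation calls (those that carried !memprof MIBs) and other callsites
+  /// (those with only !callsite metadata) are tracked in separate maps; a
+  /// given call is expected to appear in at most one of the two.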
+ std::map AllocationCallToContextNodeMap; + std::map NonAllocationCallToContextNodeMap; + + /// Owner of all ContextNode unique_ptrs. + std::vector> NodeOwner; + + /// Perform sanity checks on graph when requested. + void check() const; + + /// Keeps track of the last unique context id assigned. + unsigned int LastContextId = 0; +}; + +template +using ContextNode = + typename CallsiteContextGraph::ContextNode; +template +using ContextEdge = + typename CallsiteContextGraph::ContextEdge; +template +using FuncInfo = + typename CallsiteContextGraph::FuncInfo; +template +using CallInfo = + typename CallsiteContextGraph::CallInfo; + +/// CRTP derived class for graphs built from IR (regular LTO). +class ModuleCallsiteContextGraph + : public CallsiteContextGraph { +public: + ModuleCallsiteContextGraph(Module &M); + +private: + friend CallsiteContextGraph; + + uint64_t getStackId(uint64_t IdOrIndex) const; + bool calleeMatchesFunc(Instruction *Call, const Function *Func); + uint64_t getLastStackId(Instruction *Call); + std::vector getStackIdsWithContextNodesForCall(Instruction *Call); + std::string getLabel(const Function *Func, const Instruction *Call, + unsigned CloneNo) const; + + const Module &Mod; +}; + +namespace { + +struct FieldSeparator { + bool Skip = true; + const char *Sep; + + FieldSeparator(const char *Sep = ", ") : Sep(Sep) {} +}; + +raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { + if (FS.Skip) { + FS.Skip = false; + return OS; + } + return OS << FS.Sep; +} + +} // end anonymous namespace + +template +ContextNode * +CallsiteContextGraph::getNodeForInst( + const CallInfo &C) { + ContextNode *Node = getNodeForAlloc(C); + if (Node) + return Node; + + auto NonAllocCallNode = NonAllocationCallToContextNodeMap.find(C); + if (NonAllocCallNode != NonAllocationCallToContextNodeMap.end()) { + return NonAllocCallNode->second; + } + return nullptr; +} + +template +ContextNode * +CallsiteContextGraph::getNodeForAlloc( + const CallInfo &C) { + auto AllocCallNode = AllocationCallToContextNodeMap.find(C); + if (AllocCallNode != AllocationCallToContextNodeMap.end()) { + return AllocCallNode->second; + } + return nullptr; +} + +template +ContextNode * +CallsiteContextGraph::getNodeForStackId( + uint64_t StackId) { + auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId); + if (StackEntryNode != StackEntryIdToContextNodeMap.end()) + return StackEntryNode->second; + return nullptr; +} + +template +void CallsiteContextGraph::unsetNodeForInst( + const CallInfo &C) { + AllocationCallToContextNodeMap.erase(C) || + NonAllocationCallToContextNodeMap.erase(C); + assert(!AllocationCallToContextNodeMap.count(C) && + !NonAllocationCallToContextNodeMap.count(C)); +} + +template +void CallsiteContextGraph::ContextNode:: + addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, + unsigned int ContextId) { + for (auto &Edge : CallerEdges) { + if (Edge->Caller == Caller) { + Edge->AllocTypes |= (uint8_t)AllocType; + Edge->getContextIds().insert(ContextId); + return; + } + } + std::shared_ptr Edge = std::make_shared( + this, Caller, (uint8_t)AllocType, DenseSet({ContextId})); + CallerEdges.push_back(Edge); + Caller->CalleeEdges.push_back(Edge); +} + +template +ContextEdge * +CallsiteContextGraph::ContextNode:: + findEdgeFromCallee(const ContextNode *Callee) { + for (const auto &Edge : CalleeEdges) + if (Edge->Callee == Callee) + return Edge.get(); + return nullptr; +} + +template +ContextEdge * +CallsiteContextGraph::ContextNode:: + findEdgeFromCaller(const ContextNode *Caller) { 
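+  // Like findEdgeFromCallee above, this is a linear scan, so lookup cost
+  // grows with the node's edge count; the TODOs on CalleeEdges/CallerEdges
+  // note a possible switch to a map keyed by the far-end node.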
+ for (const auto &Edge : CallerEdges) + if (Edge->Caller == Caller) + return Edge.get(); + return nullptr; +} + +template +void CallsiteContextGraph::ContextNode:: + eraseCalleeEdge(const ContextEdge *Edge) { + auto EI = + std::find_if(CalleeEdges.begin(), CalleeEdges.end(), + [Edge](const std::shared_ptr &CalleeEdge) { + return CalleeEdge.get() == Edge; + }); + assert(EI != CalleeEdges.end()); + CalleeEdges.erase(EI); +} + +template +void CallsiteContextGraph::ContextNode:: + eraseCallerEdge(const ContextEdge *Edge) { + auto EI = + std::find_if(CallerEdges.begin(), CallerEdges.end(), + [Edge](const std::shared_ptr &CallerEdge) { + return CallerEdge.get() == Edge; + }); + assert(EI != CallerEdges.end()); + CallerEdges.erase(EI); +} + +template +uint8_t CallsiteContextGraph::computeAllocType( + DenseSet &ContextIds) { + uint8_t BothTypes = + (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; + uint8_t AllocType = (uint8_t)AllocationType::None; + for (auto Id : ContextIds) { + AllocType |= (uint8_t)ContextIdToAllocationType[Id]; + // Bail early if alloc type reached both, no further refinement. + if (AllocType == BothTypes) + return AllocType; + } + return AllocType; +} + +template +ContextNode * +CallsiteContextGraph::addAllocNode( + CallInfo Call, const FuncTy *F) { + assert(!getNodeForAlloc(Call)); + NodeOwner.push_back( + std::make_unique(/*IsAllocation=*/true, Call)); + ContextNode *AllocNode = NodeOwner.back().get(); + AllocationCallToContextNodeMap[Call] = AllocNode; + NodeToCallingFunc[AllocNode] = F; + // Use LastContextId as a uniq id for MIB allocation nodes. + AllocNode->OrigStackOrAllocId = LastContextId; + // Alloc type should be updated as we add in the MIBs. We should assert + // afterwards that it is not still None. + AllocNode->AllocTypes = (uint8_t)AllocationType::None; + + return AllocNode; +} + +template +template +void CallsiteContextGraph::addStackNodesForMIB( + ContextNode *AllocNode, CallStack &StackContext, + CallStack &CallsiteContext, AllocationType AllocType) { + ContextIdToAllocationType[++LastContextId] = AllocType; + + // Update alloc type and context ids for this MIB. + AllocNode->AllocTypes |= (uint8_t)AllocType; + AllocNode->ContextIds.insert(LastContextId); + + // Now add or update nodes for each stack id in alloc's context. + // Later when processing the stack ids on non-alloc callsites we will adjust + // for any inlining in the context. + ContextNode *PrevNode = AllocNode; + // Look for recursion (direct recursion should have been collapsed by + // module summary analysis, here we should just be detecting mutual + // recursion). Mark these nodes so we don't try to clone. + SmallSet StackIdSet; + // Skip any on the allocation call (inlining). 
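+  // For example (a sketch with made-up frame names): if the allocation
+  // call's !callsite metadata covers frames [A, B] because of inlining, and
+  // the MIB's full stack context is [A, B, C, D], iteration starts at C so
+  // that no stack nodes are created for frames already represented by the
+  // allocation node itself.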
+ for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext); + ContextIter != StackContext.end(); ++ContextIter) { + auto StackId = getStackId(*ContextIter); + ContextNode *StackNode = getNodeForStackId(StackId); + if (!StackNode) { + NodeOwner.push_back( + std::make_unique(/*IsAllocation=*/false)); + StackNode = NodeOwner.back().get(); + StackEntryIdToContextNodeMap[StackId] = StackNode; + StackNode->OrigStackOrAllocId = StackId; + } + auto Ins = StackIdSet.insert(StackId); + if (!Ins.second) + StackNode->Recursive = true; + StackNode->ContextIds.insert(LastContextId); + StackNode->AllocTypes |= (uint8_t)AllocType; + PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId); + PrevNode = StackNode; + } +} + +template +DenseSet +CallsiteContextGraph::duplicateContextIds( + const DenseSet &StackSequenceContextIds, + DenseMap> &OldToNewContextIds) { + DenseSet NewContextIds; + for (auto OldId : StackSequenceContextIds) { + NewContextIds.insert(++LastContextId); + OldToNewContextIds[OldId].insert(LastContextId); + assert(ContextIdToAllocationType.count(OldId)); + // The new context has the same allocation type as original. + ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; + } + return NewContextIds; +} + +template +void CallsiteContextGraph:: + propagateDuplicateContextIds( + const DenseMap> &OldToNewContextIds) { + // Build a set of duplicated context ids corresponding to the input id set. + auto GetNewIds = [&OldToNewContextIds](const DenseSet &ContextIds) { + DenseSet NewIds; + for (auto Id : ContextIds) + if (auto NewId = OldToNewContextIds.find(Id); + NewId != OldToNewContextIds.end()) + NewIds.insert(NewId->second.begin(), NewId->second.end()); + return NewIds; + }; + + // Recursively update context ids sets along caller edges. + auto UpdateCallers = [&](ContextNode *Node, + DenseSet &Visited, + auto &&UpdateCallers) -> void { + for (auto Edge : Node->CallerEdges) { + auto Inserted = Visited.insert(Edge.get()); + if (!Inserted.second) + continue; + ContextNode *NextNode = Edge->Caller; + DenseSet NewIdsToAdd = GetNewIds(Edge->getContextIds()); + // Only need to recursively iterate to NextNode via this caller edge if + // it resulted in any added ids to NextNode. + if (!NewIdsToAdd.empty()) { + Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); + NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); + UpdateCallers(NextNode, Visited, UpdateCallers); + } + } + }; + + DenseSet Visited; + for (auto &Entry : AllocationCallToContextNodeMap) { + auto *Node = Entry.second; + // Update ids on the allocation nodes before calling the recursive + // update along caller edges, since this simplifies the logic during + // that traversal. + DenseSet NewIdsToAdd = GetNewIds(Node->ContextIds); + Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); + UpdateCallers(Node, Visited, UpdateCallers); + } +} + +template +void CallsiteContextGraph::connectNewNode( + ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) { + // Make a copy of the context ids, since this will be adjusted below as they + // are moved. + DenseSet RemainingContextIds = NewNode->ContextIds; + auto &OrigEdges = + TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges; + // Increment iterator in loop so that we can remove edges as needed. 
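+  // That is, the loop below follows the standard erase-while-iterating idiom
+  // (a generic C++ sketch, with a hypothetical shouldRemove predicate):
+  //   for (auto EI = Edges.begin(); EI != Edges.end();) {
+  //     if (shouldRemove(*EI))
+  //       EI = Edges.erase(EI); // erase() returns the next valid iterator
+  //     else
+  //       ++EI;
+  //   }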
+ for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) { + auto Edge = *EI; + // Remove any matching context ids from Edge, return set that were found and + // removed, these are the new edge's context ids. Also update the remaining + // (not found ids). + DenseSet NewEdgeContextIds, NotFoundContextIds; + set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds, + NotFoundContextIds); + RemainingContextIds.swap(NotFoundContextIds); + // If no matching context ids for this edge, skip it. + if (NewEdgeContextIds.empty()) { + ++EI; + continue; + } + if (TowardsCallee) { + auto NewEdge = std::make_shared( + Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds), + NewEdgeContextIds); + NewNode->CalleeEdges.push_back(NewEdge); + NewEdge->Callee->CallerEdges.push_back(NewEdge); + } else { + auto NewEdge = std::make_shared( + NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds), + NewEdgeContextIds); + NewNode->CallerEdges.push_back(NewEdge); + NewEdge->Caller->CalleeEdges.push_back(NewEdge); + } + // Remove old edge if context ids empty. + if (Edge->getContextIds().empty()) { + if (TowardsCallee) { + Edge->Callee->eraseCallerEdge(Edge.get()); + EI = OrigNode->CalleeEdges.erase(EI); + } else { + Edge->Caller->eraseCalleeEdge(Edge.get()); + EI = OrigNode->CallerEdges.erase(EI); + } + continue; + } + ++EI; + } +} + +template +void CallsiteContextGraph:: + assignStackNodesPostOrder(ContextNode *Node, + DenseSet &Visited, + DenseMap> + &StackIdToMatchingCalls) { + auto Inserted = Visited.insert(Node); + if (!Inserted.second) + return; + // Post order traversal. Iterate over a copy since we may add nodes and + // therefore new callers during the recursive call, invalidating any + // iterator over the original edge vector. We don't need to process these + // new nodes as they were already processed on creation. + auto CallerEdges = Node->CallerEdges; + for (auto &Edge : CallerEdges) { + // Skip any that have been removed during the recursion. + if (!Edge) + continue; + assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); + } + + // If this node's stack id is in the map, update the graph to contain new + // nodes representing any inlining at interior callsites. Note we move the + // associated context ids over to the new nodes. + + // Ignore this node if it is for an allocation or we didn't record any + // stack id lists ending at it. + if (Node->IsAllocation || + !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId)) + return; + + auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId]; + // Handle the simple case first. A single call with a single stack id. + // In this case there is no need to create any new context nodes, simply + // assign the context node for stack id to this Call. + if (Calls.size() == 1) { + auto &[Call, Ids, Func, SavedContextIds] = Calls[0]; + if (Ids.size() == 1) { + assert(SavedContextIds.empty()); + // It should be this Node + assert(Node == getNodeForStackId(Ids[0])); + if (Node->Recursive) + return; + Node->setCall(Call); + NonAllocationCallToContextNodeMap[Call] = Node; + NodeToCallingFunc[Node] = Func; + return; + } + } + + // Find the node for the last stack id, which should be the same + // across all calls recorded for this id, and is this node's id. + uint64_t LastId = Node->OrigStackOrAllocId; + ContextNode *LastNode = getNodeForStackId(LastId); + // We should only have kept stack ids that had nodes. 
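+  // (updateStackNodes() only records calls whose outermost stack id resolved
+  // to a node, so this lookup is expected to succeed.)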
+  assert(LastNode);
+
+  for (unsigned I = 0; I < Calls.size(); I++) {
+    auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
+    // Skip any for which we didn't assign any ids, these don't get a node in
+    // the graph.
+    if (SavedContextIds.empty())
+      continue;
+
+    assert(LastId == Ids.back());
+
+    ContextNode *FirstNode = getNodeForStackId(Ids[0]);
+    assert(FirstNode);
+
+    // Recompute the context ids for this stack id sequence (the
+    // intersection of the context ids of the corresponding nodes).
+    // Start with the ids we saved in the map for this call, which could be
+    // duplicated context ids. We have to recompute as we might have overlap
+    // between the saved context ids for different last nodes, and
+    // removed them already during the post order traversal.
+    set_intersect(SavedContextIds, FirstNode->ContextIds);
+    ContextNode *PrevNode = nullptr;
+    for (auto Id : Ids) {
+      ContextNode *CurNode = getNodeForStackId(Id);
+      // We should only have kept stack ids that had nodes and weren't
+      // recursive.
+      assert(CurNode);
+      assert(!CurNode->Recursive);
+      if (!PrevNode) {
+        PrevNode = CurNode;
+        continue;
+      }
+      auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
+      if (!Edge) {
+        SavedContextIds.clear();
+        break;
+      }
+      PrevNode = CurNode;
+      set_intersect(SavedContextIds, Edge->getContextIds());
+
+      // If we now have no context ids for the clone, skip this call.
+      if (SavedContextIds.empty())
+        break;
+    }
+    if (SavedContextIds.empty())
+      continue;
+
+    // Create new context node.
+    NodeOwner.push_back(
+        std::make_unique<ContextNode>(/*IsAllocation=*/false, Call));
+    ContextNode *NewNode = NodeOwner.back().get();
+    NodeToCallingFunc[NewNode] = Func;
+    NonAllocationCallToContextNodeMap[Call] = NewNode;
+    NewNode->ContextIds = SavedContextIds;
+    NewNode->AllocTypes = computeAllocType(NewNode->ContextIds);
+
+    // Connect to callees of innermost stack frame in inlined call chain.
+    // This updates context ids for FirstNode's callees to reflect those
+    // moved to NewNode.
+    connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true);
+
+    // Connect to callers of outermost stack frame in inlined call chain.
+    // This updates context ids for LastNode's callers to reflect those
+    // moved to NewNode.
+    connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false);
+
+    // Now we need to remove context ids from edges/nodes between First and
+    // Last Node.
+    PrevNode = nullptr;
+    for (auto Id : Ids) {
+      ContextNode *CurNode = getNodeForStackId(Id);
+      // We should only have kept stack ids that had nodes.
+      assert(CurNode);
+
+      // Remove the context ids moved to NewNode from CurNode, and the
+      // edge from the prior node.
+      set_subtract(CurNode->ContextIds, NewNode->ContextIds);
+      if (PrevNode) {
+        auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
+        assert(PrevEdge);
+        set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds);
+        if (PrevEdge->getContextIds().empty()) {
+          PrevNode->eraseCallerEdge(PrevEdge);
+          CurNode->eraseCalleeEdge(PrevEdge);
+        }
+      }
+      PrevNode = CurNode;
+    }
+  }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
+  // Map of stack id to all calls with that as the last (outermost caller)
+  // callsite id that has a context node (some might not due to pruning
+  // performed during matching of the allocation profile contexts).
+  // The CallContextInfo contains the Call and a list of its stack ids with
+  // ContextNodes, the function containing Call, and the set of context ids
+  // the analysis will eventually identify for use in any new node created
+  // for that callsite.
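+  // Shape sketch with made-up values: stack id 42 might map to
+  //   { (CallA, [7, 42], FuncF, {}), (CallB, [42], FuncG, {}) }
+  // where each stack id list ends with the key 42 (the outermost frame that
+  // has a context node), and the context id sets start out empty.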
+  DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
+  for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
+    for (auto &Call : CallsWithMetadata) {
+      // Ignore allocations, already handled.
+      if (AllocationCallToContextNodeMap.count(Call))
+        continue;
+      auto StackIdsWithContextNodes =
+          getStackIdsWithContextNodesForCall(Call.call());
+      // If there were no nodes created for MIBs on allocs (maybe this was in
+      // the unambiguous part of the MIB stack that was pruned), ignore.
+      if (StackIdsWithContextNodes.empty())
+        continue;
+      // Otherwise, record this Call along with the list of ids for the last
+      // (outermost caller) stack id with a node.
+      StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
+          {Call.call(), StackIdsWithContextNodes, Func, {}});
+    }
+  }
+
+  // First make a pass through all stack ids that correspond to a call,
+  // as identified in the above loop. Compute the context ids corresponding
+  // to each of these calls when they correspond to multiple stack ids due
+  // to inlining. Perform any duplication of context ids required when
+  // there is more than one call with the same stack ids. Their (possibly
+  // newly duplicated) context ids are saved in the StackIdToMatchingCalls
+  // map.
+  DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
+  for (auto &It : StackIdToMatchingCalls) {
+    auto &Calls = It.getSecond();
+    // Skip single calls with a single stack id. These don't need a new node.
+    if (Calls.size() == 1) {
+      auto &Ids = std::get<1>(Calls[0]);
+      if (Ids.size() == 1)
+        continue;
+    }
+    // In order to do the best and maximal matching of inlined calls to
+    // context node sequences we will sort the vectors of stack ids in
+    // descending order of length, and within each length, lexicographically
+    // by stack id. The latter is so that we can specially handle calls that
+    // have identical stack id sequences (either due to cloning or
+    // artificially because of the MIB context pruning).
+    std::sort(Calls.begin(), Calls.end(),
+              [](const CallContextInfo &A, const CallContextInfo &B) {
+                auto &IdsA = std::get<1>(A);
+                auto &IdsB = std::get<1>(B);
+                return IdsA.size() > IdsB.size() ||
+                       (IdsA.size() == IdsB.size() && IdsA < IdsB);
+              });
+
+    // Find the node for the last stack id, which should be the same
+    // across all calls recorded for this id, and is the id for this
+    // entry in the StackIdToMatchingCalls map.
+    uint64_t LastId = It.getFirst();
+    ContextNode *LastNode = getNodeForStackId(LastId);
+    // We should only have kept stack ids that had nodes.
+    assert(LastNode);
+
+    if (LastNode->Recursive)
+      continue;
+
+    // Initialize the context ids with the last node's. We will subsequently
+    // refine the context ids by computing the intersection along all edges.
+    DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds;
+    assert(!LastNodeContextIds.empty());
+
+    for (unsigned I = 0; I < Calls.size(); I++) {
+      auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
+      assert(SavedContextIds.empty());
+      assert(LastId == Ids.back());
+
+      // First compute the context ids for this stack id sequence (the
+      // intersection of the context ids of the corresponding nodes).
+      // Start with the remaining saved ids for the last node.
+      assert(!LastNodeContextIds.empty());
+      DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
+
+      ContextNode *PrevNode = LastNode;
+      ContextNode *CurNode = LastNode;
+      bool Skip = false;
+
+      // Iterate backwards through the stack Ids, starting after the last Id
+      // in the list, which was handled once outside for all Calls.
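+      // E.g. for Ids == [I0, I1, I2] (innermost frame first, made-up ids),
+      // LastId is I2 and the walk below visits I1 then I0, intersecting
+      // StackSequenceContextIds with the ids on each caller edge traversed.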
+ for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { + auto Id = *IdIter; + CurNode = getNodeForStackId(Id); + // We should only have kept stack ids that had nodes. + assert(CurNode); + + if (CurNode->Recursive) { + Skip = true; + break; + } + + auto *Edge = CurNode->findEdgeFromCaller(PrevNode); + // If there is no edge then the nodes belong to different MIB contexts, + // and we should skip this inlined context sequence. For example, this + // particular inlined context may include stack ids A->B, and we may + // indeed have nodes for both A and B, but it is possible that they were + // never profiled in sequence in a single MIB for any allocation (i.e. + // we might have profiled an allocation that involves the callsite A, + // but through a different one of its callee callsites, and we might + // have profiled an allocation that involves callsite B, but reached + // from a different caller callsite). + if (!Edge) { + Skip = true; + break; + } + PrevNode = CurNode; + + // Update the context ids, which is the intersection of the ids along + // all edges in the sequence. + set_intersect(StackSequenceContextIds, Edge->getContextIds()); + + // If we now have no context ids for clone, skip this call. + if (StackSequenceContextIds.empty()) { + Skip = true; + break; + } + } + if (Skip) + continue; + + // If some of this call's stack ids did not have corresponding nodes (due + // to pruning), don't include any context ids for contexts that extend + // beyond these nodes. Otherwise we would be matching part of unrelated / + // not fully matching stack contexts. To do this, subtract any context ids + // found in caller nodes of the last node found above. + if (Ids.back() != getLastStackId(Call)) { + for (auto PE : LastNode->CallerEdges) { + set_subtract(StackSequenceContextIds, PE->getContextIds()); + if (StackSequenceContextIds.empty()) + break; + } + // If we now have no context ids for clone, skip this call. + if (StackSequenceContextIds.empty()) + continue; + } + + // Check if the next set of stack ids is the same (since the Calls vector + // of tuples is sorted by the stack ids we can just look at the next one). + bool DuplicateContextIds = false; + if (I + 1 < Calls.size()) { + auto NextIds = std::get<1>(Calls[I + 1]); + DuplicateContextIds = Ids == NextIds; + } + + // If we don't have duplicate context ids, then we can assign all the + // context ids computed for the original node sequence to this call. + // If there are duplicate calls with the same stack ids then we synthesize + // new context ids that are duplicates of the originals. These are + // assigned to SavedContextIds, which is a reference into the map entry + // for this call, allowing us to access these ids later on. + OldToNewContextIds.reserve(OldToNewContextIds.size() + + StackSequenceContextIds.size()); + SavedContextIds = + DuplicateContextIds + ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds) + : StackSequenceContextIds; + assert(!SavedContextIds.empty()); + + if (!DuplicateContextIds) { + // Update saved last node's context ids to remove those that are + // assigned to other calls, so that it is ready for the next call at + // this stack id. + set_subtract(LastNodeContextIds, StackSequenceContextIds); + if (LastNodeContextIds.empty()) + break; + } + } + } + + // Propagate the duplicate context ids over the graph. 
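+  // E.g. (made-up ids) OldToNewContextIds might now hold {4 -> {9, 12}},
+  // meaning everything carrying context id 4 must also carry 9 and 12; the
+  // propagation walks caller edges up from each allocation node, inserting
+  // the new ids wherever the corresponding old id appears.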
+ propagateDuplicateContextIds(OldToNewContextIds); + + if (VerifyCCG) + check(); + + // Now perform a post-order traversal over the graph, starting with the + // allocation nodes, essentially processing nodes from callers to callees. + // For any that contains an id in the map, update the graph to contain new + // nodes representing any inlining at interior callsites. Note we move the + // associated context ids over to the new nodes. + DenseSet Visited; + for (auto &Entry : AllocationCallToContextNodeMap) + assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); +} + +uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { + CallStack CallsiteContext( + Call->getMetadata(LLVMContext::MD_callsite)); + return CallsiteContext.back(); +} + +std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, + const Instruction *Call, + unsigned CloneNo) const { + return (Twine(Call->getFunction()->getName()) + " -> " + + cast(Call)->getCalledFunction()->getName()) + .str(); +} + +std::vector +ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( + Instruction *Call) { + CallStack CallsiteContext( + Call->getMetadata(LLVMContext::MD_callsite)); + return getStackIdsWithContextNodes( + CallsiteContext); +} + +template +template +std::vector +CallsiteContextGraph::getStackIdsWithContextNodes( + CallStack &CallsiteContext) { + std::vector StackIds; + for (auto IdOrIndex : CallsiteContext) { + auto StackId = getStackId(IdOrIndex); + ContextNode *Node = getNodeForStackId(StackId); + if (!Node) + break; + StackIds.push_back(StackId); + } + return StackIds; +} + +ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(Module &M) : Mod(M) { + for (auto &F : M) { + std::vector CallsWithMetadata; + for (auto &BB : F) { + for (auto &I : BB) { + if (!isa(I)) + continue; + if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { + CallsWithMetadata.push_back(&I); + auto *AllocNode = addAllocNode(&I, &F); + auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); + assert(CallsiteMD); + CallStack CallsiteContext(CallsiteMD); + // Add all of the MIBs and their stack nodes. + for (auto &MDOp : MemProfMD->operands()) { + auto *MIBMD = cast(MDOp); + MDNode *StackNode = getMIBStackNode(MIBMD); + assert(StackNode); + CallStack StackContext(StackNode); + addStackNodesForMIB( + AllocNode, StackContext, CallsiteContext, + getMIBAllocType(MIBMD)); + } + assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); + // Memprof and callsite metadata on memory allocations no longer + // needed. + I.setMetadata(LLVMContext::MD_memprof, nullptr); + I.setMetadata(LLVMContext::MD_callsite, nullptr); + } + // For callsite metadata, add to list for this function for later use. + else if (I.getMetadata(LLVMContext::MD_callsite)) + CallsWithMetadata.push_back(&I); + } + } + if (!CallsWithMetadata.empty()) + FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata}); + } + + if (DumpCCG) { + dbgs() << "CCG before updating call stack chains:\n"; + dbgs() << *this; + } + + if (ExportToDot) + exportToDot("prestackupdate"); + + updateStackNodes(); + + handleCallsitesWithMultipleTargets(); + + // Strip off remaining callsite metadata, no longer needed. + for (auto &FuncEntry : FuncToCallsWithMetadata) + for (auto &Call : FuncEntry.second) + Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr); +} + +template +void CallsiteContextGraph::handleCallsitesWithMultipleTargets() { + // Look for and workaround callsites that call multiple functions. 
+ // This can happen for indirect calls, which needs better handling, and in + // more rare cases (e.g. macro expansion). + // TODO: To fix this for indirect calls we will want to perform speculative + // devirtualization using either the normal PGO info with ICP, or using the + // information in the profiled MemProf contexts. We can do this prior to + // this transformation for regular LTO, and for ThinLTO we can simulate that + // effect in the summary and perform the actual speculative devirtualization + // while cloning in the ThinLTO backend. + for (auto Entry = NonAllocationCallToContextNodeMap.begin(); + Entry != NonAllocationCallToContextNodeMap.end();) { + auto *Node = Entry->second; + assert(Node->Clones.empty()); + // Check all node callees and see if in the same function. + bool Removed = false; + auto Call = Node->Call.call(); + for (auto &Edge : Node->CalleeEdges) { + if (!Edge->Callee->hasCall()) + continue; + assert(NodeToCallingFunc.count(Edge->Callee)); + // Check if the called function matches that of the callee node. + if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee])) + continue; + // Work around by setting Node to have a null call, so it gets + // skipped during cloning. Otherwise assignFunctions will assert + // because its data structures are not designed to handle this case. + Entry = NonAllocationCallToContextNodeMap.erase(Entry); + Node->setCall(CallInfo()); + Removed = true; + break; + } + if (!Removed) + Entry++; + } +} + +uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { + // In the Module (IR) case this is already the Id. + return IdOrIndex; +} + +bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, + const Function *Func) { + auto *CB = dyn_cast(Call); + if (!CB->getCalledOperand()) + return false; + auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts(); + auto *CalleeFunc = dyn_cast(CalleeVal); + if (CalleeFunc == Func) + return true; + auto *Alias = dyn_cast(CalleeVal); + return Alias && Alias->getAliasee() == Func; +} + +static std::string getAllocTypeString(uint8_t AllocTypes) { + if (!AllocTypes) + return "None"; + std::string Str; + if (AllocTypes & (uint8_t)AllocationType::NotCold) + Str += "NotCold"; + if (AllocTypes & (uint8_t)AllocationType::Cold) + Str += "Cold"; + return Str; +} + +template +void CallsiteContextGraph::ContextNode::dump() + const { + print(dbgs()); + dbgs() << "\n"; +} + +template +void CallsiteContextGraph::ContextNode::print( + raw_ostream &OS) const { + OS << "Node " << this << "\n"; + OS << "\t"; + printCall(OS); + if (Recursive) + OS << " (recursive)"; + OS << "\n"; + OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; + OS << "\tContextIds:"; + std::vector SortedIds(ContextIds.begin(), ContextIds.end()); + std::sort(SortedIds.begin(), SortedIds.end()); + for (auto Id : SortedIds) + OS << " " << Id; + OS << "\n"; + OS << "\tCalleeEdges:\n"; + for (auto &Edge : CalleeEdges) + OS << "\t\t" << *Edge << "\n"; + OS << "\tCallerEdges:\n"; + for (auto &Edge : CallerEdges) + OS << "\t\t" << *Edge << "\n"; + if (!Clones.empty()) { + OS << "\tClones: "; + FieldSeparator FS; + for (auto *Clone : Clones) + OS << FS << Clone; + OS << "\n"; + } else if (CloneOf) { + OS << "\tClone of " << CloneOf << "\n"; + } +} + +template +void CallsiteContextGraph::ContextEdge::dump() + const { + print(dbgs()); + dbgs() << "\n"; +} + +template +void CallsiteContextGraph::ContextEdge::print( + raw_ostream &OS) const { + OS << "Edge from Callee " << Callee << " to Caller: " << 
Caller + << " AllocTypes: " << getAllocTypeString(AllocTypes); + OS << " ContextIds:"; + std::vector SortedIds(ContextIds.begin(), ContextIds.end()); + std::sort(SortedIds.begin(), SortedIds.end()); + for (auto Id : SortedIds) + OS << " " << Id; +} + +template +void CallsiteContextGraph::dump() const { + print(dbgs()); +} + +template +void CallsiteContextGraph::print( + raw_ostream &OS) const { + OS << "Callsite Context Graph:\n"; + using GraphType = const CallsiteContextGraph *; + for (const auto Node : nodes(this)) { + if (Node->isRemoved()) + continue; + Node->print(OS); + OS << "\n"; + } +} + +template +static void checkEdge( + const std::shared_ptr> &Edge) { + // Confirm that alloc type is not None and that we have at least one context + // id. + assert(Edge->AllocTypes != (uint8_t)AllocationType::None); + assert(!Edge->ContextIds.empty()); +} + +template +static void checkNode(const ContextNode *Node) { + if (Node->isRemoved()) + return; + // Node's context ids should be the union of both its callee and caller edge + // context ids. + if (Node->CallerEdges.size()) { + auto EI = Node->CallerEdges.begin(); + auto &FirstEdge = *EI; + EI++; + DenseSet CallerEdgeContextIds(FirstEdge->ContextIds); + for (; EI != Node->CallerEdges.end(); EI++) { + const auto &Edge = *EI; + set_union(CallerEdgeContextIds, Edge->ContextIds); + } + // Node can have more context ids than callers if some contexts terminate at + // node and some are longer. + assert(Node->ContextIds == CallerEdgeContextIds || + set_is_subset(CallerEdgeContextIds, Node->ContextIds)); + } + if (Node->CalleeEdges.size()) { + auto EI = Node->CalleeEdges.begin(); + auto &FirstEdge = *EI; + EI++; + DenseSet CalleeEdgeContextIds(FirstEdge->ContextIds); + for (; EI != Node->CalleeEdges.end(); EI++) { + const auto &Edge = *EI; + set_union(CalleeEdgeContextIds, Edge->ContextIds); + } + assert(Node->ContextIds == CalleeEdgeContextIds); + } +} + +template +void CallsiteContextGraph::check() const { + using GraphType = const CallsiteContextGraph *; + for (const auto Node : nodes(this)) { + checkNode(Node); + for (auto &Edge : Node->CallerEdges) + checkEdge(Edge); + } +} + +template +struct GraphTraits *> { + using GraphType = const CallsiteContextGraph *; + using NodeRef = const ContextNode *; + + using NodePtrTy = std::unique_ptr>; + static NodeRef getNode(const NodePtrTy &P) { return P.get(); } + + using nodes_iterator = + mapped_iterator::const_iterator, + decltype(&getNode)>; + + static nodes_iterator nodes_begin(GraphType G) { + return nodes_iterator(G->NodeOwner.begin(), &getNode); + } + + static nodes_iterator nodes_end(GraphType G) { + return nodes_iterator(G->NodeOwner.end(), &getNode); + } + + static NodeRef getEntryNode(GraphType G) { + return G->NodeOwner.begin()->get(); + } + + using EdgePtrTy = std::shared_ptr>; + static const ContextNode * + GetCallee(const EdgePtrTy &P) { + return P->Callee; + } + + using ChildIteratorType = + mapped_iterator>>::const_iterator, + decltype(&GetCallee)>; + + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee); + } + + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(N->CalleeEdges.end(), &GetCallee); + } +}; + +template +struct DOTGraphTraits *> + : public DefaultDOTGraphTraits { + DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} + + using GraphType = const CallsiteContextGraph *; + using GTraits = GraphTraits; + using NodeRef = typename GTraits::NodeRef; + using ChildIteratorType = 
typename GTraits::ChildIteratorType; + + static std::string getNodeLabel(NodeRef Node, GraphType G) { + std::string LabelString = + (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") + + Twine(Node->OrigStackOrAllocId)) + .str(); + LabelString += "\n"; + if (Node->hasCall()) { + auto Func = G->NodeToCallingFunc.find(Node); + assert(Func != G->NodeToCallingFunc.end()); + LabelString += + G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo()); + } else { + LabelString += "null call"; + if (Node->Recursive) + LabelString += " (recursive)"; + else + LabelString += " (external)"; + } + return LabelString; + } + + static std::string getNodeAttributes(NodeRef Node, GraphType) { + std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " + + getContextIds(Node->ContextIds) + "\"") + .str(); + AttributeString += + (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str(); + AttributeString += ",style=\"filled\""; + if (Node->CloneOf) { + AttributeString += ",color=\"blue\""; + AttributeString += ",style=\"filled,bold,dashed\""; + } else + AttributeString += ",style=\"filled\""; + return AttributeString; + } + + static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, + GraphType) { + auto &Edge = *(ChildIter.getCurrent()); + return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" + + Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"") + .str(); + } + + // Since the NodeOwners list includes nodes that are no longer connected to + // the graph, skip them here. + static bool isNodeHidden(NodeRef Node, GraphType) { + return Node->isRemoved(); + } + +private: + static std::string getContextIds(const DenseSet &ContextIds) { + std::string IdString = "ContextIds:"; + if (ContextIds.size() < 100) { + std::vector SortedIds(ContextIds.begin(), ContextIds.end()); + std::sort(SortedIds.begin(), SortedIds.end()); + for (auto Id : SortedIds) + IdString += (" " + Twine(Id)).str(); + } else { + IdString += (" (" + Twine(ContextIds.size()) + " ids)").str(); + } + return IdString; + } + + static std::string getColor(uint8_t AllocTypes) { + if (AllocTypes == (uint8_t)AllocationType::NotCold) + // Color "brown1" actually looks like a lighter red. + return "brown1"; + if (AllocTypes == (uint8_t)AllocationType::Cold) + return "cyan"; + if (AllocTypes == + ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) + // Lighter purple. + return "mediumorchid1"; + return "gray"; + } + + static std::string getNodeId(NodeRef Node) { + std::stringstream SStream; + SStream << std::hex << "N0x" << (unsigned long long)Node; + std::string Result = SStream.str(); + return Result; + } +}; + +template +void CallsiteContextGraph::exportToDot( + std::string Label) const { + WriteGraph(this, "", false, Label, + DotFilePathPrefix + "ccg." 
+ Label + ".dot"); +} + +template +bool CallsiteContextGraph::process() { + if (DumpCCG) { + dbgs() << "CCG before cloning:\n"; + dbgs() << *this; + } + if (ExportToDot) + exportToDot("postbuild"); + + if (VerifyCCG) { + check(); + } + + return false; +} + +bool MemProfContextDisambiguation::processModule(Module &M) { + bool Changed = false; + + ModuleCallsiteContextGraph CCG(M); + Changed = CCG.process(); + + return Changed; +} + +PreservedAnalyses MemProfContextDisambiguation::run(Module &M, + ModuleAnalysisManager &AM) { + if (!processModule(M)) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} diff --git a/llvm/test/ThinLTO/X86/memprof-summary.ll b/llvm/test/ThinLTO/X86/memprof-summary.ll deleted file mode 100644 index 597cd44c030e7..0000000000000 --- a/llvm/test/ThinLTO/X86/memprof-summary.ll +++ /dev/null @@ -1,184 +0,0 @@ -;; Check memprof summaries (per module, combined index, and distributed indexes) - -; RUN: split-file %s %t -; RUN: opt -module-summary %t/a.ll -o %ta.bc -; RUN: opt -module-summary %t/b.ll -o %tb.bc - -; RUN: llvm-dis -o - %ta.bc | FileCheck %s --check-prefix=PRELINKDISA -; PRELINKDISA: gv: (name: "main", {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) ; guid = 15822663052811949562 - -; RUN: llvm-dis -o - %tb.bc | FileCheck %s --check-prefix=PRELINKDISB -; PRELINKDISB: ^[[PLBAR:[0-9]+]] = gv: (name: "_Z3barv", {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) ; guid = 4555904644815367798 -; PRELINKDISB: ^[[PLFOO:[0-9]+]] = gv: (name: "_Z3foov", {{.*}} callsites: ((callee: ^[[PLBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) ; guid = 9191153033785521275 -; PRELINKDISB: ^[[PLBAZ]] = gv: (name: "_Z3bazv", {{.*}} callsites: ((callee: ^[[PLBAR]], clones: (0), stackIds: (12481870273128938184)))))) ; guid = 15176620447596392000 - -; RUN: llvm-bcanalyzer -dump %ta.bc | FileCheck %s --check-prefix=PRELINKBCANA -; PRELINKBCANA: - -; RUN: llvm-bcanalyzer -dump %tb.bc | FileCheck %s --check-prefix=PRELINKBCANB -; PRELINKBCANB: - -; RUN: llvm-lto2 run %ta.bc %tb.bc -o %t -save-temps \ -; RUN: -thinlto-distributed-indexes \ -; RUN: -r=%ta.bc,main,plx \ -; RUN: -r=%ta.bc,_Z3foov, \ -; RUN: -r=%ta.bc,free, \ -; RUN: -r=%ta.bc,sleep, \ -; RUN: -r=%tb.bc,_Z3foov,pl \ -; RUN: -r=%tb.bc,_Znam, \ -; RUN: -r=%tb.bc,_Z3bazv,pl - -; RUN: llvm-dis -o - %t.index.bc | FileCheck %s --check-prefix=COMBINEDDIS -; COMBINEDDIS: ^[[COMBBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) -; COMBINEDDIS: ^[[COMBFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[COMBBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) -; COMBINEDDIS: ^[[COMBBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[COMBBAR]], clones: (0), stackIds: (12481870273128938184)))))) -; COMBINEDDIS: ^[[COMBMAIN:[0-9]+]] = gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^[[COMBFOO]], clones: (0), stackIds: (8632435727821051414)), (callee: ^[[COMBFOO]], clones: (0), stackIds: (15025054523792398438)))))) - -; RUN: 
llvm-bcanalyzer -dump %t.index.bc | FileCheck %s --check-prefix=COMBINEDBCAN -; COMBINEDBCAN: - -; RUN: llvm-dis -o - %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISA -; DISTRIBUTEDDISA: gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: null, clones: (0), stackIds: (2732490490862098848)))))) -; DISTRIBUTEDDISA: gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) - -; RUN: llvm-dis -o - %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISB -; DISTRIBUTEDDISB: ^[[DISTRBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) -; DISTRIBUTEDDISB: ^[[DISTRFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[DISTRBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) -; DISTRIBUTEDDISB: ^[[DISTRBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[DISTRBAR]], clones: (0), stackIds: (12481870273128938184)))))) - -; RUN: llvm-bcanalyzer -dump %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANA -; DISTRIBUTEDBCANA: - -; RUN: llvm-bcanalyzer -dump %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANB -; DISTRIBUTEDBCANB: - -;--- a.ll -; ModuleID = 'a.cc' -source_filename = "a.cc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: mustprogress norecurse uwtable -define dso_local noundef i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #0 !dbg !39 { -entry: - %call = call noundef ptr @_Z3foov(), !dbg !42, !callsite !43 - %call1 = call noundef ptr @_Z3foov(), !dbg !44, !callsite !45 - call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call, i8 0, i64 10, i1 false), !dbg !46 - call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call1, i8 0, i64 10, i1 false), !dbg !47 - call void @free(ptr noundef %call) #4, !dbg !48 - %call2 = call i32 @sleep(i32 noundef 10), !dbg !49 - call void @free(ptr noundef %call1) #4, !dbg !50 - ret i32 0, !dbg !51 -} - -declare !dbg !52 noundef ptr @_Z3foov() local_unnamed_addr #1 - -; Function Attrs: argmemonly mustprogress nocallback nofree nounwind willreturn writeonly -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 - -; Function Attrs: inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") -declare void @free(ptr allocptr nocapture noundef) local_unnamed_addr #3 - -declare !dbg !53 i32 @sleep(i32 noundef) local_unnamed_addr #1 - -attributes #0 = { mustprogress norecurse uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #2 = { argmemonly mustprogress nocallback nofree nounwind willreturn writeonly } -attributes #3 = { 
inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="malloc" "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #4 = { nounwind } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) -!1 = !DIFile(filename: "a.cc", directory: ".", checksumkind: CSK_MD5, checksum: "ebabd56909271a1d4a7cac81c10624d5") -!2 = !{i32 7, !"Dwarf Version", i32 5} -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{i32 8, !"PIC Level", i32 2} -!6 = !{i32 7, !"PIE Level", i32 2} -!7 = !{i32 7, !"uwtable", i32 2} -!8 = !{i32 7, !"frame-pointer", i32 2} -!39 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) -!40 = !DISubroutineType(types: !41) -!41 = !{} -!42 = !DILocation(line: 6, column: 13, scope: !39) -!43 = !{i64 8632435727821051414} -!44 = !DILocation(line: 7, column: 13, scope: !39) -!45 = !{i64 -3421689549917153178} -!46 = !DILocation(line: 8, column: 3, scope: !39) -!47 = !DILocation(line: 9, column: 3, scope: !39) -!48 = !DILocation(line: 10, column: 3, scope: !39) -!49 = !DILocation(line: 11, column: 3, scope: !39) -!50 = !DILocation(line: 12, column: 3, scope: !39) -!51 = !DILocation(line: 13, column: 3, scope: !39) -!52 = !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41) -!53 = !DISubprogram(name: "sleep", scope: !54, file: !54, line: 453, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41) -!54 = !DIFile(filename: "include/unistd.h", directory: "/usr", checksumkind: CSK_MD5, checksum: "ee8f41a17f563f029d0e930ad871815a") - -;--- b.ll -; ModuleID = 'b.cc' -source_filename = "b.cc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: mustprogress noinline uwtable -define internal noalias noundef nonnull ptr @_Z3barv() local_unnamed_addr #0 !dbg !39 { -entry: - %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #2, !dbg !42, !memprof !43, !callsite !48 - ret ptr %call, !dbg !49 -} - -; Function Attrs: nobuiltin allocsize(0) -declare noundef nonnull ptr @_Znam(i64 noundef) local_unnamed_addr #1 - -; Function Attrs: mustprogress noinline uwtable -define dso_local noalias noundef nonnull ptr @_Z3bazv() local_unnamed_addr #0 !dbg !50 { -entry: - %call = call noundef ptr @_Z3barv(), !dbg !51, !callsite !52 - ret ptr %call, !dbg !53 -} - -; Function Attrs: mustprogress uwtable -define dso_local noalias noundef nonnull ptr @_Z3foov() local_unnamed_addr #3 !dbg !54 { -entry: - %call = call noundef ptr @_Z3bazv(), !dbg !55, !callsite !56 - ret ptr %call, !dbg !57 -} - -attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" 
"min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #2 = { builtin allocsize(0) } -attributes #3 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) -!1 = !DIFile(filename: "b.cc", directory: ".", checksumkind: CSK_MD5, checksum: "335f81d275af57725cfc9ffc7be49bc2") -!2 = !{i32 7, !"Dwarf Version", i32 5} -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{i32 8, !"PIC Level", i32 2} -!6 = !{i32 7, !"PIE Level", i32 2} -!7 = !{i32 7, !"uwtable", i32 2} -!8 = !{i32 7, !"frame-pointer", i32 2} -!39 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, type: !40, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) -!40 = !DISubroutineType(types: !41) -!41 = !{} -!42 = !DILocation(line: 2, column: 10, scope: !39) -!43 = !{!44, !46} -!44 = !{!45, !"notcold"} -!45 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!46 = !{!47, !"cold"} -!47 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} -!48 = !{i64 9086428284934609951} -!49 = !DILocation(line: 2, column: 3, scope: !39) -!50 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) -!51 = !DILocation(line: 6, column: 10, scope: !50) -!52 = !{i64 -5964873800580613432} -!53 = !DILocation(line: 6, column: 3, scope: !50) -!54 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 9, type: !40, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) -!55 = !DILocation(line: 10, column: 10, scope: !54) -!56 = !{i64 2732490490862098848} -!57 = !DILocation(line: 10, column: 3, scope: !54) diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll new file mode 100644 index 0000000000000..539d88a815ed1 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll @@ -0,0 +1,158 @@ +;; Test callsite context graph generation for simple call graph with +;; two memprof contexts and no inlining. 
+;; +;; Original code looks like: +;; +;; char *bar() { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !0 + %call1 = call noundef ptr @_Z3foov(), !callsite !1 + ret i32 0 +} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1 + +; Function Attrs: nobuiltin +declare void @_ZdaPv() #2 + +define internal ptr @_Z3barv() #3 { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z3bazv() #4 { +entry: + %call = call noundef ptr @_Z3barv(), !callsite !8 + ret ptr null +} + +; Function Attrs: noinline +define internal ptr @_Z3foov() #5 { +entry: + %call = call noundef ptr @_Z3bazv(), !callsite !9 + ret ptr null +} + +; uselistorder directives +uselistorder ptr @_Z3foov, { 1, 0 } + +attributes #0 = { "tune-cpu"="generic" } +attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #2 = { nobuiltin } +attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } +attributes #4 = { "stack-protector-buffer-size"="8" } +attributes #5 = { noinline } +attributes #6 = { builtin } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, !"notcold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!5 = !{!6, !"cold"} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 -5964873800580613432} +!9 = !{i64 2732490490862098848} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[BAZ]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[FOO]] +; DUMP: %call 
= call noundef ptr @_Z3bazv() (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: CallerEdges:
+; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
+
+; DUMP: Node [[MAIN1]]
+; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
+; DUMP: AllocTypes: NotCold
+; DUMP: ContextIds: 1
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0)
+; DUMP: AllocTypes: Cold
+; DUMP: ContextIds: 2
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: CallerEdges:
+
+
+; DOT: digraph "postbuild" {
+; DOT: label="postbuild";
+; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOT: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOT: }
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
new file mode 100644
index 0000000000000..c5ed97f182a98
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
@@ -0,0 +1,232 @@
+;; Test callsite context graph generation for call graph with MIBs
+;; that have pruned contexts that partially match multiple inlined
+;; callsite contexts, requiring duplication of context ids and nodes
+;; while matching callsite nodes onto the graph.
+;; +;; Original code looks like: +;; +;; char *D() { +;; return new char[10]; +;; } +;; +;; char *F() { +;; return D(); +;; } +;; +;; char *C() { +;; return D(); +;; } +;; +;; char *B() { +;; return C(); +;; } +;; +;; char *E() { +;; return C(); +;; } +;; int main(int argc, char **argv) { +;; char *x = B(); // cold +;; char *y = E(); // cold +;; char *z = F(); // default +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; memset(z, 0, 10); +;; delete[] z; +;; sleep(10); +;; delete[] x; +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The code below was created by forcing inlining of C into both B and E. +;; Since both allocation contexts via C are cold, the matched memprof +;; metadata has the context pruned above C's callsite. This requires +;; matching the stack node for C to callsites where it was inlined (i.e. +;; the callsites in B and E that have callsite metadata that includes C's). +;; It also requires duplication of that node in the graph as well as the +;; duplication of the context ids along that path through the graph, +;; so that we can represent the duplicated (via inlining) C callsite. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define internal ptr @_Z1Dv() { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !5 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z1Fv() #0 { +entry: + %call = call noundef ptr @_Z1Dv(), !callsite !6 + ret ptr null +} + +; Function Attrs: mustprogress noinline optnone uwtable +define internal ptr @_Z1Cv() #1 { +entry: + %call = call noundef ptr @_Z1Dv(), !callsite !7 + ret ptr null +} + +; Function Attrs: mustprogress noinline optnone uwtable +define internal ptr @_Z1Bv() #1 { +entry: + %call.i = call noundef ptr @_Z1Dv(), !callsite !8 + ret ptr null +} + +; Function Attrs: mustprogress noinline optnone uwtable +define internal ptr @_Z1Ev() #1 { +entry: + %call.i = call noundef ptr @_Z1Dv(), !callsite !9 + ret ptr null +} + +; Function Attrs: noinline +declare i32 @main() #2 + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: nounwind +declare void @_ZdaPv() #4 + +declare i32 @sleep() #5 + +attributes #0 = { "disable-tail-calls"="true" } +attributes #1 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { noinline } +attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #4 = { nounwind } +attributes #5 = { "no-trapping-math"="true" } +attributes #6 = { builtin } + +!0 = 
!{!1, !3} +!1 = !{!2, !"cold"} +!2 = !{i64 6541423618768552252, i64 -6270142974039008131} +!3 = !{!4, !"notcold"} +!4 = !{i64 6541423618768552252, i64 -4903163940066524832} +!5 = !{i64 6541423618768552252} +!6 = !{i64 -4903163940066524832} +!7 = !{i64 -6270142974039008131} +!8 = !{i64 -6270142974039008131, i64 -184525619819294889} +!9 = !{i64 -6270142974039008131, i64 1905834578520680781} + + +;; After adding only the alloc node memprof metadata, we only have 2 contexts. + +; DUMP: CCG before updating call stack chains: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 +; DUMP: Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2 + +; DUMP: Node [[C]] +; DUMP: null Call +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[F]] +; DUMP: null Call +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: CallerEdges: + +;; After updating for callsite metadata, we should have generated context ids 3 and 4, +;; along with 2 new nodes for those callsites. All have the same allocation type +;; behavior as the original C node. + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3 +; DUMP: Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 + +; DUMP: Node [[F]] +; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[C2]] +; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[B]] +; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[E]] +; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1 +; DUMP: CallerEdges: + + +; DOTPRE: digraph "prestackupdate" { +; DOTPRE: label="prestackupdate"; +; DOTPRE: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"]; +; DOTPRE: Node[[C:0x[a-z0-9]+]] 
[shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
+; DOTPRE: Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
+; DOTPRE: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
+; DOTPRE: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
+; DOTPRE: }
+
+
+; DOTPOST:digraph "postbuild" {
+; DOTPOST: label="postbuild";
+; DOTPOST: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
+; DOTPOST: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
+; DOTPOST: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
+; DOTPOST: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
+; DOTPOST: Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
+; DOTPOST: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
+; DOTPOST: Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
+; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
+; DOTPOST:}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
new file mode 100644
index 0000000000000..da0fd3f44b45e
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
@@ -0,0 +1,386 @@
+;; Test callsite context graph generation for call graph with MIBs
+;; that have pruned contexts that partially match multiple inlined
+;; callsite contexts, requiring duplication of context ids and nodes
+;; while matching callsite nodes onto the graph. This test requires more
+;; complex duplication due to multiple contexts for different allocations
+;; that share some of the same callsite nodes.
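+;;
+;; (Context ids here are the small integers listed after "ContextIds:" in the
+;; DUMP output below; duplicating a node means the same inlined callsite is
+;; represented by several graph nodes, each carrying its own copy of those
+;; ids. See the duplicated ids 5-12 in the post-update graph further down.)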
+;;
+;; Original code looks like:
+;;
+;; char *D(bool Call1) {
+;;   if (Call1)
+;;     return new char[10];
+;;   else
+;;     return new char[10];
+;; }
+;;
+;; char *C(bool Call1) {
+;;   return D(Call1);
+;; }
+;;
+;; char *B(bool Call1) {
+;;   if (Call1)
+;;     return C(true);
+;;   else
+;;     return C(false);
+;; }
+;;
+;; char *A(bool Call1) {
+;;   return B(Call1);
+;; }
+;;
+;; char *A1() {
+;;   return A(true);
+;; }
+;;
+;; char *A2() {
+;;   return A(true);
+;; }
+;;
+;; char *A3() {
+;;   return A(false);
+;; }
+;;
+;; char *A4() {
+;;   return A(false);
+;; }
+;;
+;; char *E() {
+;;   return B(true);
+;; }
+;;
+;; char *F() {
+;;   return B(false);
+;; }
+;;
+;; int main(int argc, char **argv) {
+;;   char *a1 = A1(); // cold
+;;   char *a2 = A2(); // cold
+;;   char *e = E();   // default
+;;   char *a3 = A3(); // default
+;;   char *a4 = A4(); // default
+;;   char *f = F();   // cold
+;;   memset(a1, 0, 10);
+;;   memset(a2, 0, 10);
+;;   memset(e, 0, 10);
+;;   memset(a3, 0, 10);
+;;   memset(a4, 0, 10);
+;;   memset(f, 0, 10);
+;;   delete[] a3;
+;;   delete[] a4;
+;;   delete[] e;
+;;   sleep(10);
+;;   delete[] a1;
+;;   delete[] a2;
+;;   delete[] f;
+;;   return 0;
+;; }
+;;
+;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; memory freed after sleep(10) results in cold lifetimes.
+;;
+;; The code below was created by forcing inlining of A into its callers,
+;; without any other inlining or optimizations. Since both allocation contexts
+;; via A for each allocation in D have the same allocation type (cold via
+;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second
+;; new in D), the contexts for those respective allocations are pruned above A.
+;; The allocations via E and F are to ensure we don't prune above B.
+;;
+;; The matching onto the inlined A[1234]->A sequences will require duplication
+;; of the context id assigned to the context from A for each allocation in D.
+;; This test ensures that we do this correctly in the presence of callsites
+;; shared by the different duplicated context ids (i.e. callsite in C).
+;;
+;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
+
+; RUN: opt -passes=memprof-context-disambiguation \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
+; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
+; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress noinline uwtable
+define ptr @_Z1Db(i1 %Call1) #0 {
+entry:
+  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
+  br label %return
+
+if.else: ; No predecessors!
+  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !6, !callsite !11
+  br label %return
+
+return: ; preds = %if.else, %entry
+  ret ptr null
+}
+
+; Function Attrs: nobuiltin
+declare ptr @_Znam(i64) #1
+
+define ptr @_Z1Cb(i1 %Call1) {
+entry:
+  %tobool = trunc i8 0 to i1
+  %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool), !callsite !12
+  ret ptr null
+}
+
+; Function Attrs: mustprogress noinline uwtable
+define ptr @_Z1Bb(i1 %Call1) #0 {
+entry:
+  %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true), !callsite !13
+  br label %return
+
+if.else: ; No predecessors!
+ %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false), !callsite !14 + br label %return + +return: ; preds = %if.else, %entry + ret ptr null +} + +define ptr @_Z1Ab(i1 %tobool) #2 { +entry: + %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool), !callsite !15 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z2A1v(i1 %tobool.i) #0 { +entry: + %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !16 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z2A2v(i1 %tobool.i) #0 { +entry: + %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !17 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z2A3v(i1 %tobool.i) #0 { +entry: + %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !18 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z2A4v(i1 %tobool.i) #0 { +entry: + %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !19 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z1Ev() #0 { +entry: + %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true), !callsite !20 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z1Fv() #0 { +entry: + %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false), !callsite !21 + ret ptr null +} + +; Function Attrs: noinline +declare i32 @main() #3 + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 + +declare void @_ZdaPv() #5 + +declare i32 @sleep() #6 + +; uselistorder directives +uselistorder ptr @_Znam, { 1, 0 } + +attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nobuiltin } +attributes #2 = { "tune-cpu"="generic" } +attributes #3 = { noinline } +attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } +attributes #6 = { "disable-tail-calls"="true" } +attributes #7 = { builtin allocsize(0) } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781} +!3 = !{!4, !"cold"} +!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978} +!5 = !{i64 4854880825882961848} +!6 = !{!7, !9} +!7 = !{!8, !"notcold"} +!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978} +!9 = !{!10, !"cold"} +!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832} +!11 = !{i64 -8775068539491628272} +!12 = !{i64 -904694911315397047} +!13 = !{i64 6532298921261778285} +!14 = !{i64 7859682663773658275} +!15 = !{i64 -6528110295079665978} +!16 = !{i64 -6528110295079665978, i64 5747919905719679568} +!17 = !{i64 -6528110295079665978, i64 -5753238080028016843} +!18 = !{i64 -6528110295079665978, i64 1794685869326395337} +!19 = !{i64 -6528110295079665978, i64 5462047985461644151} +!20 = !{i64 1905834578520680781} +!21 = !{i64 -4903163940066524832} + + +;; After adding only the alloc node memprof metadata, we only 
have 4 contexts (we only
+;; match the interesting parts of the pre-update graph here).
+
+; DUMP: CCG before updating call stack chains:
+; DUMP: Callsite Context Graph:
+
+; DUMP: Node [[D1:0x[a-z0-9]+]]
+; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2
+
+; DUMP: Node [[C:0x[a-z0-9]+]]
+; DUMP: null Call
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2 3 4
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4
+
+; DUMP: Node [[D2]]
+; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 3 4
+
+
+;; After updating for callsite metadata, we should have duplicated the context
+;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A,
+;; and used those on new nodes for those callers. Note that while in reality
+;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4,
+;; due to the pruning we have lost this information and thus end up duplicating
+;; both of A's contexts to all of the new nodes (which could result in some
+;; unnecessary cloning).
+
+; DUMP: CCG before cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[D1]]
+; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2 5 7 9 11
+; DUMP: CalleeEdges:
+; DUMP: CallerEdges:
+; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
+
+; DUMP: Node [[C]]
+; DUMP: %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool) (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
+; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
+; DUMP: CallerEdges:
+; DUMP: Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
+; DUMP: Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
+
+; DUMP: Node [[B1]]
+; DUMP: %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true) (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2 5 7 9 11
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
+; DUMP: CallerEdges:
+; DUMP: Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP: Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5
+; DUMP: Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7
+; DUMP: Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9
+; DUMP: Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11
+; DUMP: Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
+
+; DUMP: Node [[E]]
+; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true) (clone 0)
+; DUMP: AllocTypes: NotCold
+; DUMP: ContextIds: 1
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1
+; DUMP: CallerEdges:
+
+; DUMP: Node [[D2]]
+; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 6 8 10 12 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 + +; DUMP: Node [[B2]] +; DUMP: %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 6 8 10 12 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 +; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 +; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 +; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 +; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 + +; DUMP: Node [[F]] +; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false) (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[A2]] +; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 5 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5 +; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 +; DUMP: CallerEdges: + +; DUMP: Node [[A3]] +; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 7 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7 +; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 +; DUMP: CallerEdges: + +; DUMP: Node [[A1]] +; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 9 10 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9 +; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 +; DUMP: CallerEdges: + +; DUMP: Node [[A4]] +; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 11 12 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11 +; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 +; DUMP: CallerEdges: + +; DUMP: Node [[A]] +; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll new file mode 100644 index 0000000000000..9ebf219dd37a0 --- /dev/null +++ 
b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll @@ -0,0 +1,261 @@ +;; Tests callsite context graph generation for call graph containing indirect +;; calls. Currently this should result in conservative behavior, such that the +;; indirect call receives a null call in its graph node, to prevent subsequent +;; cloning. +;; +;; Original code looks like: +;; +;; char *foo() { +;; return new char[10]; +;; } +;; class A { +;; public: +;; virtual char *x() { return foo(); } +;; }; +;; class B : public A { +;; public: +;; char *x() final { return foo(); } +;; }; +;; char *bar(A *a) { +;; return a->x(); +;; } +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; B b; +;; char *z = bar(&b); +;; char *w = bar(&b); +;; A a; +;; char *r = bar(&a); +;; char *s = bar(&a); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; memset(z, 0, 10); +;; memset(w, 0, 10); +;; memset(r, 0, 10); +;; memset(s, 0, 10); +;; delete[] x; +;; delete[] w; +;; delete[] r; +;; sleep(10); +;; delete[] y; +;; delete[] z; +;; delete[] s; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; Compiled without optimization to prevent inlining and devirtualization. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare ptr @_Z3barP1A(ptr) + +define i32 @main(ptr %b, ptr %a) #0 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !0 + %call1 = call noundef ptr @_Z3foov(), !callsite !1 + %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !2 + %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !3 + %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !4 + %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !5 + ret i32 0 +} + +; Function Attrs: noinline +declare void @_ZN1BC2Ev() #1 + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 + +; Function Attrs: nobuiltin +declare void @_ZdaPv() #3 + +define internal ptr @_ZN1A1xEv() #4 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !6 + ret ptr null +} + +; Function Attrs: mustprogress uwtable +define internal ptr @_ZN1B1xEv() #5 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !7 + ret ptr null +} + +; Function Attrs: mustprogress uwtable +define internal ptr @_Z3foov() #5 { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !8, !callsite !21 + ret ptr null +} + +declare ptr @_Znam(i64) #6 + +; uselistorder directives +uselistorder ptr @_Z3foov, { 3, 2, 1, 0 } + +attributes #0 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } +attributes #1 = { noinline } +attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #3 = { nobuiltin } +attributes #4 = { "tune-cpu"="generic" } +attributes #5 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" 
"min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #6 = { "disable-tail-calls"="true" } +attributes #7 = { builtin } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{i64 6792096022461663180} +!3 = !{i64 -2709642582978494015} +!4 = !{i64 748269490701775343} +!5 = !{i64 -5747251260480066785} +!6 = !{i64 8256774051149711748} +!7 = !{i64 -4831879094954754638} +!8 = !{!9, !11, !13, !15, !17, !19} +!9 = !{!10, !"notcold"} +!10 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 748269490701775343} +!11 = !{!12, !"cold"} +!12 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 -5747251260480066785} +!13 = !{!14, !"notcold"} +!14 = !{i64 2732490490862098848, i64 8632435727821051414} +!15 = !{!16, !"cold"} +!16 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 6792096022461663180} +!17 = !{!18, !"notcold"} +!18 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 -2709642582978494015} +!19 = !{!20, !"cold"} +!20 = !{i64 2732490490862098848, i64 -3421689549917153178} +!21 = !{i64 2732490490862098848} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[FOO:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 5 6 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 6 + +; DUMP: Node [[AX]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +;; Bar contains an indirect call, with multiple targets. It's call should be null. 
+; DUMP: Node [[BAR]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 4 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 5 + +; DUMP: Node [[MAIN3]] +; DUMP: %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN4]] +; DUMP: %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[BX]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 4 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 + +; DUMP: Node [[MAIN5]] +; DUMP: %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN6]] +; DUMP: %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 +; DUMP: CallerEdges: + + +; DOT: digraph "postbuild" { +; DOT: label="postbuild"; +; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5 6",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"]; +; DOT: Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"]; +; DOT: Node[[AX]] -> Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"]; +; DOT: Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 
2",fillcolor="mediumorchid1"]; +; DOT: Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOT: Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOT: Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"]; +; DOT: Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOT: Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"]; +; DOT: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"]; +; DOT: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll new file mode 100644 index 0000000000000..59f135ca06627 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll @@ -0,0 +1,189 @@ +;; Test callsite context graph generation for call graph with two memprof +;; contexts and partial inlining, requiring generation of a new fused node to +;; represent the inlined sequence while matching callsite nodes onto the graph. +;; +;; Original code looks like: +;; +;; char *bar() { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The code below was created by forcing inlining of baz into foo, and +;; bar into baz. Due to the inlining of bar we will initially have two +;; allocation nodes in the graph. This tests that we correctly match +;; foo (with baz inlined) onto the graph nodes first, and generate a new +;; fused node for it. 
We should then not match baz (with bar inlined) as that +;; is not reached by the MIB contexts (since all calls from main will look +;; like main -> foo(+baz) -> bar after the inlining reflected in this IR). +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define internal ptr @_Z3barv() { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5 + ret ptr null +} + +; Function Attrs: nobuiltin +declare ptr @_Znam(i64) #0 + +; Function Attrs: mustprogress +define internal ptr @_Z3bazv() #1 { +entry: + %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !6 + ret ptr null +} + +; Function Attrs: noinline +define internal ptr @_Z3foov() #2 { +entry: + %call.i = call noundef ptr @_Z3barv(), !callsite !7 + ret ptr null +} + +define i32 @main() #3 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !8 + %call1 = call noundef ptr @_Z3foov(), !callsite !9 + ret i32 0 +} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 + +; Function Attrs: nounwind +declare void @_ZdaPv() #5 + +declare i32 @sleep() #6 + +attributes #0 = { nobuiltin } +attributes #1 = { mustprogress } +attributes #2 = { noinline } +attributes #3 = { "tune-cpu"="generic" } +attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #5 = { nounwind } +attributes #6 = { "disable-tail-calls"="true" } +attributes #7 = { builtin } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 9086428284934609951} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432} +!7 = !{i64 -5964873800580613432, i64 2732490490862098848} +!8 = !{i64 8632435727821051414} +!9 = !{i64 -3421689549917153178} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +;; This is leftover from the MIB on the alloc inlined into baz. It is not +;; matched with any call, since there is no such node in the IR. Due to the +;; null call it will not participate in any context transformations. 
+; DUMP: Node [[FOO2:0x[a-z0-9]+]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ:0x[a-z0-9]+]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[BAZ]] +; DUMP: %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4 + +;; This is the node synthesized for the call to bar in foo that was created +;; by inlining baz into foo. +; DUMP: Node [[FOO]] +; DUMP: %call.i = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 + + +; DOT: digraph "postbuild" { +; DOT: label="postbuild"; +; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; +; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"]; +; DOT: Node[[FOO]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOT: Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOT: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOT: Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"]; +; DOT: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 
2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; +; DOT: Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll new file mode 100644 index 0000000000000..a3a056ade8c49 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll @@ -0,0 +1,135 @@ +;; Test callsite context graph generation for call graph with two memprof +;; contexts and multiple levels of inlining, requiring generation of new +;; fused nodes to represent the inlined sequence while matching callsite +;; nodes onto the graph. In particular this tests the case where a function +;; has inlined a callee containing an inlined callee. +;; +;; Original code looks like: +;; +;; char *bar() __attribute__((noinline)) { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; Both foo and baz are inlined into main, at both foo callsites. +;; We should update the graph for new fused nodes for both of those inlined +;; callsites to bar. +;; +;; Note that baz and bar are both dead due to the inlining, but have been left +;; in the input IR to ensure that the MIB call chain is matched to the longer +;; inline sequences from main. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
+ +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define ptr @_Z3barv() #0 { +entry: + %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !memprof !7, !callsite !12, !heapallocsite !13 + ret ptr null +} + +; Function Attrs: nobuiltin +declare ptr @_Znam(i64) #1 + +; Function Attrs: mustprogress +declare ptr @_Z3bazv() #2 + +define i32 @main() #3 { +delete.end5: + %call.i.i = call noundef ptr @_Z3barv(), !callsite !14 + %call.i.i8 = call noundef ptr @_Z3barv(), !callsite !15 + ret i32 0 +} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 + +declare void @_ZdaPv() #5 + +declare i32 @sleep() #6 + +attributes #0 = { "stack-protector-buffer-size"="8" } +attributes #1 = { nobuiltin } +attributes #2 = { mustprogress } +attributes #3 = { "tune-cpu"="generic" } +attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } +attributes #6 = { "disable-tail-calls"="true" } +attributes #7 = { builtin } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6} + +!0 = !{i32 7, !"Dwarf Version", i32 5} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = !{i32 1, !"wchar_size", i32 4} +!3 = !{i32 8, !"PIC Level", i32 2} +!4 = !{i32 7, !"PIE Level", i32 2} +!5 = !{i32 7, !"uwtable", i32 2} +!6 = !{i32 7, !"frame-pointer", i32 2} +!7 = !{!8, !10} +!8 = !{!9, !"notcold"} +!9 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!10 = !{!11, !"cold"} +!11 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!12 = !{i64 9086428284934609951} +!13 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!14 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!15 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !heapallocsite !7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +;; This is the node synthesized for the first inlined call chain of main->foo->baz +; DUMP: Node [[MAIN1]] +; DUMP: %call.i.i = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +;; This is the node synthesized for the second inlined call chain of main->foo->baz +; DUMP: Node [[MAIN2]] +; DUMP: %call.i.i8 = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: diff --git 
a/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll b/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll
new file mode 100644
index 0000000000000..fede5fe96eccd
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll
@@ -0,0 +1,41 @@
+;; Test that MemProfContextDisambiguation is enabled under the expected conditions
+;; and in the expected position.
+
+;; Pass is not currently enabled by default at any opt level.
+; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \
+; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
+; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \
+; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
+; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \
+; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
+; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \
+; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
+
+;; Pass should not run even under option at O0/O1.
+; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \
+; RUN: -enable-memprof-context-disambiguation \
+; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
+; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \
+; RUN: -enable-memprof-context-disambiguation \
+; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
+
+;; Pass should be enabled under option at O2/O3.
+; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \
+; RUN: -enable-memprof-context-disambiguation \
+; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED
+; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \
+; RUN: -enable-memprof-context-disambiguation \
+; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED
+
+;; When enabled, MemProfContextDisambiguation runs just after inlining.
+; ENABLED: Running pass: InlinerPass
+; ENABLED: Invalidating analysis: InlineAdvisorAnalysis
+; ENABLED: Running pass: MemProfContextDisambiguation
+
+define noundef ptr @_Z3barv() {
+entry:
+  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10)
+  ret ptr %call
+}
+
+declare noundef nonnull ptr @_Znam(i64 noundef)

From d4eb996e6fff305685a9272689cd3ad7aa8f73e4 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Wed, 22 Mar 2023 14:06:06 +0000
Subject: [PATCH 298/691] [gn build] Port d6ad4f01c3da

---
 llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
index 644d30f10854e..0dbeb793e40eb 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn
@@ -48,6 +48,7 @@ static_library("IPO") {
     "Internalize.cpp",
     "LoopExtractor.cpp",
     "LowerTypeTests.cpp",
+    "MemProfContextDisambiguation.cpp",
    "MergeFunctions.cpp",
    "ModuleInliner.cpp",
    "OpenMPOpt.cpp",

From a17b71d17f853350dcd6c72ab141b196d7caec2a Mon Sep 17 00:00:00 2001
From: OCHyams
Date: Wed, 22 Mar 2023 13:27:35 +0000
Subject: [PATCH 299/691] [NFC] Add DebugVariableAggregate class

A DebugVariableAggregate is a DebugVariable that discards FragmentInfo;
it represents a whole variable instance.
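A minimal usage sketch (illustrative only, not part of this patch; DVI
stands for an arbitrary DbgVariableIntrinsic*):

  DebugVariable DV(DVI);            // distinct key per (variable, fragment, inlined-at)
  DebugVariableAggregate DVA(DVI);  // one key per (variable, inlined-at)
  llvm::DenseMap<DebugVariableAggregate, unsigned> Seen; // hypothetical map
  ++Seen[DVA]; // all fragments of one variable land in the same bucket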
Reviewed By: StephenTozer

Differential Revision: https://reviews.llvm.org/D146298
---
 llvm/include/llvm/IR/DebugInfoMetadata.h | 12 ++++++++++++
 llvm/lib/IR/DebugInfoMetadata.cpp        |  4 ++++
 2 files changed, 16 insertions(+)

diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index fb0a194e0c344..258eda717e198 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -3817,6 +3817,18 @@ template <> struct DenseMapInfo<DebugVariable> {
   }
 };
 
+/// Identifies a unique instance of a whole variable (discards/ignores fragment
+/// information).
+class DebugVariableAggregate : public DebugVariable {
+public:
+  DebugVariableAggregate(const DbgVariableIntrinsic *DVI);
+  DebugVariableAggregate(const DebugVariable &V)
+      : DebugVariable(V.getVariable(), std::nullopt, V.getInlinedAt()) {}
+};
+
+template <>
+struct DenseMapInfo<DebugVariableAggregate>
+    : public DenseMapInfo<DebugVariable> {};
 } // end namespace llvm
 
 #undef DEFINE_MDNODE_GET_UNPACK_IMPL
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index 479a51e1e0042..a5da64e59e1d8 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -42,6 +42,10 @@ DebugVariable::DebugVariable(const DbgVariableIntrinsic *DII)
       Fragment(DII->getExpression()->getFragmentInfo()),
       InlinedAt(DII->getDebugLoc().getInlinedAt()) {}
 
+DebugVariableAggregate::DebugVariableAggregate(const DbgVariableIntrinsic *DVI)
+    : DebugVariable(DVI->getVariable(), std::nullopt,
+                    DVI->getDebugLoc()->getInlinedAt()) {}
+
 DILocation::DILocation(LLVMContext &C, StorageType Storage, unsigned Line,
                        unsigned Column, ArrayRef<Metadata *> MDs,
                        bool ImplicitCode)

From f2252726c40a77d83982932cfc647f459a4c5c69 Mon Sep 17 00:00:00 2001
From: OCHyams
Date: Wed, 22 Mar 2023 13:29:46 +0000
Subject: [PATCH 300/691] [Assignment Tracking] Fix mem2reg misidentifying unlinked stores

updateForDeletedStore updates the assignment tracking debug info for a store
that is about to be deleted by mem2reg. For each variable backed by the
target alloca, if a dbg.assign exists it is kept (well - it's downgraded to a
dbg.value). A dbg.value is inserted if there's not a linked dbg.assign for a
variable which is backed by the target alloca.

This patch fixes a bug whereby a store with a linked dbg.assign that
describes a fragment different to the one linked to the alloca was not
counted for the variable, leading to both keeping the dbg.assign (downgrading
it) and inserting a new dbg.value.

Reviewed By: StephenTozer

Differential Revision: https://reviews.llvm.org/D146299
---
 .../Utils/PromoteMemoryToRegister.cpp         |  6 +-
 .../mem2reg/store-to-part-of-alloca.ll        | 84 +++++++++++++++++++
 .../Generic/assignment-tracking/sroa/vec-2.ll |  1 -
 3 files changed, 87 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/store-to-part-of-alloca.ll

diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 22272f7e51b36..3b321cdb699b5 100644
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -132,9 +132,9 @@ class AssignmentTrackingInfo {
     // unnecessary function-local metadata. Remember that we've seen a
     // dbg.assign for each variable fragment for the untracked store handling
    // (after this loop).
-    SmallSet<DebugVariable, 2> VarHasDbgAssignForStore;
+    SmallSet<DebugVariableAggregate, 2> VarHasDbgAssignForStore;
     for (DbgAssignIntrinsic *DAI : at::getAssignmentMarkers(ToDelete)) {
-      VarHasDbgAssignForStore.insert(DebugVariable(DAI));
+      VarHasDbgAssignForStore.insert(DebugVariableAggregate(DAI));
       DbgAssignsToDelete->insert(DAI);
       DIB.insertDbgValueIntrinsic(DAI->getValue(), DAI->getVariable(),
                                   DAI->getExpression(), DAI->getDebugLoc(),
@@ -150,7 +150,7 @@ class AssignmentTrackingInfo {
     // size) or one that is trackable but has had its DIAssignID attachment
     // dropped accidentally.
     for (auto *DAI : DbgAssigns) {
-      if (VarHasDbgAssignForStore.contains(DebugVariable(DAI)))
+      if (VarHasDbgAssignForStore.contains(DebugVariableAggregate(DAI)))
         continue;
       ConvertDebugDeclareToDebugValue(DAI, ToDelete, DIB);
     }
diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/store-to-part-of-alloca.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/store-to-part-of-alloca.ll
new file mode 100644
index 0000000000000..5cf673cde2629
--- /dev/null
+++ b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/store-to-part-of-alloca.ll
@@ -0,0 +1,84 @@
+; RUN: opt -passes=mem2reg -S %s -o - | FileCheck %s --implicit-check-not="call void @llvm.dbg"
+
+; CHECK: llvm.dbg.value(metadata i64 0, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32))
+
+;; The store has a debug intrinsic attached to it with a fragment size
+;; different to the base alloca debug intrinsic fragment size. Check that
+;; mem2reg doesn't think this store is "untagged" for that base variable. If
+;; that were the case mem2reg would insert a dbg.value covering the entire
+;; variable, which isn't the right thing to do here. This example looks weird
+;; and not particularly compelling, but this was encountered in the wild on
+;; "real code".
+
+;; Reduced from this C++ (which itself has been reduced).
+;; class a { +;; public: +;; a(float, float); +;; }; +;; class d { +;; protected: +;; float b[4]; +;; +;; public: +;; float e() { return b[0]; } +;; float f() { return b[1]; } +;; }; +;; class g : public d { +;; public: +;; void operator*=(g) { +;; { +;; float __attribute__((nodebug)) c = b[2], __attribute__((nodebug)) h = b[0]; +;; b[0] = c; +;; b[1] = h; +;; } +;; } +;; }; +;; g get(); +;; void i() { +;; g __attribute__((nodebug)) j = get(); +;; g k = j; +;; k *= j; +;; a(k.e(), k.f()); +;; } + +define dso_local i64 @_Z3funv() #0 !dbg !10 { +entry: + %retval.sroa.0 = alloca i64, align 8, !DIAssignID !20 + call void @llvm.dbg.assign(metadata i1 undef, metadata !19, metadata !DIExpression(), metadata !20, metadata ptr %retval.sroa.0, metadata !DIExpression()), !dbg !21 + store i64 0, ptr %retval.sroa.0, align 8, !dbg !22, !DIAssignID !23 + call void @llvm.dbg.assign(metadata i64 0, metadata !19, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata !23, metadata ptr %retval.sroa.0, metadata !DIExpression()), !dbg !21 + ret i64 0 +} + +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 +declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) #1 + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 17.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.cpp", directory: "/") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{i32 7, !"debug-info-assignment-tracking", i1 true} +!9 = !{!"clang version 17.0.0"} +!10 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funv", scope: !1, file: !1, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !18) +!11 = !DISubroutineType(types: !12) +!12 = !{!13} +!13 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Pair", file: !1, line: 1, size: 64, flags: DIFlagTypePassByValue, elements: !14, identifier: "_ZTS4Pair") +!14 = !{!15, !17} +!15 = !DIDerivedType(tag: DW_TAG_member, name: "A", scope: !13, file: !1, line: 1, baseType: !16, size: 32) +!16 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!17 = !DIDerivedType(tag: DW_TAG_member, name: "B", scope: !13, file: !1, line: 1, baseType: !16, size: 32, offset: 32) +!18 = !{!19} +!19 = !DILocalVariable(name: "X", scope: !10, file: !1, line: 3, type: !13) +!20 = distinct !DIAssignID() +!21 = !DILocation(line: 0, scope: !10) +!22 = !DILocation(line: 3, column: 8, scope: !10) +!23 = distinct !DIAssignID() +!24 = !DILocation(line: 4, column: 3, scope: !10) diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll index 9633950d4e634..107a1e0a91bc6 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll @@ -30,7 +30,6 @@ ;; There's a few dbg intrinsics we're not interested in testing wedged in here. 
; CHECK-NEXT: dbg.value ; CHECK-NEXT: dbg.value -; CHECK-NEXT: dbg.value ; CHECK-NEXT: call void @llvm.dbg.value(metadata float %2,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) %class.d = type { %class.a } From e4ceb5a7bb9b8f6d730530345649286370dd3ff8 Mon Sep 17 00:00:00 2001 From: "Luo, Yuanke" Date: Wed, 22 Mar 2023 20:41:36 +0800 Subject: [PATCH 301/691] [X86] Create extra prolog/epilog for stack realignment Fix some bugs and reland e4c1dfed38370b4 and 614c63bec6d67c. 1. Run argument stack rebase pass before the reserved physical register is finalized. 2. Add LEA pseudo instruction to prevent the instruction being eliminated. 3. Don't support X32. --- llvm/lib/Target/X86/CMakeLists.txt | 1 + llvm/lib/Target/X86/X86.h | 2 + .../Target/X86/X86ArgumentStackSlotRebase.cpp | 198 ++++++++++++++++++ llvm/lib/Target/X86/X86FrameLowering.cpp | 192 +++++++++++++++-- llvm/lib/Target/X86/X86InstrArithmetic.td | 7 + llvm/lib/Target/X86/X86MachineFunctionInfo.h | 5 + llvm/lib/Target/X86/X86RegisterInfo.cpp | 44 ++++ llvm/lib/Target/X86/X86RegisterInfo.h | 4 + llvm/lib/Target/X86/X86RegisterInfo.td | 12 ++ llvm/lib/Target/X86/X86TargetMachine.cpp | 2 + .../CodeGen/MIR/X86/inline-asm-registers.mir | 8 +- llvm/test/CodeGen/X86/O0-pipeline.ll | 1 + llvm/test/CodeGen/X86/i386-baseptr.ll | 23 +- llvm/test/CodeGen/X86/opt-pipeline.ll | 1 + .../X86/statepoint-invoke-ra-enter-at-end.mir | 4 +- llvm/test/CodeGen/X86/x86-64-baseptr.ll | 60 +++--- 16 files changed, 507 insertions(+), 57 deletions(-) create mode 100644 llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index 545e8a38b6961..5eba6db5b06eb 100644 --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -26,6 +26,7 @@ endif() add_public_tablegen_target(X86CommonTableGen) set(sources + X86ArgumentStackSlotRebase.cpp X86AsmPrinter.cpp X86AvoidTrailingCall.cpp X86CallFrameOptimization.cpp diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 044b2636f951c..5ff9d7facc878 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -166,11 +166,13 @@ FunctionPass *createX86LoadValueInjectionLoadHardeningPass(); FunctionPass *createX86LoadValueInjectionRetHardeningPass(); FunctionPass *createX86SpeculativeLoadHardeningPass(); FunctionPass *createX86SpeculativeExecutionSideEffectSuppression(); +FunctionPass *createX86ArgumentStackSlotPass(); void initializeEvexToVexInstPassPass(PassRegistry &); void initializeFPSPass(PassRegistry &); void initializeFixupBWInstPassPass(PassRegistry &); void initializeFixupLEAPassPass(PassRegistry &); +void initializeX86ArgumentStackSlotPassPass(PassRegistry &); void initializeX86FixupInstTuningPassPass(PassRegistry &); void initializeWinEHStatePassPass(PassRegistry &); void initializeX86AvoidSFBPassPass(PassRegistry &); diff --git a/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp new file mode 100644 index 0000000000000..7ce1960b57a45 --- /dev/null +++ b/llvm/lib/Target/X86/X86ArgumentStackSlotRebase.cpp @@ -0,0 +1,198 @@ +//===---- X86ArgumentStackSlotRebase.cpp - rebase argument stack slot -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass replaces the frame register with a GPR virtual register and sets
+// the stack offset for each instruction that references an argument on the
+// stack.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86RegisterInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86argumentstackrebase"
+
+namespace {
+
+class X86ArgumentStackSlotPass : public MachineFunctionPass {
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+
+  explicit X86ArgumentStackSlotPass() : MachineFunctionPass(ID) {
+    initializeX86ArgumentStackSlotPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+};
+
+} // end anonymous namespace
+
+char X86ArgumentStackSlotPass::ID = 0;
+
+INITIALIZE_PASS(X86ArgumentStackSlotPass, DEBUG_TYPE, "Argument Stack Rebase",
+                false, false)
+
+FunctionPass *llvm::createX86ArgumentStackSlotPass() {
+  return new X86ArgumentStackSlotPass();
+}
+
+static Register getArgBaseReg(MachineFunction &MF) {
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const Function &F = MF.getFunction();
+  CallingConv::ID CC = F.getCallingConv();
+  Register NoReg;
+  const TargetRegisterClass *RC = nullptr;
+  switch (CC) {
+  // We need a virtual register in case inline assembly
+  // clobbers the argument base register.
+  case CallingConv::C:
+    RC = STI.is64Bit() ? &X86::GR64_ArgRefRegClass : &X86::GR32_ArgRefRegClass;
+    break;
+  case CallingConv::X86_RegCall:
+    // FIXME: For regcall there is no scratch register on a 32-bit target.
+    // We may use a callee saved register as the argument base register and
+    // save it before it is changed into the base pointer. We need DW_CFA to
+    // indicate where the callee saved register is saved, so that it can
+    // be correctly unwound.
+    // push ebx
+    // mov ebx, esp
+    // and esp, -128
+    // ...
+    // pop ebx
+    // ret
+    RC = STI.is64Bit() ? &X86::GR64_ArgRefRegClass : nullptr;
+    break;
+  // TODO: Refine register class for each calling convention.
+  default:
+    break;
+  }
+  if (RC)
+    return MRI.createVirtualRegister(RC);
+  else
+    return NoReg;
+}
+
+bool X86ArgumentStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
+  const Function &F = MF.getFunction();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
+  const X86RegisterInfo *TRI = STI.getRegisterInfo();
+  const X86InstrInfo *TII = STI.getInstrInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  bool Changed = false;
+
+  if (F.hasFnAttribute(Attribute::Naked))
+    return false;
+  // Only support Linux and ELF.
+  if (!STI.isTargetLinux() && !STI.isTargetELF())
+    return false;
+  if (!TRI->hasBasePointer(MF))
+    return false;
+  // Don't support X32.
+  if (STI.isTarget64BitILP32())
+    return false;
+
+  Register BasePtr = TRI->getBaseRegister();
+  auto IsBaseRegisterClobbered = [&]() {
+    for (MachineBasicBlock &MBB : MF) {
+      for (MachineInstr &MI : MBB) {
+        if (!MI.isInlineAsm())
+          continue;
+        for (MachineOperand &MO : MI.operands()) {
+          if (!MO.isReg())
+            continue;
+          Register Reg = MO.getReg();
+          if (!Register::isPhysicalRegister(Reg))
+            continue;
+          if (TRI->isSuperOrSubRegisterEq(BasePtr, Reg))
+            return true;
+        }
+      }
+    }
+    return false;
+  };
+  if (!IsBaseRegisterClobbered())
+    return false;
+
+  Register ArgBaseReg = getArgBaseReg(MF);
+  if (!ArgBaseReg.isValid())
+    return false;
+  // leal 4(%esp), %reg
+  MachineBasicBlock &MBB = MF.front();
+  MachineBasicBlock::iterator MBBI = MBB.begin();
+  DebugLoc DL;
+  // Emit an instruction to copy the stack pointer to a virtual register
+  // and save the instruction to the x86 machine function info. We can get
+  // the physical register of ArgBaseReg after register allocation. The
+  // stack slot is used to save/restore the argument base pointer. We can
+  // get the index from the instruction.
+  unsigned SlotSize = TRI->getSlotSize();
+  int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
+  // Use pseudo LEA to prevent the instruction from being eliminated.
+  // TODO: if it is duplicated we can expand it to lea.
+  MachineInstr *LEA =
+      BuildMI(MBB, MBBI, DL,
+              TII->get(STI.is64Bit() ? X86::PLEA64r : X86::PLEA32r), ArgBaseReg)
+          .addFrameIndex(FI)
+          .addImm(1)
+          .addUse(X86::NoRegister)
+          .addImm(SlotSize)
+          .addUse(X86::NoRegister)
+          .setMIFlag(MachineInstr::FrameSetup);
+  X86FI->setStackPtrSaveMI(LEA);
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : MBB) {
+      int I = 0;
+      for (MachineOperand &MO : MI.operands()) {
+        if (MO.isFI()) {
+          int Idx = MO.getIndex();
+          if (!MFI.isFixedObjectIndex(Idx))
+            continue;
+          int64_t Offset = MFI.getObjectOffset(Idx);
+          if (Offset < 0)
+            continue;
+          // TODO: replace register for debug instruction
+          if (MI.isDebugInstr())
+            continue;
+          // Replace frame register with argument base pointer and its offset.
+          TRI->eliminateFrameIndex(MI.getIterator(), I, ArgBaseReg, Offset);
+          Changed = true;
+        }
+        ++I;
+      }
+    }
+  }
+
+  return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index cb42a1025ea21..78f96817c8420 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -33,6 +33,7 @@
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/LEB128.h"
 #include "llvm/Target/TargetOptions.h"
 #include <cstdlib>
@@ -476,6 +477,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineModuleInfo &MMI = MF.getMMI();
   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
+  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
   // Add callee saved registers to move list.
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
@@ -487,13 +489,62 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
 
     if (IsPrologue) {
-      BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+      if (X86FI->getStackPtrSaveMI()) {
+        // +2*SlotSize because there are the return address and ebp at the
+        // bottom of the stack.
+        // | retaddr |
+        // | ebp     |
+        // |         |<--ebp
+        Offset += 2 * SlotSize;
+        SmallString<64> CfaExpr;
+        CfaExpr.push_back(dwarf::DW_CFA_expression);
+        uint8_t buffer[16];
+        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+        CfaExpr.push_back(2);
+        Register FramePtr = TRI->getFrameRegister(MF);
+        const Register MachineFramePtr =
+            STI.isTarget64BitILP32()
+                ? Register(getX86SubSuperRegister(FramePtr, 64))
+                : FramePtr;
+        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
+        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
+        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+                 MachineInstr::FrameSetup);
+      } else {
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
+      }
     } else {
       BuildCFI(MBB, MBBI, DL,
                MCCFIInstruction::createRestore(nullptr, DwarfReg));
     }
   }
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    int FI = MI->getOperand(1).getIndex();
+    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
+    SmallString<64> CfaExpr;
+    Register FramePtr = TRI->getFrameRegister(MF);
+    const Register MachineFramePtr =
+        STI.isTarget64BitILP32()
+            ? Register(getX86SubSuperRegister(FramePtr, 64))
+            : FramePtr;
+    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
+    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
+    uint8_t buffer[16];
+    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
+    CfaExpr.push_back(dwarf::DW_OP_deref);
+
+    SmallString<64> DefCfaExpr;
+    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
+    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
+    DefCfaExpr.append(CfaExpr.str());
+    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
+    BuildCFI(MBB, MBBI, DL,
+             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
+             MachineInstr::FrameSetup);
+  }
 }
 
 void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
@@ -1509,6 +1560,42 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   // Debug location must be unknown since the first debug location is used
   // to determine the end of the prologue.
   DebugLoc DL;
+  Register ArgBaseReg;
+
+  // Emit extra prolog for argument stack slot reference.
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    // MI is the lea instruction created in X86ArgumentStackSlotPass.
+    // Create an extra prolog for stack realignment.
+    ArgBaseReg = MI->getOperand(0).getReg();
+    // leal 4(%esp), %basereg
+    // .cfi_def_cfa %basereg, 0
+    // andl $-128, %esp
+    // pushl -4(%basereg)
+    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::LEA64r : X86::LEA32r),
+            ArgBaseReg)
+        .addUse(StackPtr)
+        .addImm(1)
+        .addUse(X86::NoRegister)
+        .addImm(SlotSize)
+        .addUse(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameSetup);
+    if (NeedsDwarfCFI) {
+      // .cfi_def_cfa %basereg, 0
+      unsigned DwarfStackPtr = TRI->getDwarfRegNum(ArgBaseReg, true);
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, 0),
+               MachineInstr::FrameSetup);
+    }
+    BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
+    int64_t Offset = Is64Bit ? -2 * (int64_t)SlotSize : -1 * (int64_t)SlotSize;
+    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64rmm : X86::PUSH32rmm))
+        .addReg(ArgBaseReg)
+        .addImm(1)
+        .addReg(X86::NoRegister)
+        .addImm(Offset)
+        .addReg(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   // Space reserved for stack-based arguments when making a (ABI-guaranteed)
   // tail call.
@@ -1640,7 +1727,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
         .addReg(MachineFramePtr, RegState::Kill)
         .setMIFlag(MachineInstr::FrameSetup);
 
-    if (NeedsDwarfCFI) {
+    if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
       // Mark the place where EBP/RBP was saved.
       // Define the current CFA rule to use the provided offset.
       assert(StackSize);
@@ -1717,13 +1804,28 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
           .setMIFlag(MachineInstr::FrameSetup);
 
       if (NeedsDwarfCFI) {
-        // Mark effective beginning of when frame pointer becomes valid.
-        // Define the current CFA to use the EBP/RBP register.
-        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
-        BuildCFI(
-            MBB, MBBI, DL,
-            MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
-            MachineInstr::FrameSetup);
+        if (ArgBaseReg.isValid()) {
+          SmallString<64> CfaExpr;
+          CfaExpr.push_back(dwarf::DW_CFA_expression);
+          uint8_t buffer[16];
+          unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
+          CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
+          CfaExpr.push_back(2);
+          CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
+          CfaExpr.push_back(0);
+          // DW_CFA_expression: reg5 DW_OP_breg5 +0
+          BuildCFI(MBB, MBBI, DL,
+                   MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
+                   MachineInstr::FrameSetup);
+        } else {
+          // Mark effective beginning of when frame pointer becomes valid.
+          // Define the current CFA to use the EBP/RBP register.
+          unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
+          BuildCFI(
+              MBB, MBBI, DL,
+              MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
+              MachineInstr::FrameSetup);
+        }
       }
 
       if (NeedsWinFPO) {
@@ -1790,7 +1892,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
   // Realign stack after we pushed callee-saved registers (so that we'll be
   // able to calculate their offsets from the frame pointer).
   // Don't do this for Win64, it needs to realign the stack after the prologue.
-  if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) {
+  if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
+      !ArgBaseReg.isValid()) {
     assert(HasFP && "There should be a frame pointer if stack is realigned.");
     BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);
 
@@ -2048,6 +2151,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
           .setMIFlag(MachineInstr::FrameSetup);
     }
   }
+  if (ArgBaseReg.isValid()) {
+    // Save argument base pointer.
+    auto *MI = X86FI->getStackPtrSaveMI();
+    int FI = MI->getOperand(1).getIndex();
+    unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
+    // movl %basereg, offset(%ebp)
+    addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
+        .addReg(ArgBaseReg)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
 
   if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
     // Mark end of stack pointer adjustment.
@@ -2196,6 +2309,34 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                    !MF.getTarget().getTargetTriple().isOSWindows()) &&
                   MF.needsFrameMoves();
 
+  Register ArgBaseReg;
+  if (auto *MI = X86FI->getStackPtrSaveMI()) {
+    unsigned Opc = X86::LEA32r;
+    Register StackReg = X86::ESP;
+    ArgBaseReg = MI->getOperand(0).getReg();
+    if (STI.is64Bit()) {
+      Opc = X86::LEA64r;
+      StackReg = X86::RSP;
+    }
+    // leal -8(%basereg), %esp
+    // .cfi_def_cfa %esp, 4
+    BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
+        .addUse(ArgBaseReg)
+        .addImm(1)
+        .addUse(X86::NoRegister)
+        .addImm((int64_t)SlotSize * -2)
+        .addUse(X86::NoRegister)
+        .setMIFlag(MachineInstr::FrameDestroy);
+    if (NeedsDwarfCFI) {
+      unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
+      BuildCFI(MBB, MBBI, DL,
+               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
+               MachineInstr::FrameDestroy);
+      --MBBI;
+    }
+    --MBBI;
+  }
+
   if (IsFunclet) {
     assert(HasFP && "EH funclets without FP not yet implemented");
     NumBytes = getWinEHFuncletFrameSize(MF);
@@ -2237,11 +2378,13 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     }
 
     if (NeedsDwarfCFI) {
-      unsigned DwarfStackPtr =
-          TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
-      BuildCFI(MBB, MBBI, DL,
-               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
-               MachineInstr::FrameDestroy);
+      if (!ArgBaseReg.isValid()) {
+        unsigned DwarfStackPtr =
+            TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
+        BuildCFI(MBB, MBBI, DL,
+                 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
+                 MachineInstr::FrameDestroy);
+      }
       if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
         unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
         BuildCFI(MBB, AfterPop, DL,
@@ -2271,6 +2414,15 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
       --MBBI;
   }
 
+  if (ArgBaseReg.isValid()) {
+    // Restore argument base pointer.
+    auto *MI = X86FI->getStackPtrSaveMI();
+    int FI = MI->getOperand(1).getIndex();
+    unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
+    // movl offset(%ebp), %basereg
+    addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
+        .setMIFlag(MachineInstr::FrameDestroy);
+  }
   MBBI = FirstCSPop;
 
   if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
@@ -3889,8 +4041,16 @@ void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
 
 void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
    MachineFunction &MF, RegScavenger *RS) const {
+  auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+
   if (STI.is32Bit() && MF.hasEHFunclets())
     restoreWinEHStackPointersInParent(MF);
+  // We have emitted the prolog and epilog. The stack pointer saving
+  // instruction is no longer needed.
+  if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
+    MI->eraseFromParent();
+    X86FI->setStackPtrSaveMI(nullptr);
+  }
 }
 
 void X86FrameLowering::restoreWinEHStackPointersInParent(
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 6e2b636260d12..42cc7c8f4585d 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -37,6 +37,13 @@ def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
                 [(set GR64:$dst, lea64addr:$src)]>;
 } // SchedRW
 
+// Pseudo instruction for lea that prevents the optimizer from eliminating
+// the instruction.
+let SchedRW = [WriteLEA], isPseudo = true, hasSideEffects = 1 in {
+def PLEA32r : PseudoI<(outs GR32:$dst), (ins anymem:$src), []>;
+def PLEA64r : PseudoI<(outs GR64:$dst), (ins anymem:$src), []>;
+}
+
 //===----------------------------------------------------------------------===//
 // Fixed-Register Multiplication and Division Instructions.
 //
diff --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 372838a212cc8..31c087df9a911 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -117,6 +117,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   /// determine if we should insert tilerelease in frame lowering.
   bool HasVirtualTileReg = false;
 
+  MachineInstr *StackPtrSaveMI = nullptr;
+
   std::optional<int> SwiftAsyncContextFrameIdx;
 
   // Preallocated fields are only used during isel.
@@ -225,6 +227,9 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   bool hasVirtualTileReg() const { return HasVirtualTileReg; }
   void setHasVirtualTileReg(bool v) { HasVirtualTileReg = v; }
 
+  void setStackPtrSaveMI(MachineInstr *MI) { StackPtrSaveMI = MI; }
+  MachineInstr *getStackPtrSaveMI() const { return StackPtrSaveMI; }
+
   std::optional<int> getSwiftAsyncContextFrameIdx() const {
     return SwiftAsyncContextFrameIdx;
   }
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 327c61c4c5d0b..0edc0a432f8e0 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -702,6 +702,11 @@ static bool CantUseSP(const MachineFrameInfo &MFI) {
 
 bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
 
+  // We have a virtual register to reference the argument, and don't need a
+  // base pointer.
+  if (X86FI->getStackPtrSaveMI() != nullptr)
+    return false;
+
   if (X86FI->hasPreallocatedCall())
     return true;
 
@@ -778,6 +783,45 @@ static bool isFuncletReturnInstr(MachineInstr &MI) {
   llvm_unreachable("impossible");
 }
 
+void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+                                          unsigned FIOperandNum,
+                                          Register BaseReg,
+                                          int FIOffset) const {
+  MachineInstr &MI = *II;
+  unsigned Opc = MI.getOpcode();
+  if (Opc == TargetOpcode::LOCAL_ESCAPE) {
+    MachineOperand &FI = MI.getOperand(FIOperandNum);
+    FI.ChangeToImmediate(FIOffset);
+    return;
+  }
+
+  MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
+
+  // The frame index format for stackmaps and patchpoints is different from the
+  // X86 format. It only has a FI and an offset.
+  if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) {
+    assert(BasePtr == FramePtr && "Expected the FP as base register");
+    int64_t Offset = MI.getOperand(FIOperandNum + 1).getImm() + FIOffset;
+    MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
+    return;
+  }
+
+  if (MI.getOperand(FIOperandNum + 3).isImm()) {
+    // Offset is a 32-bit integer.
+    int Imm = (int)(MI.getOperand(FIOperandNum + 3).getImm());
+    int Offset = FIOffset + Imm;
+    assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+           "Requesting 64-bit offset in 32-bit immediate!");
+    if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
+      MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
+  } else {
+    // Offset is symbolic. This is extremely rare.
+    uint64_t Offset =
+        FIOffset + (uint64_t)MI.getOperand(FIOperandNum + 3).getOffset();
+    MI.getOperand(FIOperandNum + 3).setOffset(Offset);
+  }
+}
+
 bool
 X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                      int SPAdj, unsigned FIOperandNum,
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h
index f88d4b18f1d86..48eeb72479f8c 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -133,6 +133,10 @@ class X86RegisterInfo final : public X86GenRegisterInfo {
 
   bool canRealignStack(const MachineFunction &MF) const override;
 
+  void eliminateFrameIndex(MachineBasicBlock::iterator II,
+                           unsigned FIOperandNum, Register BaseReg,
+                           int FIOffset) const;
+
   bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
                            unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td
index 4ef7150122ca8..1e6477e658b9d 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -433,6 +433,18 @@ def GR64PLTSafe : RegisterClass<"X86", [i64], 64,
                                 (add RAX, RCX, RDX, RSI, RDI, R8, R9,
                                      RBX, R14, R15, R12, R13, RBP)>;
 
+// It includes the GPRs that are used as scratch registers for the Linux64
+// calling convention.
+def GR64_ArgRef: RegisterClass<"X86", [i64], 64, (add R10, R11)> {
+  let GeneratePressureSet = 0;
+}
+
+// It includes the GPRs that are used as scratch registers for the Linux32
+// calling convention.
+def GR32_ArgRef: RegisterClass<"X86", [i32], 32, (add ECX, EDX)> {
+  let GeneratePressureSet = 0;
+}
+
 // Segment registers for use by MOV instructions (and others) that have a
 // segment register as one operand. Always contain a 16-bit segment
 // descriptor.
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index dfb7030def7e7..a8895453c14ab 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -104,6 +104,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86Target() {
   initializePseudoProbeInserterPass(PR);
   initializeX86ReturnThunksPass(PR);
   initializeX86DAGToDAGISelPass(PR);
+  initializeX86ArgumentStackSlotPassPass(PR);
 }
 
 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -478,6 +479,7 @@ bool X86PassConfig::addInstSelector() {
     addPass(createCleanupLocalDynamicTLSPass());
 
   addPass(createX86GlobalBaseRegPass());
+  addPass(createX86ArgumentStackSlotPass());
   return false;
 }
 
diff --git a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
index 0202103c8ff4d..f92d49cabdcda 100644
--- a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
+++ b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir
@@ -28,8 +28,8 @@ body: |
     liveins: $rdi, $rsi
 
     ; CHECK-LABEL: name: test
-    ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4456458 /* regdef:GR64 */, def $rsi, 4456458 /* regdef:GR64 */, def dead $rdi,
-    INLINEASM &foo, 0, 4456458, def $rsi, 4456458, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags
+    ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi,
+    INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags
    $rax = MOV64rr killed $rsi
     RET64 killed $rax
...
@@ -45,8 +45,8 @@ body: |
 
     ; Verify that the register ties are preserved.
     ; CHECK-LABEL: name: test2
-    ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4456458 /* regdef:GR64 */, def $rsi, 4456458 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags
-    INLINEASM &foo, 0, 4456458, def $rsi, 4456458, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags
+    ; CHECK: INLINEASM &foo, 0 /* attdialect */, 4521994 /* regdef:GR64 */, def $rsi, 4521994 /* regdef:GR64 */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags
+    INLINEASM &foo, 0, 4521994, def $rsi, 4521994, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags
    $rax = MOV64rr killed $rsi
     RET64 killed $rax
...
diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index e78464120426d..402645ed1e2e5 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -39,6 +39,7 @@ ; CHECK-NEXT: Assignment Tracking Analysis ; CHECK-NEXT: X86 DAG->DAG Instruction Selection ; CHECK-NEXT: X86 PIC Global Base Reg Initialization +; CHECK-NEXT: Argument Stack Rebase ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: Local Stack Slot Allocation ; CHECK-NEXT: X86 speculative load hardening diff --git a/llvm/test/CodeGen/X86/i386-baseptr.ll b/llvm/test/CodeGen/X86/i386-baseptr.ll index baae07ef435ed..715ffcbd5c930 100644 --- a/llvm/test/CodeGen/X86/i386-baseptr.ll +++ b/llvm/test/CodeGen/X86/i386-baseptr.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=i386-pc-linux -stackrealign -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=i386-pc-none-elf -stackrealign -verify-machineinstrs < %s | FileCheck %s declare i32 @helper() nounwind define void @base() #0 { @@ -39,16 +40,18 @@ entry: define void @clobber_base() #0 { ; CHECK-LABEL: clobber_base: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: .cfi_def_cfa %ecx, 0 +; CHECK-NEXT: andl $-128, %esp +; CHECK-NEXT: pushl -4(%ecx) ; CHECK-NEXT: pushl %ebp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: .cfi_offset %ebp, -8 ; CHECK-NEXT: movl %esp, %ebp -; CHECK-NEXT: .cfi_def_cfa_register %ebp +; CHECK-NEXT: .cfi_escape 0x10, 0x05, 0x02, 0x75, 0x00 # ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: andl $-128, %esp -; CHECK-NEXT: subl $128, %esp -; CHECK-NEXT: movl %esp, %esi -; CHECK-NEXT: .cfi_offset %esi, -12 +; CHECK-NEXT: subl $244, %esp +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x75, 0x7c # +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x75, 0x84, 0x7f, 0x06 # ; CHECK-NEXT: calll helper@PLT ; CHECK-NEXT: movl %esp, %ecx ; CHECK-NEXT: leal 31(,%eax,4), %eax @@ -63,12 +66,14 @@ define void @clobber_base() #0 { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $8, %edx ; CHECK-NEXT: #APP -; CHECK-NEXT: movl %edx, (%esi) +; CHECK-NEXT: movl %edx, -120(%ebp) ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $0, (%ecx,%eax) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: leal -4(%ebp), %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %ebp +; CHECK-NEXT: leal -8(%ecx), %esp ; CHECK-NEXT: .cfi_def_cfa %esp, 4 ; CHECK-NEXT: retl entry: @@ -135,6 +140,6 @@ entry: ret void } -attributes #0 = { "frame-pointer"="all"} +attributes #0 = {"frame-pointer"="all"} !llvm.module.flags = !{!0} !0 = !{i32 2, !"override-stack-alignment", i32 32} diff --git a/llvm/test/CodeGen/X86/opt-pipeline.ll b/llvm/test/CodeGen/X86/opt-pipeline.ll index 4361b863c25b0..dcbf5cf2f0b75 100644 --- a/llvm/test/CodeGen/X86/opt-pipeline.ll +++ b/llvm/test/CodeGen/X86/opt-pipeline.ll @@ -88,6 +88,7 @@ ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Local Dynamic TLS Access Clean-up ; CHECK-NEXT: X86 PIC Global Base Reg Initialization +; CHECK-NEXT: Argument Stack Rebase ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions ; CHECK-NEXT: X86 Domain Reassignment Pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir index 2170573f4b0d9..fe057a8d78eb1 100644 --- 
a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir +++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir @@ -350,7 +350,7 @@ body: | ; CHECK-NEXT: CMP64rr [[NOT64r2]], [[COPY6]], implicit-def $eflags ; CHECK-NEXT: undef %100.sub_32bit:gr64_with_sub_8bit = MOV32ri 0 ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], %100, 4, implicit killed $eflags - ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4456457 /* reguse:GR64 */, %100, 4456457 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags + ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %100, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags ; CHECK-NEXT: LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, [[COPY5]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-NEXT: $rdi = COPY [[COPY4]] @@ -470,7 +470,7 @@ body: | %63:gr64 = NOT64r %63 CMP64rr %63, %31, implicit-def $eflags %63:gr64 = CMOV64rr %63, %53, 4, implicit killed $eflags - INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4456457 /* reguse:GR64 */, %53, 4456457 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags + INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %53, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, %65, implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1) ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp $rdi = COPY %64 diff --git a/llvm/test/CodeGen/X86/x86-64-baseptr.ll b/llvm/test/CodeGen/X86/x86-64-baseptr.ll index dbd04f22a5c09..451e2c553873a 100644 --- a/llvm/test/CodeGen/X86/x86-64-baseptr.ll +++ b/llvm/test/CodeGen/X86/x86-64-baseptr.ll @@ -77,16 +77,18 @@ entry: define void @clobber_base() #0 { ; CHECK-LABEL: clobber_base: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r10 +; CHECK-NEXT: .cfi_def_cfa %r10, 0 +; CHECK-NEXT: andq $-128, %rsp +; CHECK-NEXT: pushq -16(%r10) ; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 # ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: andq $-128, %rsp -; CHECK-NEXT: subq $128, %rsp -; CHECK-NEXT: movq %rsp, %rbx -; CHECK-NEXT: .cfi_offset %rbx, -24 +; CHECK-NEXT: subq $232, %rsp +; CHECK-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: .cfi_escape 0x10, 0x03, 
0x02, 0x76, 0x78 # +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7f, 0x06 # ; CHECK-NEXT: callq helper@PLT ; CHECK-NEXT: movq %rsp, %rcx ; CHECK-NEXT: movl %eax, %eax @@ -102,12 +104,14 @@ define void @clobber_base() #0 { ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $8, %edx ; CHECK-NEXT: #APP -; CHECK-NEXT: movl %edx, (%rbx) +; CHECK-NEXT: movl %edx, -112(%rbp) ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $0, (%rcx,%rax) +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload ; CHECK-NEXT: leaq -8(%rbp), %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp +; CHECK-NEXT: leaq -16(%r10), %rsp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ; CHECK-NEXT: retq ; @@ -160,14 +164,15 @@ entry: define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 %param3, i32 %param4, i32 %param5, i32 %param6, i32 %param7, i32 %param8, i32 %param9, i32 %param10, i32 %param11, i32 %param12) #0 { ; CHECK-LABEL: clobber_baseptr_argptr: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %r10 +; CHECK-NEXT: .cfi_def_cfa %r10, 0 +; CHECK-NEXT: andq $-128, %rsp +; CHECK-NEXT: pushq -16(%r10) ; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rsp, %rbp -; CHECK-NEXT: .cfi_def_cfa_register %rbp +; CHECK-NEXT: .cfi_escape 0x10, 0x06, 0x02, 0x76, 0x00 # ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: andq $-128, %rsp -; CHECK-NEXT: subq $256, %rsp # imm = 0x100 +; CHECK-NEXT: subq $360, %rsp # imm = 0x168 ; CHECK-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill @@ -176,17 +181,18 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; CHECK-NEXT: movaps %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill -; CHECK-NEXT: movq %rsp, %rbx -; CHECK-NEXT: .cfi_offset %rbx, -24 -; CHECK-NEXT: .cfi_offset %xmm8, -160 -; CHECK-NEXT: .cfi_offset %xmm9, -144 -; CHECK-NEXT: .cfi_offset %xmm10, -128 -; CHECK-NEXT: .cfi_offset %xmm11, -112 -; CHECK-NEXT: .cfi_offset %xmm12, -96 -; CHECK-NEXT: .cfi_offset %xmm13, -80 -; CHECK-NEXT: .cfi_offset %xmm14, -64 -; CHECK-NEXT: .cfi_offset %xmm15, -48 -; CHECK-NEXT: movl 16(%rbp), %r14d +; CHECK-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: .cfi_escape 0x10, 0x03, 0x02, 0x76, 0x78 # +; CHECK-NEXT: .cfi_escape 0x10, 0x19, 0x02, 0x76, 0xf0, 0x7e # +; CHECK-NEXT: .cfi_escape 0x10, 0x1a, 0x02, 0x76, 0x80, 0x7f # +; CHECK-NEXT: .cfi_escape 0x10, 0x1b, 0x02, 0x76, 0x90, 0x7f # +; CHECK-NEXT: .cfi_escape 0x10, 0x1c, 0x02, 0x76, 0xa0, 0x7f # +; CHECK-NEXT: .cfi_escape 0x10, 0x1d, 0x02, 0x76, 0xb0, 0x7f # +; CHECK-NEXT: .cfi_escape 0x10, 0x1e, 0x02, 0x76, 0x40 # +; CHECK-NEXT: .cfi_escape 0x10, 0x1f, 0x02, 0x76, 0x50 # +; CHECK-NEXT: .cfi_escape 0x10, 0x20, 0x02, 0x76, 0x60 # +; CHECK-NEXT: .cfi_escape 0x0f, 0x04, 0x76, 0x88, 0x7e, 0x06 # +; CHECK-NEXT: movl (%r10), %r14d ; CHECK-NEXT: callq helper@PLT ; CHECK-NEXT: movq %rsp, %rcx ; CHECK-NEXT: movl %eax, %eax @@ -205,7 +211,7 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32 ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl $8, %edx ; CHECK-NEXT: #APP -; CHECK-NEXT: movl %edx, (%rbx) +; CHECK-NEXT: movl %edx, -240(%rbp) ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: movl %r14d, (%rcx,%rax) ; CHECK-NEXT: 
movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm8 # 16-byte Reload
@@ -216,9 +222,11 @@ define x86_regcallcc void @clobber_baseptr_argptr(i32 %param1, i32 %param2, i32
 ; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload
 ; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload
 ; CHECK-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
 ; CHECK-NEXT:    leaq -8(%rbp), %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    leaq -16(%r10), %rsp
 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT:    retq
 ;
@@ -298,6 +306,6 @@ entry:
   ret void
 }
 
-attributes #0 = { "frame-pointer"="all"}
+attributes #0 = {"frame-pointer"="all"}
 !llvm.module.flags = !{!0}
 !0 = !{i32 2, !"override-stack-alignment", i32 32}

From e7618a6361f856a089d6f9eb476f18a9bf4643b8 Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Wed, 22 Mar 2023 15:22:33 +0100
Subject: [PATCH 302/691] [GVN] Fix change reporting when removing assume (PR61574)

Report a change when removing a true/false assume.

Fixes https://github.com/llvm/llvm-project/issues/61574.
---
 llvm/lib/Transforms/Scalar/GVN.cpp |  4 +++-
 llvm/test/Transforms/GVN/assume.ll | 19 ++++++++++++++++++-
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp
index 28b1f455459f8..0c263e5644b6f 100644
--- a/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -1908,8 +1908,10 @@ bool GVNPass::processAssumeIntrinsic(AssumeInst *IntrinsicI) {
         MSSAU->insertDef(cast<MemoryDef>(NewDef), /*RenameUses=*/false);
       }
     }
-    if (isAssumeWithEmptyBundle(*IntrinsicI))
+    if (isAssumeWithEmptyBundle(*IntrinsicI)) {
       markInstructionForDeletion(IntrinsicI);
+      return true;
+    }
     return false;
   } else if (isa<Constant>(V)) {
    // If it's not false, and constant, it must evaluate to true. This means our
diff --git a/llvm/test/Transforms/GVN/assume.ll b/llvm/test/Transforms/GVN/assume.ll
index 3b6346f63dbb3..6cb4c871750d6 100644
--- a/llvm/test/Transforms/GVN/assume.ll
+++ b/llvm/test/Transforms/GVN/assume.ll
@@ -1,9 +1,26 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=gvn -S | FileCheck %s
+; RUN: opt < %s -passes=gvn -verify-analysis-invalidation -S | FileCheck %s
 
 declare void @llvm.assume(i1)
 declare void @use(i1)
 
+define void @assume_true() {
+; CHECK-LABEL: @assume_true(
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.assume(i1 true)
+  ret void
+}
+
+define void @assume_false() {
+; CHECK-LABEL: @assume_false(
+; CHECK-NEXT:    store i8 poison, ptr null, align 1
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.assume(i1 false)
+  ret void
+}
+
 define void @assume_arg(i1 %x) {
 ; CHECK-LABEL: @assume_arg(
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[X:%.*]])

From a2033ff3cc76fcc05e69627ad2685b6376f9ad4e Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot
Date: Wed, 22 Mar 2023 14:28:29 +0000
Subject: [PATCH 303/691] [gn build] Port e4ceb5a7bb9b

---
 llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn
index e2351f36ed1b1..7c27f7ca21583 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/X86/BUILD.gn
@@ -77,6 +77,7 @@ static_library("LLVMX86CodeGen") {
     deps += [ ":X86GenFoldTables" ]
   }
   sources = [
+    "X86ArgumentStackSlotRebase.cpp",
    "X86AsmPrinter.cpp",
     "X86AvoidStoreForwardingBlocks.cpp",
     "X86AvoidTrailingCall.cpp",

From ed114b6ffc953ac2c648f64287c44456a7d19d0c Mon Sep 17 00:00:00 2001
From: Johannes de Fine Licht
Date: Wed, 22 Mar 2023 15:22:11 +0100
Subject: [PATCH 304/691] [MLIR][LLVM] Copy byval attributes during inlining.

Support inlining of function calls with the byval attribute on function
arguments by copying the pointee into a newly alloca'ed pointer at the
callsite before inlining.

The alignment attribute is not yet taken into account.

Reviewed By: ftynse, gysit

Differential Revision: https://reviews.llvm.org/D146616
---
 mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 57 ++++++++++++++++++++-
 mlir/test/Dialect/LLVMIR/inlining.mlir     | 58 ++++++++++++++++------
 2 files changed, 99 insertions(+), 16 deletions(-)

diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index 1336ad1169c12..ca439ab8cc15e 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "TypeDetail.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
 #include "mlir/Dialect/LLVMIR/LLVMInterfaces.h"
 #include "mlir/Dialect/LLVMIR/LLVMTypes.h"
 #include "mlir/IR/Builders.h"
@@ -2854,6 +2855,39 @@ static void moveConstantAllocasToEntryBlock(
   }
 }
 
+static Value handleByValArgument(OpBuilder &builder, Operation *callable,
+                                 Value argument,
+                                 NamedAttribute byValAttribute) {
+  auto func = cast<LLVM::LLVMFuncOp>(callable);
+  LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryAttr();
+  // If there is no memory effects attribute, assume that the function is
+  // not read-only.
+  bool isReadOnly = memoryEffects &&
+                    memoryEffects.getArgMem() != ModRefInfo::ModRef &&
+                    memoryEffects.getArgMem() != ModRefInfo::Mod;
+  if (isReadOnly)
+    return argument;
+  // Resolve the pointee type and its size.
+  auto ptrType = cast<LLVM::LLVMPointerType>(argument.getType());
+  Type elementType = cast<TypeAttr>(byValAttribute.getValue()).getValue();
+  unsigned int typeSize =
+      DataLayout(callable->getParentOfType<ModuleOp>())
+          .getTypeSize(elementType);
+  // Allocate the new value on the stack.
+  Value one = builder.create<LLVM::ConstantOp>(
+      func.getLoc(), builder.getI64Type(), builder.getI64IntegerAttr(1));
+  Value allocaOp =
+      builder.create<LLVM::AllocaOp>(func.getLoc(), ptrType, elementType, one);
+  // Copy the pointee to the newly allocated value.
+  Value copySize = builder.create<LLVM::ConstantOp>(
+      func.getLoc(), builder.getI64Type(), builder.getI64IntegerAttr(typeSize));
+  Value isVolatile = builder.create<LLVM::ConstantOp>(
+      func.getLoc(), builder.getI1Type(), builder.getBoolAttr(false));
+  builder.create<LLVM::MemcpyOp>(func.getLoc(), allocaOp, argument, copySize,
+                                 isVolatile);
+  return allocaOp;
+}
+
 namespace {
 struct LLVMInlinerInterface : public DialectInlinerInterface {
   using DialectInlinerInterface::DialectInlinerInterface;
@@ -2866,8 +2900,19 @@ struct LLVMInlinerInterface : public DialectInlinerInterface {
     auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
     if (!callOp || !funcOp)
       return false;
-    // TODO: Handle argument and result attributes;
-    if (funcOp.getArgAttrs() || funcOp.getResAttrs())
+    if (auto attrs = funcOp.getArgAttrs()) {
+      for (Attribute attr : *attrs) {
+        auto attrDict = cast<DictionaryAttr>(attr);
+        for (NamedAttribute attr : attrDict) {
+          if (attr.getName() == LLVMDialect::getByValAttrName())
+            continue;
+          // TODO: Handle all argument attributes;
+          return false;
+        }
+      }
+    }
+    // TODO: Handle result attributes;
+    if (funcOp.getResAttrs())
       return false;
     // TODO: Handle exceptions.
    if (funcOp.getPersonality())
@@ -2942,6 +2987,14 @@ struct LLVMInlinerInterface : public DialectInlinerInterface {
     dst.replaceAllUsesWith(src);
   }
 
+  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
+                       Value argument, Type targetType,
+                       DictionaryAttr argumentAttrs) const final {
+    if (auto attr = argumentAttrs.getNamed(LLVMDialect::getByValAttrName()))
+      return handleByValArgument(builder, callable, argument, *attr);
+    return argument;
+  }
+
   void processInlinedCallBlocks(
       Operation *call,
       iterator_range<Region::iterator> inlinedBlocks) const override {
diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir
index ab28f4236af97..cefb8d5e461d4 100644
--- a/mlir/test/Dialect/LLVMIR/inlining.mlir
+++ b/mlir/test/Dialect/LLVMIR/inlining.mlir
@@ -187,20 +187,6 @@ llvm.func @caller() {
 
 // -----
 
-llvm.func @callee(%ptr : !llvm.ptr {llvm.byval = !llvm.ptr}) -> (!llvm.ptr) {
-  llvm.return %ptr : !llvm.ptr
-}
-
-// CHECK-LABEL: llvm.func @caller
-// CHECK-NEXT: llvm.call @callee
-// CHECK-NEXT: return
-llvm.func @caller(%ptr : !llvm.ptr) -> (!llvm.ptr) {
-  %0 = llvm.call @callee(%ptr) : (!llvm.ptr) -> (!llvm.ptr)
-  llvm.return %0 : !llvm.ptr
-}
-
-// -----
-
 llvm.func @static_alloca() -> f32 {
   %0 = llvm.mlir.constant(4 : i32) : i32
   %1 = llvm.alloca %0 x f32 : (i32) -> !llvm.ptr
@@ -349,3 +335,47 @@ llvm.func @test_inline(%cond0 : i1, %cond1 : i1, %funcArg : f32) -> f32 {
 ^bb3(%blockArg: f32):
   llvm.return %blockArg : f32
 }
+
+// -----
+
+llvm.func @with_byval_arg(%ptr : !llvm.ptr { llvm.byval = f64 }) {
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @test_byval
+// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
+// CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f64
+// CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
+llvm.func @test_byval(%ptr : !llvm.ptr) {
+  llvm.call @with_byval_arg(%ptr) : (!llvm.ptr) -> ()
+  llvm.return
+}
+
+// -----
+
+llvm.func @with_byval_arg(%ptr : !llvm.ptr { llvm.byval = f64 }) attributes {memory = #llvm.memory_effects<other = readwrite, argMem = read, inaccessibleMem = readwrite>} {
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @test_byval_read_only
+// CHECK-NOT: llvm.call
+// CHECK-NEXT: llvm.return
+llvm.func @test_byval_read_only(%ptr : !llvm.ptr) {
+  llvm.call @with_byval_arg(%ptr) : (!llvm.ptr) -> ()
+  llvm.return
+}
+
+// -----
+
+llvm.func @with_byval_arg(%ptr : !llvm.ptr { llvm.byval = f64 }) attributes {memory = #llvm.memory_effects<other = readwrite, argMem = write, inaccessibleMem = readwrite>} {
+  llvm.return
+}
+
+// CHECK-LABEL: llvm.func @test_byval_write_only
+// CHECK-SAME: %[[PTR:[a-zA-Z0-9_]+]]: !llvm.ptr
+// CHECK: %[[ALLOCA:.+]] = llvm.alloca %{{.+}} x f64
+// CHECK: "llvm.intr.memcpy"(%[[ALLOCA]], %[[PTR]]
+llvm.func @test_byval_write_only(%ptr : !llvm.ptr) {
+  llvm.call @with_byval_arg(%ptr) : (!llvm.ptr) -> ()
+  llvm.return
+}

From 883dbb9c86be87593a58ef10b070b3a0564c7fee Mon Sep 17 00:00:00 2001
From: Nikita Popov
Date: Wed, 22 Mar 2023 15:43:46 +0100
Subject: [PATCH 305/691] Revert "[MemProf] Context disambiguation cloning pass [patch 1a/3]"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit d6ad4f01c3dafcab335bca66dac6e36d9eac8421.
Fails to build on at least gcc 12.2: /home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:482:1: error: no declaration matches ‘ContextNode* CallsiteContextGraph::getNodeForInst(const CallInfo&)’ 482 | CallsiteContextGraph::getNodeForInst( | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:393:16: note: candidate is: ‘CallsiteContextGraph::ContextNode* CallsiteContextGraph::getNodeForInst(const CallInfo&)’ 393 | ContextNode *getNodeForInst(const CallInfo &C); | ^~~~~~~~~~~~~~ /home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:99:7: note: ‘class CallsiteContextGraph’ defined here 99 | class CallsiteContextGraph { | ^~~~~~~~~~~~~~~~~~~~ --- .../IPO/MemProfContextDisambiguation.h | 38 - llvm/lib/Passes/PassBuilder.cpp | 1 - llvm/lib/Passes/PassBuilderPipelines.cpp | 11 - llvm/lib/Passes/PassRegistry.def | 1 - llvm/lib/Transforms/IPO/CMakeLists.txt | 1 - .../IPO/MemProfContextDisambiguation.cpp | 1583 ----------------- llvm/test/ThinLTO/X86/memprof-summary.ll | 184 ++ .../MemProfContextDisambiguation/basic.ll | 158 -- .../duplicate-context-ids.ll | 232 --- .../duplicate-context-ids2.ll | 386 ---- .../indirectcall.ll | 261 --- .../MemProfContextDisambiguation/inlined.ll | 189 -- .../MemProfContextDisambiguation/inlined2.ll | 135 -- .../pass-pipeline.ll | 41 - 14 files changed, 184 insertions(+), 3037 deletions(-) delete mode 100644 llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h delete mode 100644 llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp create mode 100644 llvm/test/ThinLTO/X86/memprof-summary.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/basic.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll delete mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h deleted file mode 100644 index 56e56ed67f7df..0000000000000 --- a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h +++ /dev/null @@ -1,38 +0,0 @@ -//==- MemProfContextDisambiguation.h - Context Disambiguation ----*- C++ -*-==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Implements support for context disambiguation of allocation calls for profile -// guided heap optimization using memprof metadata. See implementation file for -// details. 
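-// Illustrative usage once enabled (the pass is registered in PassRegistry.def
-// as "memprof-context-disambiguation" and gated in the LTO pipeline by the
-// -enable-memprof-context-disambiguation flag):
-//   opt -passes=memprof-context-disambiguation in.bc -o out.bc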
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
-#define LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringSet.h"
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/IR/PassManager.h"
-
-namespace llvm {
-class Module;
-
-class MemProfContextDisambiguation
-    : public PassInfoMixin<MemProfContextDisambiguation> {
-  /// Run the context disambiguator on \p M, returns true if any changes made.
-  bool processModule(Module &M);
-
-public:
-  MemProfContextDisambiguation() {}
-
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-};
-} // end namespace llvm
-
-#endif // LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index a04f8bbaa5dc0..89d2e6a4b2d1a 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -117,7 +117,6 @@
 #include "llvm/Transforms/IPO/Internalize.h"
 #include "llvm/Transforms/IPO/LoopExtractor.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
-#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
 #include "llvm/Transforms/IPO/MergeFunctions.h"
 #include "llvm/Transforms/IPO/ModuleInliner.h"
 #include "llvm/Transforms/IPO/OpenMPOpt.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index aaabe23049288..1d386139d9e6c 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -57,7 +57,6 @@
 #include "llvm/Transforms/IPO/InferFunctionAttrs.h"
 #include "llvm/Transforms/IPO/Inliner.h"
 #include "llvm/Transforms/IPO/LowerTypeTests.h"
-#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
 #include "llvm/Transforms/IPO/MergeFunctions.h"
 #include "llvm/Transforms/IPO/ModuleInliner.h"
 #include "llvm/Transforms/IPO/OpenMPOpt.h"
@@ -272,10 +271,6 @@ static cl::opt<AttributorRunOption> AttributorRun(
     clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs")));
 
-cl::opt<bool> EnableMemProfContextDisambiguation(
-    "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
-    cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
-
 PipelineTuningOptions::PipelineTuningOptions() {
   LoopInterleaving = true;
   LoopVectorization = true;
@@ -1714,12 +1709,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
                           InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
                                         InlinePass::CGSCCInliner}));
 
-  // Perform context disambiguation after inlining, since that would reduce the
-  // amount of additional cloning required to distinguish the allocation
-  // contexts.
-  if (EnableMemProfContextDisambiguation)
-    MPM.addPass(MemProfContextDisambiguation());
-
   // Optimize globals again after we ran the inliner.
MPM.addPass(GlobalOptPass()); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 82592a1ee9b55..04d648580a8c5 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -87,7 +87,6 @@ MODULE_PASS("name-anon-globals", NameAnonGlobalPass()) MODULE_PASS("no-op-module", NoOpModulePass()) MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass()) MODULE_PASS("partial-inliner", PartialInlinerPass()) -MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation()) MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion()) MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen()) MODULE_PASS("pgo-instr-use", PGOInstrumentationUse()) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index e03aff0f65d7a..063a9a60d0cb5 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -27,7 +27,6 @@ add_llvm_component_library(LLVMipo Internalize.cpp LoopExtractor.cpp LowerTypeTests.cpp - MemProfContextDisambiguation.cpp MergeFunctions.cpp ModuleInliner.cpp OpenMPOpt.cpp diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp deleted file mode 100644 index fc8b12df67822..0000000000000 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ /dev/null @@ -1,1583 +0,0 @@ -//==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements support for context disambiguation of allocation -// calls for profile guided heap optimization. Specifically, it uses Memprof -// profiles which indicate context specific allocation behavior (currently -// distinguishing cold vs hot memory allocations). Cloning is performed to -// expose the cold allocation call contexts, and the allocation calls are -// subsequently annotated with an attribute for later transformation. -// -// The transformations can be performed either directly on IR (regular LTO), or -// (eventually) on a ThinLTO index (later applied to the IR during the ThinLTO -// backend). Both types of LTO operate on a the same base graph representation, -// which uses CRTP to support either IR or Index formats. 
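-  /// Returns true if any changes were made; in this initial patch it always
-  /// returns false, since only graph construction, verification, and dumping
-  /// are implemented so far.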
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Analysis/MemoryProfileInfo.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/GraphWriter.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/IPO.h" -#include -#include -using namespace llvm; -using namespace llvm::memprof; - -#define DEBUG_TYPE "memprof-context-disambiguation" - -static cl::opt DotFilePathPrefix( - "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, - cl::value_desc("filename"), - cl::desc("Specify the path prefix of the MemProf dot files.")); - -static cl::opt ExportToDot("memprof-export-to-dot", cl::init(false), - cl::Hidden, - cl::desc("Export graph to dot files.")); - -static cl::opt - DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, - cl::desc("Dump CallingContextGraph to stdout after each stage.")); - -static cl::opt - VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, - cl::desc("Perform verification checks on CallingContextGraph.")); - -static cl::opt - VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, - cl::desc("Perform frequent verification checks on nodes.")); - -inline bool hasSingleAllocType(uint8_t AllocTypes) { - switch (AllocTypes) { - case (uint8_t)AllocationType::Cold: - case (uint8_t)AllocationType::NotCold: - return true; - break; - case (uint8_t)AllocationType::None: - assert(false); - break; - default: - return false; - break; - } - llvm_unreachable("invalid alloc type"); -} - -/// CRTP base for graphs built from either IR or ThinLTO summary index. -/// -/// The graph represents the call contexts in all memprof metadata on allocation -/// calls, with nodes for the allocations themselves, as well as for the calls -/// in each context. The graph is initially built from the allocation memprof -/// metadata (or summary) MIBs. It is then updated to match calls with callsite -/// metadata onto the nodes, updating it to reflect any inlining performed on -/// those calls. -/// -/// Each MIB (representing an allocation's call context with allocation -/// behavior) is assigned a unique context id during the graph build. The edges -/// and nodes in the graph are decorated with the context ids they carry. This -/// is used to correctly update the graph when cloning is performed so that we -/// can uniquify the context for a single (possibly cloned) allocation. -template -class CallsiteContextGraph { -public: - CallsiteContextGraph() = default; - CallsiteContextGraph(const CallsiteContextGraph &) = default; - CallsiteContextGraph(CallsiteContextGraph &&) = default; - - /// Main entry point to perform analysis and transformations on graph. 
- bool process(); - - void dump() const; - void print(raw_ostream &OS) const; - - friend raw_ostream &operator<<(raw_ostream &OS, - const CallsiteContextGraph &CCG) { - CCG.print(OS); - return OS; - } - - friend struct GraphTraits< - const CallsiteContextGraph *>; - friend struct DOTGraphTraits< - const CallsiteContextGraph *>; - - void exportToDot(std::string Label) const; - - /// Represents a function clone via FuncTy pointer and clone number pair. - struct FuncInfo final - : public std::pair { - using Base = std::pair; - FuncInfo(const Base &B) : Base(B) {} - FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {} - explicit operator bool() const { return this->first != nullptr; } - FuncTy *func() const { return this->first; } - unsigned cloneNo() const { return this->second; } - }; - - /// Represents a callsite clone via CallTy and clone number pair. - struct CallInfo final : public std::pair { - using Base = std::pair; - CallInfo(const Base &B) : Base(B) {} - CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0) - : Base(Call, CloneNo) {} - explicit operator bool() const { return (bool)this->first; } - CallTy call() const { return this->first; } - unsigned cloneNo() const { return this->second; } - void setCloneNo(unsigned N) { this->second = N; } - void print(raw_ostream &OS) const { - if (!operator bool()) { - assert(!cloneNo()); - OS << "null Call"; - return; - } - call()->print(OS); - OS << "\t(clone " << cloneNo() << ")"; - } - void dump() const { - print(dbgs()); - dbgs() << "\n"; - } - friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) { - Call.print(OS); - return OS; - } - }; - - struct ContextEdge; - - /// Node in the Callsite Context Graph - struct ContextNode { - // Keep this for now since in the IR case where we have an Instruction* it - // is not as immediately discoverable. Used for printing richer information - // when dumping graph. - bool IsAllocation; - - // Keeps track of when the Call was reset to null because there was - // recursion. - bool Recursive = false; - - // The corresponding allocation or interior call. - CallInfo Call; - - // For alloc nodes this is a unique id assigned when constructed, and for - // callsite stack nodes it is the original stack id when the node is - // constructed from the memprof MIB metadata on the alloc nodes. Note that - // this is only used when matching callsite metadata onto the stack nodes - // created when processing the allocation memprof MIBs, and for labeling - // nodes in the dot graph. Therefore we don't bother to assign a value for - // clones. - uint64_t OrigStackOrAllocId = 0; - - // This will be formed by ORing together the AllocationType enum values - // for contexts including this node. - uint8_t AllocTypes = 0; - - // Edges to all callees in the profiled call stacks. - // TODO: Should this be a map (from Callee node) for more efficient lookup? - std::vector> CalleeEdges; - - // Edges to all callers in the profiled call stacks. - // TODO: Should this be a map (from Caller node) for more efficient lookup? - std::vector> CallerEdges; - - // The set of IDs for contexts including this node. - DenseSet ContextIds; - - // List of clones of this ContextNode, initially empty. - std::vector Clones; - - // If a clone, points to the original uncloned node. 
- ContextNode *CloneOf = nullptr; - - ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {} - - ContextNode(bool IsAllocation, CallInfo C) - : IsAllocation(IsAllocation), Call(C) {} - - std::unique_ptr clone() { - auto Clone = std::make_unique(IsAllocation, Call); - if (CloneOf) { - CloneOf->Clones.push_back(Clone.get()); - Clone->CloneOf = CloneOf; - } else { - Clones.push_back(Clone.get()); - Clone->CloneOf = this; - } - return Clone; - } - - ContextNode *getOrigNode() { - if (!CloneOf) - return this; - return CloneOf; - } - - void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, - unsigned int ContextId); - - ContextEdge *findEdgeFromCallee(const ContextNode *Callee); - ContextEdge *findEdgeFromCaller(const ContextNode *Caller); - void eraseCalleeEdge(const ContextEdge *Edge); - void eraseCallerEdge(const ContextEdge *Edge); - - void setCall(CallInfo C) { Call = C; } - - bool hasCall() const { return (bool)Call.call(); } - - void printCall(raw_ostream &OS) const { Call.print(OS); } - - // True if this node was effectively removed from the graph, in which case - // its context id set, caller edges, and callee edges should all be empty. - bool isRemoved() const { - assert(ContextIds.empty() == - (CalleeEdges.empty() && CallerEdges.empty())); - return ContextIds.empty(); - } - - void dump() const; - void print(raw_ostream &OS) const; - - friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) { - Node.print(OS); - return OS; - } - }; - - /// Edge in the Callsite Context Graph from a ContextNode N to a caller or - /// callee. - struct ContextEdge { - ContextNode *Callee; - ContextNode *Caller; - - // This will be formed by ORing together the AllocationType enum values - // for contexts including this edge. - uint8_t AllocTypes = 0; - - // The set of IDs for contexts including this edge. - DenseSet ContextIds; - - ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType, - DenseSet ContextIds) - : Callee(Callee), Caller(Caller), AllocTypes(AllocType), - ContextIds(ContextIds) {} - - DenseSet &getContextIds() { return ContextIds; } - - void dump() const; - void print(raw_ostream &OS) const; - - friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) { - Edge.print(OS); - return OS; - } - }; - -protected: - /// Get a list of nodes corresponding to the stack ids in the given callsite - /// context. - template - std::vector - getStackIdsWithContextNodes(CallStack &CallsiteContext); - - /// Adds nodes for the given allocation and any stack ids on its memprof MIB - /// metadata (or summary). - ContextNode *addAllocNode(CallInfo Call, const FuncTy *F); - - /// Adds nodes for the given MIB stack ids. - template - void addStackNodesForMIB(ContextNode *AllocNode, - CallStack &StackContext, - CallStack &CallsiteContext, - AllocationType AllocType); - - /// Matches all callsite metadata (or summary) to the nodes created for - /// allocation memprof MIB metadata, synthesizing new nodes to reflect any - /// inlining performed on those callsite instructions. - void updateStackNodes(); - - /// Update graph to conservatively handle any callsite stack nodes that target - /// multiple different callee target functions. - void handleCallsitesWithMultipleTargets(); - - /// Save lists of calls with MemProf metadata in each function, for faster - /// iteration. - std::vector>> - FuncToCallsWithMetadata; - - /// Map from callsite node to the enclosing caller function. 
- std::map NodeToCallingFunc; - -private: - using EdgeIter = typename std::vector>::iterator; - - using CallContextInfo = std::tuple, - const FuncTy *, DenseSet>; - - /// Assigns the given Node to calls at or inlined into the location with - /// the Node's stack id, after post order traversing and processing its - /// caller nodes. Uses the call information recorded in the given - /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences - /// as needed. Called by updateStackNodes which sets up the given - /// StackIdToMatchingCalls map. - void assignStackNodesPostOrder( - ContextNode *Node, DenseSet &Visited, - DenseMap> &StackIdToMatchingCalls); - - /// Duplicates the given set of context ids, updating the provided - /// map from each original id with the newly generated context ids, - /// and returning the new duplicated id set. - DenseSet duplicateContextIds( - const DenseSet &StackSequenceContextIds, - DenseMap> &OldToNewContextIds); - - /// Propagates all duplicated context ids across the graph. - void propagateDuplicateContextIds( - const DenseMap> &OldToNewContextIds); - - /// Connect the NewNode to OrigNode's callees if TowardsCallee is true, - /// else to its callers. Also updates OrigNode's edges to remove any context - /// ids moved to the newly created edge. - void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode, - bool TowardsCallee); - - /// Get the stack id corresponding to the given Id or Index (for IR this will - /// return itself, for a summary index this will return the id recorded in the - /// index for that stack id index value). - uint64_t getStackId(uint64_t IdOrIndex) const { - return static_cast(this)->getStackId(IdOrIndex); - } - - /// Returns true if the given call targets the given function. - bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) { - return static_cast(this)->calleeMatchesFunc(Call, Func); - } - - /// Get a list of nodes corresponding to the stack ids in the given - /// callsite's context. - std::vector getStackIdsWithContextNodesForCall(CallTy Call) { - return static_cast(this)->getStackIdsWithContextNodesForCall( - Call); - } - - /// Get the last stack id in the context for callsite. - uint64_t getLastStackId(CallTy Call) { - return static_cast(this)->getLastStackId(Call); - } - - /// Gets a label to use in the dot graph for the given call clone in the given - /// function. - std::string getLabel(const FuncTy *Func, const CallTy Call, - unsigned CloneNo) const { - return static_cast(this)->getLabel(Func, Call, CloneNo); - } - - /// Helpers to find the node corresponding to the given call or stackid. - ContextNode *getNodeForInst(const CallInfo &C); - ContextNode *getNodeForAlloc(const CallInfo &C); - ContextNode *getNodeForStackId(uint64_t StackId); - - /// Removes the node information recorded for the given call. - void unsetNodeForInst(const CallInfo &C); - - /// Computes the alloc type corresponding to the given context ids, by - /// unioning their recorded alloc types. - uint8_t computeAllocType(DenseSet &ContextIds); - - /// Map from each context ID to the AllocationType assigned to that context. - std::map ContextIdToAllocationType; - - /// Identifies the context node created for a stack id when adding the MIB - /// contexts to the graph. This is used to locate the context nodes when - /// trying to assign the corresponding callsites with those stack ids to these - /// nodes. - std::map StackEntryIdToContextNodeMap; - - /// Maps to track the calls to their corresponding nodes in the graph. 
- std::map AllocationCallToContextNodeMap; - std::map NonAllocationCallToContextNodeMap; - - /// Owner of all ContextNode unique_ptrs. - std::vector> NodeOwner; - - /// Perform sanity checks on graph when requested. - void check() const; - - /// Keeps track of the last unique context id assigned. - unsigned int LastContextId = 0; -}; - -template -using ContextNode = - typename CallsiteContextGraph::ContextNode; -template -using ContextEdge = - typename CallsiteContextGraph::ContextEdge; -template -using FuncInfo = - typename CallsiteContextGraph::FuncInfo; -template -using CallInfo = - typename CallsiteContextGraph::CallInfo; - -/// CRTP derived class for graphs built from IR (regular LTO). -class ModuleCallsiteContextGraph - : public CallsiteContextGraph { -public: - ModuleCallsiteContextGraph(Module &M); - -private: - friend CallsiteContextGraph; - - uint64_t getStackId(uint64_t IdOrIndex) const; - bool calleeMatchesFunc(Instruction *Call, const Function *Func); - uint64_t getLastStackId(Instruction *Call); - std::vector getStackIdsWithContextNodesForCall(Instruction *Call); - std::string getLabel(const Function *Func, const Instruction *Call, - unsigned CloneNo) const; - - const Module &Mod; -}; - -namespace { - -struct FieldSeparator { - bool Skip = true; - const char *Sep; - - FieldSeparator(const char *Sep = ", ") : Sep(Sep) {} -}; - -raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { - if (FS.Skip) { - FS.Skip = false; - return OS; - } - return OS << FS.Sep; -} - -} // end anonymous namespace - -template -ContextNode * -CallsiteContextGraph::getNodeForInst( - const CallInfo &C) { - ContextNode *Node = getNodeForAlloc(C); - if (Node) - return Node; - - auto NonAllocCallNode = NonAllocationCallToContextNodeMap.find(C); - if (NonAllocCallNode != NonAllocationCallToContextNodeMap.end()) { - return NonAllocCallNode->second; - } - return nullptr; -} - -template -ContextNode * -CallsiteContextGraph::getNodeForAlloc( - const CallInfo &C) { - auto AllocCallNode = AllocationCallToContextNodeMap.find(C); - if (AllocCallNode != AllocationCallToContextNodeMap.end()) { - return AllocCallNode->second; - } - return nullptr; -} - -template -ContextNode * -CallsiteContextGraph::getNodeForStackId( - uint64_t StackId) { - auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId); - if (StackEntryNode != StackEntryIdToContextNodeMap.end()) - return StackEntryNode->second; - return nullptr; -} - -template -void CallsiteContextGraph::unsetNodeForInst( - const CallInfo &C) { - AllocationCallToContextNodeMap.erase(C) || - NonAllocationCallToContextNodeMap.erase(C); - assert(!AllocationCallToContextNodeMap.count(C) && - !NonAllocationCallToContextNodeMap.count(C)); -} - -template -void CallsiteContextGraph::ContextNode:: - addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, - unsigned int ContextId) { - for (auto &Edge : CallerEdges) { - if (Edge->Caller == Caller) { - Edge->AllocTypes |= (uint8_t)AllocType; - Edge->getContextIds().insert(ContextId); - return; - } - } - std::shared_ptr Edge = std::make_shared( - this, Caller, (uint8_t)AllocType, DenseSet({ContextId})); - CallerEdges.push_back(Edge); - Caller->CalleeEdges.push_back(Edge); -} - -template -ContextEdge * -CallsiteContextGraph::ContextNode:: - findEdgeFromCallee(const ContextNode *Callee) { - for (const auto &Edge : CalleeEdges) - if (Edge->Callee == Callee) - return Edge.get(); - return nullptr; -} - -template -ContextEdge * -CallsiteContextGraph::ContextNode:: - findEdgeFromCaller(const ContextNode *Caller) { 
- for (const auto &Edge : CallerEdges) - if (Edge->Caller == Caller) - return Edge.get(); - return nullptr; -} - -template -void CallsiteContextGraph::ContextNode:: - eraseCalleeEdge(const ContextEdge *Edge) { - auto EI = - std::find_if(CalleeEdges.begin(), CalleeEdges.end(), - [Edge](const std::shared_ptr &CalleeEdge) { - return CalleeEdge.get() == Edge; - }); - assert(EI != CalleeEdges.end()); - CalleeEdges.erase(EI); -} - -template -void CallsiteContextGraph::ContextNode:: - eraseCallerEdge(const ContextEdge *Edge) { - auto EI = - std::find_if(CallerEdges.begin(), CallerEdges.end(), - [Edge](const std::shared_ptr &CallerEdge) { - return CallerEdge.get() == Edge; - }); - assert(EI != CallerEdges.end()); - CallerEdges.erase(EI); -} - -template -uint8_t CallsiteContextGraph::computeAllocType( - DenseSet &ContextIds) { - uint8_t BothTypes = - (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; - uint8_t AllocType = (uint8_t)AllocationType::None; - for (auto Id : ContextIds) { - AllocType |= (uint8_t)ContextIdToAllocationType[Id]; - // Bail early if alloc type reached both, no further refinement. - if (AllocType == BothTypes) - return AllocType; - } - return AllocType; -} - -template -ContextNode * -CallsiteContextGraph::addAllocNode( - CallInfo Call, const FuncTy *F) { - assert(!getNodeForAlloc(Call)); - NodeOwner.push_back( - std::make_unique(/*IsAllocation=*/true, Call)); - ContextNode *AllocNode = NodeOwner.back().get(); - AllocationCallToContextNodeMap[Call] = AllocNode; - NodeToCallingFunc[AllocNode] = F; - // Use LastContextId as a uniq id for MIB allocation nodes. - AllocNode->OrigStackOrAllocId = LastContextId; - // Alloc type should be updated as we add in the MIBs. We should assert - // afterwards that it is not still None. - AllocNode->AllocTypes = (uint8_t)AllocationType::None; - - return AllocNode; -} - -template -template -void CallsiteContextGraph::addStackNodesForMIB( - ContextNode *AllocNode, CallStack &StackContext, - CallStack &CallsiteContext, AllocationType AllocType) { - ContextIdToAllocationType[++LastContextId] = AllocType; - - // Update alloc type and context ids for this MIB. - AllocNode->AllocTypes |= (uint8_t)AllocType; - AllocNode->ContextIds.insert(LastContextId); - - // Now add or update nodes for each stack id in alloc's context. - // Later when processing the stack ids on non-alloc callsites we will adjust - // for any inlining in the context. - ContextNode *PrevNode = AllocNode; - // Look for recursion (direct recursion should have been collapsed by - // module summary analysis, here we should just be detecting mutual - // recursion). Mark these nodes so we don't try to clone. - SmallSet StackIdSet; - // Skip any on the allocation call (inlining). 
- for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext); - ContextIter != StackContext.end(); ++ContextIter) { - auto StackId = getStackId(*ContextIter); - ContextNode *StackNode = getNodeForStackId(StackId); - if (!StackNode) { - NodeOwner.push_back( - std::make_unique(/*IsAllocation=*/false)); - StackNode = NodeOwner.back().get(); - StackEntryIdToContextNodeMap[StackId] = StackNode; - StackNode->OrigStackOrAllocId = StackId; - } - auto Ins = StackIdSet.insert(StackId); - if (!Ins.second) - StackNode->Recursive = true; - StackNode->ContextIds.insert(LastContextId); - StackNode->AllocTypes |= (uint8_t)AllocType; - PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId); - PrevNode = StackNode; - } -} - -template -DenseSet -CallsiteContextGraph::duplicateContextIds( - const DenseSet &StackSequenceContextIds, - DenseMap> &OldToNewContextIds) { - DenseSet NewContextIds; - for (auto OldId : StackSequenceContextIds) { - NewContextIds.insert(++LastContextId); - OldToNewContextIds[OldId].insert(LastContextId); - assert(ContextIdToAllocationType.count(OldId)); - // The new context has the same allocation type as original. - ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; - } - return NewContextIds; -} - -template -void CallsiteContextGraph:: - propagateDuplicateContextIds( - const DenseMap> &OldToNewContextIds) { - // Build a set of duplicated context ids corresponding to the input id set. - auto GetNewIds = [&OldToNewContextIds](const DenseSet &ContextIds) { - DenseSet NewIds; - for (auto Id : ContextIds) - if (auto NewId = OldToNewContextIds.find(Id); - NewId != OldToNewContextIds.end()) - NewIds.insert(NewId->second.begin(), NewId->second.end()); - return NewIds; - }; - - // Recursively update context ids sets along caller edges. - auto UpdateCallers = [&](ContextNode *Node, - DenseSet &Visited, - auto &&UpdateCallers) -> void { - for (auto Edge : Node->CallerEdges) { - auto Inserted = Visited.insert(Edge.get()); - if (!Inserted.second) - continue; - ContextNode *NextNode = Edge->Caller; - DenseSet NewIdsToAdd = GetNewIds(Edge->getContextIds()); - // Only need to recursively iterate to NextNode via this caller edge if - // it resulted in any added ids to NextNode. - if (!NewIdsToAdd.empty()) { - Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); - NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); - UpdateCallers(NextNode, Visited, UpdateCallers); - } - } - }; - - DenseSet Visited; - for (auto &Entry : AllocationCallToContextNodeMap) { - auto *Node = Entry.second; - // Update ids on the allocation nodes before calling the recursive - // update along caller edges, since this simplifies the logic during - // that traversal. - DenseSet NewIdsToAdd = GetNewIds(Node->ContextIds); - Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); - UpdateCallers(Node, Visited, UpdateCallers); - } -} - -template -void CallsiteContextGraph::connectNewNode( - ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) { - // Make a copy of the context ids, since this will be adjusted below as they - // are moved. - DenseSet RemainingContextIds = NewNode->ContextIds; - auto &OrigEdges = - TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges; - // Increment iterator in loop so that we can remove edges as needed. 
- for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) { - auto Edge = *EI; - // Remove any matching context ids from Edge, return set that were found and - // removed, these are the new edge's context ids. Also update the remaining - // (not found ids). - DenseSet NewEdgeContextIds, NotFoundContextIds; - set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds, - NotFoundContextIds); - RemainingContextIds.swap(NotFoundContextIds); - // If no matching context ids for this edge, skip it. - if (NewEdgeContextIds.empty()) { - ++EI; - continue; - } - if (TowardsCallee) { - auto NewEdge = std::make_shared( - Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds), - NewEdgeContextIds); - NewNode->CalleeEdges.push_back(NewEdge); - NewEdge->Callee->CallerEdges.push_back(NewEdge); - } else { - auto NewEdge = std::make_shared( - NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds), - NewEdgeContextIds); - NewNode->CallerEdges.push_back(NewEdge); - NewEdge->Caller->CalleeEdges.push_back(NewEdge); - } - // Remove old edge if context ids empty. - if (Edge->getContextIds().empty()) { - if (TowardsCallee) { - Edge->Callee->eraseCallerEdge(Edge.get()); - EI = OrigNode->CalleeEdges.erase(EI); - } else { - Edge->Caller->eraseCalleeEdge(Edge.get()); - EI = OrigNode->CallerEdges.erase(EI); - } - continue; - } - ++EI; - } -} - -template -void CallsiteContextGraph:: - assignStackNodesPostOrder(ContextNode *Node, - DenseSet &Visited, - DenseMap> - &StackIdToMatchingCalls) { - auto Inserted = Visited.insert(Node); - if (!Inserted.second) - return; - // Post order traversal. Iterate over a copy since we may add nodes and - // therefore new callers during the recursive call, invalidating any - // iterator over the original edge vector. We don't need to process these - // new nodes as they were already processed on creation. - auto CallerEdges = Node->CallerEdges; - for (auto &Edge : CallerEdges) { - // Skip any that have been removed during the recursion. - if (!Edge) - continue; - assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); - } - - // If this node's stack id is in the map, update the graph to contain new - // nodes representing any inlining at interior callsites. Note we move the - // associated context ids over to the new nodes. - - // Ignore this node if it is for an allocation or we didn't record any - // stack id lists ending at it. - if (Node->IsAllocation || - !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId)) - return; - - auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId]; - // Handle the simple case first. A single call with a single stack id. - // In this case there is no need to create any new context nodes, simply - // assign the context node for stack id to this Call. - if (Calls.size() == 1) { - auto &[Call, Ids, Func, SavedContextIds] = Calls[0]; - if (Ids.size() == 1) { - assert(SavedContextIds.empty()); - // It should be this Node - assert(Node == getNodeForStackId(Ids[0])); - if (Node->Recursive) - return; - Node->setCall(Call); - NonAllocationCallToContextNodeMap[Call] = Node; - NodeToCallingFunc[Node] = Func; - return; - } - } - - // Find the node for the last stack id, which should be the same - // across all calls recorded for this id, and is this node's id. - uint64_t LastId = Node->OrigStackOrAllocId; - ContextNode *LastNode = getNodeForStackId(LastId); - // We should only have kept stack ids that had nodes. 
- assert(LastNode); - - for (unsigned I = 0; I < Calls.size(); I++) { - auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; - // Skip any for which we didn't assign any ids, these don't get a node in - // the graph. - if (SavedContextIds.empty()) - continue; - - assert(LastId == Ids.back()); - - ContextNode *FirstNode = getNodeForStackId(Ids[0]); - assert(FirstNode); - - // Recompute the context ids for this stack id sequence (the - // intersection of the context ids of the corresponding nodes). - // Start with the ids we saved in the map for this call, which could be - // duplicated context ids. We have to recompute as we might have overlap - // overlap between the saved context ids for different last nodes, and - // removed them already during the post order traversal. - set_intersect(SavedContextIds, FirstNode->ContextIds); - ContextNode *PrevNode = nullptr; - for (auto Id : Ids) { - ContextNode *CurNode = getNodeForStackId(Id); - // We should only have kept stack ids that had nodes and weren't - // recursive. - assert(CurNode); - assert(!CurNode->Recursive); - if (!PrevNode) { - PrevNode = CurNode; - continue; - } - auto *Edge = CurNode->findEdgeFromCallee(PrevNode); - if (!Edge) { - SavedContextIds.clear(); - break; - } - PrevNode = CurNode; - set_intersect(SavedContextIds, Edge->getContextIds()); - - // If we now have no context ids for clone, skip this call. - if (SavedContextIds.empty()) - break; - } - if (SavedContextIds.empty()) - continue; - - // Create new context node. - NodeOwner.push_back( - std::make_unique(/*IsAllocation=*/false, Call)); - ContextNode *NewNode = NodeOwner.back().get(); - NodeToCallingFunc[NewNode] = Func; - NonAllocationCallToContextNodeMap[Call] = NewNode; - NewNode->ContextIds = SavedContextIds; - NewNode->AllocTypes = computeAllocType(NewNode->ContextIds); - - // Connect to callees of innermost stack frame in inlined call chain. - // This updates context ids for FirstNode's callee's to reflect those - // moved to NewNode. - connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true); - - // Connect to callers of outermost stack frame in inlined call chain. - // This updates context ids for FirstNode's caller's to reflect those - // moved to NewNode. - connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false); - - // Now we need to remove context ids from edges/nodes between First and - // Last Node. - PrevNode = nullptr; - for (auto Id : Ids) { - ContextNode *CurNode = getNodeForStackId(Id); - // We should only have kept stack ids that had nodes. - assert(CurNode); - - // Remove the context ids moved to NewNode from CurNode, and the - // edge from the prior node. - set_subtract(CurNode->ContextIds, NewNode->ContextIds); - if (PrevNode) { - auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode); - assert(PrevEdge); - set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds); - if (PrevEdge->getContextIds().empty()) { - PrevNode->eraseCallerEdge(PrevEdge); - CurNode->eraseCalleeEdge(PrevEdge); - } - } - PrevNode = CurNode; - } - } -} - -template -void CallsiteContextGraph::updateStackNodes() { - // Map of stack id to all calls with that as the last (outermost caller) - // callsite id that has a context node (some might not due to pruning - // performed during matching of the allocation profile contexts). - // The CallContextInfo contains the Call and a list of its stack ids with - // ContextNodes, the function containing Call, and the set of context ids - // the analysis will eventually identify for use in any new node created - // for that callsite. 
- DenseMap> StackIdToMatchingCalls; - for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) { - for (auto &Call : CallsWithMetadata) { - // Ignore allocations, already handled. - if (AllocationCallToContextNodeMap.count(Call)) - continue; - auto StackIdsWithContextNodes = - getStackIdsWithContextNodesForCall(Call.call()); - // If there were no nodes created for MIBs on allocs (maybe this was in - // the unambiguous part of the MIB stack that was pruned), ignore. - if (StackIdsWithContextNodes.empty()) - continue; - // Otherwise, record this Call along with the list of ids for the last - // (outermost caller) stack id with a node. - StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back( - {Call.call(), StackIdsWithContextNodes, Func, {}}); - } - } - - // First make a pass through all stack ids that correspond to a call, - // as identified in the above loop. Compute the context ids corresponding to - // each of these calls when they correspond to multiple stack ids due to - // due to inlining. Perform any duplication of context ids required when - // there is more than one call with the same stack ids. Their (possibly newly - // duplicated) context ids are saved in the StackIdToMatchingCalls map. - DenseMap> OldToNewContextIds; - for (auto &It : StackIdToMatchingCalls) { - auto &Calls = It.getSecond(); - // Skip single calls with a single stack id. These don't need a new node. - if (Calls.size() == 1) { - auto &Ids = std::get<1>(Calls[0]); - if (Ids.size() == 1) - continue; - } - // In order to do the best and maximal matching of inlined calls to context - // node sequences we will sort the vectors of stack ids in descending order - // of length, and within each length, lexicographically by stack id. The - // latter is so that we can specially handle calls that have identical stack - // id sequences (either due to cloning or artificially because of the MIB - // context pruning). - std::sort(Calls.begin(), Calls.end(), - [](const CallContextInfo &A, const CallContextInfo &B) { - auto &IdsA = std::get<1>(A); - auto &IdsB = std::get<1>(B); - return IdsA.size() > IdsB.size() || - (IdsA.size() == IdsB.size() && IdsA < IdsB); - }); - - // Find the node for the last stack id, which should be the same - // across all calls recorded for this id, and is the id for this - // entry in the StackIdToMatchingCalls map. - uint64_t LastId = It.getFirst(); - ContextNode *LastNode = getNodeForStackId(LastId); - // We should only have kept stack ids that had nodes. - assert(LastNode); - - if (LastNode->Recursive) - continue; - - // Initialize the context ids with the last node's. We will subsequently - // refine the context ids by computing the intersection along all edges. - DenseSet LastNodeContextIds = LastNode->ContextIds; - assert(!LastNodeContextIds.empty()); - - for (unsigned I = 0; I < Calls.size(); I++) { - auto &[Call, Ids, Func, SavedContextIds] = Calls[I]; - assert(SavedContextIds.empty()); - assert(LastId == Ids.back()); - - // First compute the context ids for this stack id sequence (the - // intersection of the context ids of the corresponding nodes). - // Start with the remaining saved ids for the last node. - assert(!LastNodeContextIds.empty()); - DenseSet StackSequenceContextIds = LastNodeContextIds; - - ContextNode *PrevNode = LastNode; - ContextNode *CurNode = LastNode; - bool Skip = false; - - // Iterate backwards through the stack Ids, starting after the last Id - // in the list, which was handled once outside for all Calls. 
- for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { - auto Id = *IdIter; - CurNode = getNodeForStackId(Id); - // We should only have kept stack ids that had nodes. - assert(CurNode); - - if (CurNode->Recursive) { - Skip = true; - break; - } - - auto *Edge = CurNode->findEdgeFromCaller(PrevNode); - // If there is no edge then the nodes belong to different MIB contexts, - // and we should skip this inlined context sequence. For example, this - // particular inlined context may include stack ids A->B, and we may - // indeed have nodes for both A and B, but it is possible that they were - // never profiled in sequence in a single MIB for any allocation (i.e. - // we might have profiled an allocation that involves the callsite A, - // but through a different one of its callee callsites, and we might - // have profiled an allocation that involves callsite B, but reached - // from a different caller callsite). - if (!Edge) { - Skip = true; - break; - } - PrevNode = CurNode; - - // Update the context ids, which is the intersection of the ids along - // all edges in the sequence. - set_intersect(StackSequenceContextIds, Edge->getContextIds()); - - // If we now have no context ids for clone, skip this call. - if (StackSequenceContextIds.empty()) { - Skip = true; - break; - } - } - if (Skip) - continue; - - // If some of this call's stack ids did not have corresponding nodes (due - // to pruning), don't include any context ids for contexts that extend - // beyond these nodes. Otherwise we would be matching part of unrelated / - // not fully matching stack contexts. To do this, subtract any context ids - // found in caller nodes of the last node found above. - if (Ids.back() != getLastStackId(Call)) { - for (auto PE : LastNode->CallerEdges) { - set_subtract(StackSequenceContextIds, PE->getContextIds()); - if (StackSequenceContextIds.empty()) - break; - } - // If we now have no context ids for clone, skip this call. - if (StackSequenceContextIds.empty()) - continue; - } - - // Check if the next set of stack ids is the same (since the Calls vector - // of tuples is sorted by the stack ids we can just look at the next one). - bool DuplicateContextIds = false; - if (I + 1 < Calls.size()) { - auto NextIds = std::get<1>(Calls[I + 1]); - DuplicateContextIds = Ids == NextIds; - } - - // If we don't have duplicate context ids, then we can assign all the - // context ids computed for the original node sequence to this call. - // If there are duplicate calls with the same stack ids then we synthesize - // new context ids that are duplicates of the originals. These are - // assigned to SavedContextIds, which is a reference into the map entry - // for this call, allowing us to access these ids later on. - OldToNewContextIds.reserve(OldToNewContextIds.size() + - StackSequenceContextIds.size()); - SavedContextIds = - DuplicateContextIds - ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds) - : StackSequenceContextIds; - assert(!SavedContextIds.empty()); - - if (!DuplicateContextIds) { - // Update saved last node's context ids to remove those that are - // assigned to other calls, so that it is ready for the next call at - // this stack id. - set_subtract(LastNodeContextIds, StackSequenceContextIds); - if (LastNodeContextIds.empty()) - break; - } - } - } - - // Propagate the duplicate context ids over the graph. 
- propagateDuplicateContextIds(OldToNewContextIds); - - if (VerifyCCG) - check(); - - // Now perform a post-order traversal over the graph, starting with the - // allocation nodes, essentially processing nodes from callers to callees. - // For any that contains an id in the map, update the graph to contain new - // nodes representing any inlining at interior callsites. Note we move the - // associated context ids over to the new nodes. - DenseSet Visited; - for (auto &Entry : AllocationCallToContextNodeMap) - assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); -} - -uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { - CallStack CallsiteContext( - Call->getMetadata(LLVMContext::MD_callsite)); - return CallsiteContext.back(); -} - -std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, - const Instruction *Call, - unsigned CloneNo) const { - return (Twine(Call->getFunction()->getName()) + " -> " + - cast(Call)->getCalledFunction()->getName()) - .str(); -} - -std::vector -ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( - Instruction *Call) { - CallStack CallsiteContext( - Call->getMetadata(LLVMContext::MD_callsite)); - return getStackIdsWithContextNodes( - CallsiteContext); -} - -template -template -std::vector -CallsiteContextGraph::getStackIdsWithContextNodes( - CallStack &CallsiteContext) { - std::vector StackIds; - for (auto IdOrIndex : CallsiteContext) { - auto StackId = getStackId(IdOrIndex); - ContextNode *Node = getNodeForStackId(StackId); - if (!Node) - break; - StackIds.push_back(StackId); - } - return StackIds; -} - -ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(Module &M) : Mod(M) { - for (auto &F : M) { - std::vector CallsWithMetadata; - for (auto &BB : F) { - for (auto &I : BB) { - if (!isa(I)) - continue; - if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { - CallsWithMetadata.push_back(&I); - auto *AllocNode = addAllocNode(&I, &F); - auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); - assert(CallsiteMD); - CallStack CallsiteContext(CallsiteMD); - // Add all of the MIBs and their stack nodes. - for (auto &MDOp : MemProfMD->operands()) { - auto *MIBMD = cast(MDOp); - MDNode *StackNode = getMIBStackNode(MIBMD); - assert(StackNode); - CallStack StackContext(StackNode); - addStackNodesForMIB( - AllocNode, StackContext, CallsiteContext, - getMIBAllocType(MIBMD)); - } - assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); - // Memprof and callsite metadata on memory allocations no longer - // needed. - I.setMetadata(LLVMContext::MD_memprof, nullptr); - I.setMetadata(LLVMContext::MD_callsite, nullptr); - } - // For callsite metadata, add to list for this function for later use. - else if (I.getMetadata(LLVMContext::MD_callsite)) - CallsWithMetadata.push_back(&I); - } - } - if (!CallsWithMetadata.empty()) - FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata}); - } - - if (DumpCCG) { - dbgs() << "CCG before updating call stack chains:\n"; - dbgs() << *this; - } - - if (ExportToDot) - exportToDot("prestackupdate"); - - updateStackNodes(); - - handleCallsitesWithMultipleTargets(); - - // Strip off remaining callsite metadata, no longer needed. - for (auto &FuncEntry : FuncToCallsWithMetadata) - for (auto &Call : FuncEntry.second) - Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr); -} - -template -void CallsiteContextGraph::handleCallsitesWithMultipleTargets() { - // Look for and workaround callsites that call multiple functions. 
- // This can happen for indirect calls, which needs better handling, and in - // more rare cases (e.g. macro expansion). - // TODO: To fix this for indirect calls we will want to perform speculative - // devirtualization using either the normal PGO info with ICP, or using the - // information in the profiled MemProf contexts. We can do this prior to - // this transformation for regular LTO, and for ThinLTO we can simulate that - // effect in the summary and perform the actual speculative devirtualization - // while cloning in the ThinLTO backend. - for (auto Entry = NonAllocationCallToContextNodeMap.begin(); - Entry != NonAllocationCallToContextNodeMap.end();) { - auto *Node = Entry->second; - assert(Node->Clones.empty()); - // Check all node callees and see if in the same function. - bool Removed = false; - auto Call = Node->Call.call(); - for (auto &Edge : Node->CalleeEdges) { - if (!Edge->Callee->hasCall()) - continue; - assert(NodeToCallingFunc.count(Edge->Callee)); - // Check if the called function matches that of the callee node. - if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee])) - continue; - // Work around by setting Node to have a null call, so it gets - // skipped during cloning. Otherwise assignFunctions will assert - // because its data structures are not designed to handle this case. - Entry = NonAllocationCallToContextNodeMap.erase(Entry); - Node->setCall(CallInfo()); - Removed = true; - break; - } - if (!Removed) - Entry++; - } -} - -uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { - // In the Module (IR) case this is already the Id. - return IdOrIndex; -} - -bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, - const Function *Func) { - auto *CB = dyn_cast(Call); - if (!CB->getCalledOperand()) - return false; - auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts(); - auto *CalleeFunc = dyn_cast(CalleeVal); - if (CalleeFunc == Func) - return true; - auto *Alias = dyn_cast(CalleeVal); - return Alias && Alias->getAliasee() == Func; -} - -static std::string getAllocTypeString(uint8_t AllocTypes) { - if (!AllocTypes) - return "None"; - std::string Str; - if (AllocTypes & (uint8_t)AllocationType::NotCold) - Str += "NotCold"; - if (AllocTypes & (uint8_t)AllocationType::Cold) - Str += "Cold"; - return Str; -} - -template -void CallsiteContextGraph::ContextNode::dump() - const { - print(dbgs()); - dbgs() << "\n"; -} - -template -void CallsiteContextGraph::ContextNode::print( - raw_ostream &OS) const { - OS << "Node " << this << "\n"; - OS << "\t"; - printCall(OS); - if (Recursive) - OS << " (recursive)"; - OS << "\n"; - OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; - OS << "\tContextIds:"; - std::vector SortedIds(ContextIds.begin(), ContextIds.end()); - std::sort(SortedIds.begin(), SortedIds.end()); - for (auto Id : SortedIds) - OS << " " << Id; - OS << "\n"; - OS << "\tCalleeEdges:\n"; - for (auto &Edge : CalleeEdges) - OS << "\t\t" << *Edge << "\n"; - OS << "\tCallerEdges:\n"; - for (auto &Edge : CallerEdges) - OS << "\t\t" << *Edge << "\n"; - if (!Clones.empty()) { - OS << "\tClones: "; - FieldSeparator FS; - for (auto *Clone : Clones) - OS << FS << Clone; - OS << "\n"; - } else if (CloneOf) { - OS << "\tClone of " << CloneOf << "\n"; - } -} - -template -void CallsiteContextGraph::ContextEdge::dump() - const { - print(dbgs()); - dbgs() << "\n"; -} - -template -void CallsiteContextGraph::ContextEdge::print( - raw_ostream &OS) const { - OS << "Edge from Callee " << Callee << " to Caller: " << 
Caller - << " AllocTypes: " << getAllocTypeString(AllocTypes); - OS << " ContextIds:"; - std::vector SortedIds(ContextIds.begin(), ContextIds.end()); - std::sort(SortedIds.begin(), SortedIds.end()); - for (auto Id : SortedIds) - OS << " " << Id; -} - -template -void CallsiteContextGraph::dump() const { - print(dbgs()); -} - -template -void CallsiteContextGraph::print( - raw_ostream &OS) const { - OS << "Callsite Context Graph:\n"; - using GraphType = const CallsiteContextGraph *; - for (const auto Node : nodes(this)) { - if (Node->isRemoved()) - continue; - Node->print(OS); - OS << "\n"; - } -} - -template -static void checkEdge( - const std::shared_ptr> &Edge) { - // Confirm that alloc type is not None and that we have at least one context - // id. - assert(Edge->AllocTypes != (uint8_t)AllocationType::None); - assert(!Edge->ContextIds.empty()); -} - -template -static void checkNode(const ContextNode *Node) { - if (Node->isRemoved()) - return; - // Node's context ids should be the union of both its callee and caller edge - // context ids. - if (Node->CallerEdges.size()) { - auto EI = Node->CallerEdges.begin(); - auto &FirstEdge = *EI; - EI++; - DenseSet CallerEdgeContextIds(FirstEdge->ContextIds); - for (; EI != Node->CallerEdges.end(); EI++) { - const auto &Edge = *EI; - set_union(CallerEdgeContextIds, Edge->ContextIds); - } - // Node can have more context ids than callers if some contexts terminate at - // node and some are longer. - assert(Node->ContextIds == CallerEdgeContextIds || - set_is_subset(CallerEdgeContextIds, Node->ContextIds)); - } - if (Node->CalleeEdges.size()) { - auto EI = Node->CalleeEdges.begin(); - auto &FirstEdge = *EI; - EI++; - DenseSet CalleeEdgeContextIds(FirstEdge->ContextIds); - for (; EI != Node->CalleeEdges.end(); EI++) { - const auto &Edge = *EI; - set_union(CalleeEdgeContextIds, Edge->ContextIds); - } - assert(Node->ContextIds == CalleeEdgeContextIds); - } -} - -template -void CallsiteContextGraph::check() const { - using GraphType = const CallsiteContextGraph *; - for (const auto Node : nodes(this)) { - checkNode(Node); - for (auto &Edge : Node->CallerEdges) - checkEdge(Edge); - } -} - -template -struct GraphTraits *> { - using GraphType = const CallsiteContextGraph *; - using NodeRef = const ContextNode *; - - using NodePtrTy = std::unique_ptr>; - static NodeRef getNode(const NodePtrTy &P) { return P.get(); } - - using nodes_iterator = - mapped_iterator::const_iterator, - decltype(&getNode)>; - - static nodes_iterator nodes_begin(GraphType G) { - return nodes_iterator(G->NodeOwner.begin(), &getNode); - } - - static nodes_iterator nodes_end(GraphType G) { - return nodes_iterator(G->NodeOwner.end(), &getNode); - } - - static NodeRef getEntryNode(GraphType G) { - return G->NodeOwner.begin()->get(); - } - - using EdgePtrTy = std::shared_ptr>; - static const ContextNode * - GetCallee(const EdgePtrTy &P) { - return P->Callee; - } - - using ChildIteratorType = - mapped_iterator>>::const_iterator, - decltype(&GetCallee)>; - - static ChildIteratorType child_begin(NodeRef N) { - return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee); - } - - static ChildIteratorType child_end(NodeRef N) { - return ChildIteratorType(N->CalleeEdges.end(), &GetCallee); - } -}; - -template -struct DOTGraphTraits *> - : public DefaultDOTGraphTraits { - DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} - - using GraphType = const CallsiteContextGraph *; - using GTraits = GraphTraits; - using NodeRef = typename GTraits::NodeRef; - using ChildIteratorType = 
typename GTraits::ChildIteratorType; - - static std::string getNodeLabel(NodeRef Node, GraphType G) { - std::string LabelString = - (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") + - Twine(Node->OrigStackOrAllocId)) - .str(); - LabelString += "\n"; - if (Node->hasCall()) { - auto Func = G->NodeToCallingFunc.find(Node); - assert(Func != G->NodeToCallingFunc.end()); - LabelString += - G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo()); - } else { - LabelString += "null call"; - if (Node->Recursive) - LabelString += " (recursive)"; - else - LabelString += " (external)"; - } - return LabelString; - } - - static std::string getNodeAttributes(NodeRef Node, GraphType) { - std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " + - getContextIds(Node->ContextIds) + "\"") - .str(); - AttributeString += - (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str(); - AttributeString += ",style=\"filled\""; - if (Node->CloneOf) { - AttributeString += ",color=\"blue\""; - AttributeString += ",style=\"filled,bold,dashed\""; - } else - AttributeString += ",style=\"filled\""; - return AttributeString; - } - - static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, - GraphType) { - auto &Edge = *(ChildIter.getCurrent()); - return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" + - Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"") - .str(); - } - - // Since the NodeOwners list includes nodes that are no longer connected to - // the graph, skip them here. - static bool isNodeHidden(NodeRef Node, GraphType) { - return Node->isRemoved(); - } - -private: - static std::string getContextIds(const DenseSet &ContextIds) { - std::string IdString = "ContextIds:"; - if (ContextIds.size() < 100) { - std::vector SortedIds(ContextIds.begin(), ContextIds.end()); - std::sort(SortedIds.begin(), SortedIds.end()); - for (auto Id : SortedIds) - IdString += (" " + Twine(Id)).str(); - } else { - IdString += (" (" + Twine(ContextIds.size()) + " ids)").str(); - } - return IdString; - } - - static std::string getColor(uint8_t AllocTypes) { - if (AllocTypes == (uint8_t)AllocationType::NotCold) - // Color "brown1" actually looks like a lighter red. - return "brown1"; - if (AllocTypes == (uint8_t)AllocationType::Cold) - return "cyan"; - if (AllocTypes == - ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) - // Lighter purple. - return "mediumorchid1"; - return "gray"; - } - - static std::string getNodeId(NodeRef Node) { - std::stringstream SStream; - SStream << std::hex << "N0x" << (unsigned long long)Node; - std::string Result = SStream.str(); - return Result; - } -}; - -template -void CallsiteContextGraph::exportToDot( - std::string Label) const { - WriteGraph(this, "", false, Label, - DotFilePathPrefix + "ccg." 
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::exportToDot(
-    std::string Label) const {
-  WriteGraph(this, "", false, Label,
-             DotFilePathPrefix + "ccg." + Label + ".dot");
-}
-
-template <typename DerivedCCG, typename FuncTy, typename CallTy>
-bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::process() {
-  if (DumpCCG) {
-    dbgs() << "CCG before cloning:\n";
-    dbgs() << *this;
-  }
-  if (ExportToDot)
-    exportToDot("postbuild");
-
-  if (VerifyCCG) {
-    check();
-  }
-
-  return false;
-}
-
-bool MemProfContextDisambiguation::processModule(Module &M) {
-  bool Changed = false;
-
-  ModuleCallsiteContextGraph CCG(M);
-  Changed = CCG.process();
-
-  return Changed;
-}
-
-PreservedAnalyses MemProfContextDisambiguation::run(Module &M,
-                                                    ModuleAnalysisManager &AM) {
-  if (!processModule(M))
-    return PreservedAnalyses::all();
-  return PreservedAnalyses::none();
-}
diff --git a/llvm/test/ThinLTO/X86/memprof-summary.ll b/llvm/test/ThinLTO/X86/memprof-summary.ll
new file mode 100644
index 0000000000000..597cd44c030e7
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-summary.ll
@@ -0,0 +1,184 @@
+;; Check memprof summaries (per module, combined index, and distributed indexes)
+
+; RUN: split-file %s %t
+; RUN: opt -module-summary %t/a.ll -o %ta.bc
+; RUN: opt -module-summary %t/b.ll -o %tb.bc
+
+; RUN: llvm-dis -o - %ta.bc | FileCheck %s --check-prefix=PRELINKDISA
+; PRELINKDISA: gv: (name: "main", {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) ; guid = 15822663052811949562
+
+; RUN: llvm-dis -o - %tb.bc | FileCheck %s --check-prefix=PRELINKDISB
+; PRELINKDISB: ^[[PLBAR:[0-9]+]] = gv: (name: "_Z3barv", {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) ; guid = 4555904644815367798
+; PRELINKDISB: ^[[PLFOO:[0-9]+]] = gv: (name: "_Z3foov", {{.*}} callsites: ((callee: ^[[PLBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) ; guid = 9191153033785521275
+; PRELINKDISB: ^[[PLBAZ]] = gv: (name: "_Z3bazv", {{.*}} callsites: ((callee: ^[[PLBAR]], clones: (0), stackIds: (12481870273128938184)))))) ; guid = 15176620447596392000
+
+; RUN: llvm-bcanalyzer -dump %ta.bc | FileCheck %s --check-prefix=PRELINKBCANA
+; PRELINKBCANA: 
+
+; RUN: llvm-bcanalyzer -dump %tb.bc | FileCheck %s --check-prefix=PRELINKBCANB
+; PRELINKBCANB: 
+
+; RUN: llvm-lto2 run %ta.bc %tb.bc -o %t -save-temps \
+; RUN: -thinlto-distributed-indexes \
+; RUN: -r=%ta.bc,main,plx \
+; RUN: -r=%ta.bc,_Z3foov, \
+; RUN: -r=%ta.bc,free, \
+; RUN: -r=%ta.bc,sleep, \
+; RUN: -r=%tb.bc,_Z3foov,pl \
+; RUN: -r=%tb.bc,_Znam, \
+; RUN: -r=%tb.bc,_Z3bazv,pl
+
+; RUN: llvm-dis -o - %t.index.bc | FileCheck %s --check-prefix=COMBINEDDIS
+; COMBINEDDIS: ^[[COMBBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438))))))))
+; COMBINEDDIS: ^[[COMBFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[COMBBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848))))))
+; COMBINEDDIS: ^[[COMBBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[COMBBAR]], clones: (0), stackIds: (12481870273128938184))))))
+; COMBINEDDIS: ^[[COMBMAIN:[0-9]+]] = gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^[[COMBFOO]], clones: (0), stackIds: (8632435727821051414)), (callee: ^[[COMBFOO]], clones: (0), stackIds: (15025054523792398438))))))
+
+; RUN: llvm-bcanalyzer 
-dump %t.index.bc | FileCheck %s --check-prefix=COMBINEDBCAN +; COMBINEDBCAN: + +; RUN: llvm-dis -o - %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISA +; DISTRIBUTEDDISA: gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: null, clones: (0), stackIds: (2732490490862098848)))))) +; DISTRIBUTEDDISA: gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) + +; RUN: llvm-dis -o - %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISB +; DISTRIBUTEDDISB: ^[[DISTRBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) +; DISTRIBUTEDDISB: ^[[DISTRFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[DISTRBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) +; DISTRIBUTEDDISB: ^[[DISTRBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[DISTRBAR]], clones: (0), stackIds: (12481870273128938184)))))) + +; RUN: llvm-bcanalyzer -dump %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANA +; DISTRIBUTEDBCANA: + +; RUN: llvm-bcanalyzer -dump %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANB +; DISTRIBUTEDBCANB: + +;--- a.ll +; ModuleID = 'a.cc' +source_filename = "a.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: mustprogress norecurse uwtable +define dso_local noundef i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #0 !dbg !39 { +entry: + %call = call noundef ptr @_Z3foov(), !dbg !42, !callsite !43 + %call1 = call noundef ptr @_Z3foov(), !dbg !44, !callsite !45 + call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call, i8 0, i64 10, i1 false), !dbg !46 + call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call1, i8 0, i64 10, i1 false), !dbg !47 + call void @free(ptr noundef %call) #4, !dbg !48 + %call2 = call i32 @sleep(i32 noundef 10), !dbg !49 + call void @free(ptr noundef %call1) #4, !dbg !50 + ret i32 0, !dbg !51 +} + +declare !dbg !52 noundef ptr @_Z3foov() local_unnamed_addr #1 + +; Function Attrs: argmemonly mustprogress nocallback nofree nounwind willreturn writeonly +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 + +; Function Attrs: inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") +declare void @free(ptr allocptr nocapture noundef) local_unnamed_addr #3 + +declare !dbg !53 i32 @sleep(i32 noundef) local_unnamed_addr #1 + +attributes #0 = { mustprogress norecurse uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { argmemonly mustprogress nocallback nofree nounwind willreturn writeonly } +attributes #3 = { inaccessiblemem_or_argmemonly mustprogress 
nounwind willreturn allockind("free") "alloc-family"="malloc" "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "a.cc", directory: ".", checksumkind: CSK_MD5, checksum: "ebabd56909271a1d4a7cac81c10624d5") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{i32 7, !"frame-pointer", i32 2} +!39 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) +!40 = !DISubroutineType(types: !41) +!41 = !{} +!42 = !DILocation(line: 6, column: 13, scope: !39) +!43 = !{i64 8632435727821051414} +!44 = !DILocation(line: 7, column: 13, scope: !39) +!45 = !{i64 -3421689549917153178} +!46 = !DILocation(line: 8, column: 3, scope: !39) +!47 = !DILocation(line: 9, column: 3, scope: !39) +!48 = !DILocation(line: 10, column: 3, scope: !39) +!49 = !DILocation(line: 11, column: 3, scope: !39) +!50 = !DILocation(line: 12, column: 3, scope: !39) +!51 = !DILocation(line: 13, column: 3, scope: !39) +!52 = !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41) +!53 = !DISubprogram(name: "sleep", scope: !54, file: !54, line: 453, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41) +!54 = !DIFile(filename: "include/unistd.h", directory: "/usr", checksumkind: CSK_MD5, checksum: "ee8f41a17f563f029d0e930ad871815a") + +;--- b.ll +; ModuleID = 'b.cc' +source_filename = "b.cc" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: mustprogress noinline uwtable +define internal noalias noundef nonnull ptr @_Z3barv() local_unnamed_addr #0 !dbg !39 { +entry: + %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #2, !dbg !42, !memprof !43, !callsite !48 + ret ptr %call, !dbg !49 +} + +; Function Attrs: nobuiltin allocsize(0) +declare noundef nonnull ptr @_Znam(i64 noundef) local_unnamed_addr #1 + +; Function Attrs: mustprogress noinline uwtable +define dso_local noalias noundef nonnull ptr @_Z3bazv() local_unnamed_addr #0 !dbg !50 { +entry: + %call = call noundef ptr @_Z3barv(), !dbg !51, !callsite !52 + ret ptr %call, !dbg !53 +} + +; Function Attrs: mustprogress uwtable +define dso_local noalias noundef nonnull ptr @_Z3foov() local_unnamed_addr #3 !dbg !54 { +entry: + %call = call noundef ptr @_Z3bazv(), !dbg !55, !callsite !56 + ret ptr %call, !dbg !57 +} + +attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { builtin allocsize(0) } +attributes #3 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "b.cc", directory: ".", checksumkind: CSK_MD5, checksum: "335f81d275af57725cfc9ffc7be49bc2") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{i32 7, !"frame-pointer", i32 2} +!39 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, type: !40, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) +!40 = !DISubroutineType(types: !41) +!41 = !{} +!42 = !DILocation(line: 2, column: 10, scope: !39) +!43 = !{!44, !46} +!44 = !{!45, !"notcold"} +!45 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!46 = !{!47, !"cold"} +!47 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!48 = !{i64 9086428284934609951} +!49 = !DILocation(line: 2, column: 3, scope: !39) +!50 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) +!51 = !DILocation(line: 6, column: 10, scope: !50) +!52 = !{i64 -5964873800580613432} +!53 = !DILocation(line: 6, column: 3, scope: !50) +!54 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 9, type: !40, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) +!55 = !DILocation(line: 10, column: 10, scope: !54) +!56 = !{i64 2732490490862098848} +!57 = !DILocation(line: 10, column: 3, scope: !54) diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll deleted file mode 100644 index 539d88a815ed1..0000000000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll +++ /dev/null @@ -1,158 +0,0 @@ -;; Test callsite context graph generation for simple call graph with -;; two memprof contexts and no inlining. 
-;; -;; Original code looks like: -;; -;; char *bar() { -;; return new char[10]; -;; } -;; -;; char *baz() { -;; return bar(); -;; } -;; -;; char *foo() { -;; return baz(); -;; } -;; -;; int main(int argc, char **argv) { -;; char *x = foo(); -;; char *y = foo(); -;; memset(x, 0, 10); -;; memset(y, 0, 10); -;; delete[] x; -;; sleep(10); -;; delete[] y; -;; return 0; -;; } -;; -;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the -;; memory freed after sleep(10) results in cold lifetimes. -;; -;; The IR was then reduced using llvm-reduce with the expected FileCheck input. - -; RUN: opt -passes=memprof-context-disambiguation \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ -; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP - -; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define i32 @main() #0 { -entry: - %call = call noundef ptr @_Z3foov(), !callsite !0 - %call1 = call noundef ptr @_Z3foov(), !callsite !1 - ret i32 0 -} - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1 - -; Function Attrs: nobuiltin -declare void @_ZdaPv() #2 - -define internal ptr @_Z3barv() #3 { -entry: - %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7 - ret ptr null -} - -declare ptr @_Znam(i64) - -define internal ptr @_Z3bazv() #4 { -entry: - %call = call noundef ptr @_Z3barv(), !callsite !8 - ret ptr null -} - -; Function Attrs: noinline -define internal ptr @_Z3foov() #5 { -entry: - %call = call noundef ptr @_Z3bazv(), !callsite !9 - ret ptr null -} - -; uselistorder directives -uselistorder ptr @_Z3foov, { 1, 0 } - -attributes #0 = { "tune-cpu"="generic" } -attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #2 = { nobuiltin } -attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } -attributes #4 = { "stack-protector-buffer-size"="8" } -attributes #5 = { noinline } -attributes #6 = { builtin } - -!0 = !{i64 8632435727821051414} -!1 = !{i64 -3421689549917153178} -!2 = !{!3, !5} -!3 = !{!4, !"notcold"} -!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!5 = !{!6, !"cold"} -!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} -!7 = !{i64 9086428284934609951} -!8 = !{i64 -5964873800580613432} -!9 = !{i64 2732490490862098848} - - -; DUMP: CCG before cloning: -; DUMP: Callsite Context Graph: -; DUMP: Node [[BAR:0x[a-z0-9]+]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 - -; DUMP: Node [[BAZ]] -; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 - -; DUMP: Node [[FOO]] -; DUMP: %call 
= call noundef ptr @_Z3bazv() (clone 0)
-; DUMP: AllocTypes: NotColdCold
-; DUMP: ContextIds: 1 2
-; DUMP: CalleeEdges:
-; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
-; DUMP: CallerEdges:
-; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
-; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
-
-; DUMP: Node [[MAIN1]]
-; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
-; DUMP: AllocTypes: NotCold
-; DUMP: ContextIds: 1
-; DUMP: CalleeEdges:
-; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
-; DUMP: CallerEdges:
-
-; DUMP: Node [[MAIN2]]
-; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0)
-; DUMP: AllocTypes: Cold
-; DUMP: ContextIds: 2
-; DUMP: CalleeEdges:
-; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
-; DUMP: CallerEdges:
-
-
-; DOT: digraph "postbuild" {
-; DOT: label="postbuild";
-; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
-; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
-; DOT: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
-; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
-; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
-; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
-; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
-; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
-; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
-; DOT: }
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
deleted file mode 100644
index c5ed97f182a98..0000000000000
--- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
+++ /dev/null
@@ -1,232 +0,0 @@
-;; Test callsite context graph generation for call graph with MIBs
-;; that have pruned contexts that partially match multiple inlined
-;; callsite contexts, requiring duplication of context ids and nodes
-;; while matching callsite nodes onto the graph.
-;; -;; Original code looks like: -;; -;; char *D() { -;; return new char[10]; -;; } -;; -;; char *F() { -;; return D(); -;; } -;; -;; char *C() { -;; return D(); -;; } -;; -;; char *B() { -;; return C(); -;; } -;; -;; char *E() { -;; return C(); -;; } -;; int main(int argc, char **argv) { -;; char *x = B(); // cold -;; char *y = E(); // cold -;; char *z = F(); // default -;; memset(x, 0, 10); -;; memset(y, 0, 10); -;; memset(z, 0, 10); -;; delete[] z; -;; sleep(10); -;; delete[] x; -;; delete[] y; -;; return 0; -;; } -;; -;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the -;; memory freed after sleep(10) results in cold lifetimes. -;; -;; The code below was created by forcing inlining of C into both B and E. -;; Since both allocation contexts via C are cold, the matched memprof -;; metadata has the context pruned above C's callsite. This requires -;; matching the stack node for C to callsites where it was inlined (i.e. -;; the callsites in B and E that have callsite metadata that includes C's). -;; It also requires duplication of that node in the graph as well as the -;; duplication of the context ids along that path through the graph, -;; so that we can represent the duplicated (via inlining) C callsite. -;; -;; The IR was then reduced using llvm-reduce with the expected FileCheck input. - -; RUN: opt -passes=memprof-context-disambiguation \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ -; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP - -; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE -; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define internal ptr @_Z1Dv() { -entry: - %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !5 - ret ptr null -} - -declare ptr @_Znam(i64) - -define internal ptr @_Z1Fv() #0 { -entry: - %call = call noundef ptr @_Z1Dv(), !callsite !6 - ret ptr null -} - -; Function Attrs: mustprogress noinline optnone uwtable -define internal ptr @_Z1Cv() #1 { -entry: - %call = call noundef ptr @_Z1Dv(), !callsite !7 - ret ptr null -} - -; Function Attrs: mustprogress noinline optnone uwtable -define internal ptr @_Z1Bv() #1 { -entry: - %call.i = call noundef ptr @_Z1Dv(), !callsite !8 - ret ptr null -} - -; Function Attrs: mustprogress noinline optnone uwtable -define internal ptr @_Z1Ev() #1 { -entry: - %call.i = call noundef ptr @_Z1Dv(), !callsite !9 - ret ptr null -} - -; Function Attrs: noinline -declare i32 @main() #2 - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3 - -; Function Attrs: nounwind -declare void @_ZdaPv() #4 - -declare i32 @sleep() #5 - -attributes #0 = { "disable-tail-calls"="true" } -attributes #1 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #2 = { noinline } -attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #4 = { nounwind } -attributes #5 = { "no-trapping-math"="true" } -attributes #6 = { builtin } - -!0 = 
!{!1, !3} -!1 = !{!2, !"cold"} -!2 = !{i64 6541423618768552252, i64 -6270142974039008131} -!3 = !{!4, !"notcold"} -!4 = !{i64 6541423618768552252, i64 -4903163940066524832} -!5 = !{i64 6541423618768552252} -!6 = !{i64 -4903163940066524832} -!7 = !{i64 -6270142974039008131} -!8 = !{i64 -6270142974039008131, i64 -184525619819294889} -!9 = !{i64 -6270142974039008131, i64 1905834578520680781} - - -;; After adding only the alloc node memprof metadata, we only have 2 contexts. - -; DUMP: CCG before updating call stack chains: -; DUMP: Callsite Context Graph: -; DUMP: Node [[D:0x[a-z0-9]+]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 -; DUMP: Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2 - -; DUMP: Node [[C]] -; DUMP: null Call -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 1 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1 -; DUMP: CallerEdges: - -; DUMP: Node [[F]] -; DUMP: null Call -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 -; DUMP: CallerEdges: - -;; After updating for callsite metadata, we should have generated context ids 3 and 4, -;; along with 2 new nodes for those callsites. All have the same allocation type -;; behavior as the original C node. - -; DUMP: CCG before cloning: -; DUMP: Callsite Context Graph: -; DUMP: Node [[D]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 3 4 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 -; DUMP: Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3 -; DUMP: Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 -; DUMP: Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 - -; DUMP: Node [[F]] -; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 -; DUMP: CallerEdges: - -; DUMP: Node [[C2]] -; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 3 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3 -; DUMP: CallerEdges: - -; DUMP: Node [[B]] -; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 4 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4 -; DUMP: CallerEdges: - -; DUMP: Node [[E]] -; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 1 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1 -; DUMP: CallerEdges: - - -; DOTPRE: digraph "prestackupdate" { -; DOTPRE: label="prestackupdate"; -; DOTPRE: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"]; -; DOTPRE: Node[[C:0x[a-z0-9]+]] 
[shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
-; DOTPRE: Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
-; DOTPRE: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
-; DOTPRE: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
-; DOTPRE: }
-
-
-; DOTPOST:digraph "postbuild" {
-; DOTPOST: label="postbuild";
-; DOTPOST: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
-; DOTPOST: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
-; DOTPOST: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
-; DOTPOST: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
-; DOTPOST: Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
-; DOTPOST: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
-; DOTPOST: Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
-; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
-; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
-; DOTPOST:}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
deleted file mode 100644
index da0fd3f44b45e..0000000000000
--- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
+++ /dev/null
@@ -1,386 +0,0 @@
-;; Test callsite context graph generation for call graph with MIBs
-;; that have pruned contexts that partially match multiple inlined
-;; callsite contexts, requiring duplication of context ids and nodes
-;; while matching callsite nodes onto the graph. This test requires more
-;; complex duplication due to multiple contexts for different allocations
-;; that share some of the same callsite nodes.
-;;
-;; Original code looks like:
-;;
-;; char *D(bool Call1) {
-;;   if (Call1)
-;;     return new char[10];
-;;   else
-;;     return new char[10];
-;; }
-;;
-;; char *C(bool Call1) {
-;;   return D(Call1);
-;; }
-;;
-;; char *B(bool Call1) {
-;;   if (Call1)
-;;     return C(true);
-;;   else
-;;     return C(false);
-;; }
-;;
-;; char *A(bool Call1) {
-;;   return B(Call1);
-;; }
-;;
-;; char *A1() {
-;;   return A(true);
-;; }
-;;
-;; char *A2() {
-;;   return A(true);
-;; }
-;;
-;; char *A3() {
-;;   return A(false);
-;; }
-;;
-;; char *A4() {
-;;   return A(false);
-;; }
-;;
-;; char *E() {
-;;   return B(true);
-;; }
-;;
-;; char *F() {
-;;   return B(false);
-;; }
-;;
-;; int main(int argc, char **argv) {
-;;   char *a1 = A1(); // cold
-;;   char *a2 = A2(); // cold
-;;   char *e = E(); // default
-;;   char *a3 = A3(); // default
-;;   char *a4 = A4(); // default
-;;   char *f = F(); // cold
-;;   memset(a1, 0, 10);
-;;   memset(a2, 0, 10);
-;;   memset(e, 0, 10);
-;;   memset(a3, 0, 10);
-;;   memset(a4, 0, 10);
-;;   memset(f, 0, 10);
-;;   delete[] a3;
-;;   delete[] a4;
-;;   delete[] e;
-;;   sleep(10);
-;;   delete[] a1;
-;;   delete[] a2;
-;;   delete[] f;
-;;   return 0;
-;; }
-;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
-;; memory freed after sleep(10) results in cold lifetimes.
-;;
-;; The code below was created by forcing inlining of A into its callers,
-;; without any other inlining or optimizations. Since both allocation contexts
-;; via A for each allocation in D have the same allocation type (cold via
-;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second
-;; new in D), the contexts for those respective allocations are pruned above A.
-;; The allocations via E and F are to ensure we don't prune above B.
-;;
-;; The matching onto the inlined A[1234]->A sequences will require duplication
-;; of the context id assigned to the context from A for each allocation in D.
-;; This test ensures that we do this correctly in the presence of callsites
-;; shared by the different duplicated context ids (i.e. callsite in C).
-;;
-;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
-
-; RUN: opt -passes=memprof-context-disambiguation \
-; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
-; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
-; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
-
-
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-; Function Attrs: mustprogress noinline uwtable
-define ptr @_Z1Db(i1 %Call1) #0 {
-entry:
-  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
-  br label %return
-
-if.else: ; No predecessors!
- %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false), !callsite !14 - br label %return - -return: ; preds = %if.else, %entry - ret ptr null -} - -define ptr @_Z1Ab(i1 %tobool) #2 { -entry: - %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool), !callsite !15 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z2A1v(i1 %tobool.i) #0 { -entry: - %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !16 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z2A2v(i1 %tobool.i) #0 { -entry: - %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !17 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z2A3v(i1 %tobool.i) #0 { -entry: - %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !18 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z2A4v(i1 %tobool.i) #0 { -entry: - %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !19 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z1Ev() #0 { -entry: - %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true), !callsite !20 - ret ptr null -} - -; Function Attrs: mustprogress noinline uwtable -define ptr @_Z1Fv() #0 { -entry: - %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false), !callsite !21 - ret ptr null -} - -; Function Attrs: noinline -declare i32 @main() #3 - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 - -declare void @_ZdaPv() #5 - -declare i32 @sleep() #6 - -; uselistorder directives -uselistorder ptr @_Znam, { 1, 0 } - -attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nobuiltin } -attributes #2 = { "tune-cpu"="generic" } -attributes #3 = { noinline } -attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } -attributes #6 = { "disable-tail-calls"="true" } -attributes #7 = { builtin allocsize(0) } - -!0 = !{!1, !3} -!1 = !{!2, !"notcold"} -!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781} -!3 = !{!4, !"cold"} -!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978} -!5 = !{i64 4854880825882961848} -!6 = !{!7, !9} -!7 = !{!8, !"notcold"} -!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978} -!9 = !{!10, !"cold"} -!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832} -!11 = !{i64 -8775068539491628272} -!12 = !{i64 -904694911315397047} -!13 = !{i64 6532298921261778285} -!14 = !{i64 7859682663773658275} -!15 = !{i64 -6528110295079665978} -!16 = !{i64 -6528110295079665978, i64 5747919905719679568} -!17 = !{i64 -6528110295079665978, i64 -5753238080028016843} -!18 = !{i64 -6528110295079665978, i64 1794685869326395337} -!19 = !{i64 -6528110295079665978, i64 5462047985461644151} -!20 = !{i64 1905834578520680781} -!21 = !{i64 -4903163940066524832} - - -;; After adding only the alloc node memprof metadata, we only 
have 4 contexts (we only
-;; match the interesting parts of the pre-update graph here).
-
-; DUMP: CCG before updating call stack chains:
-; DUMP: Callsite Context Graph:
-
-; DUMP: Node [[D1:0x[a-z0-9]+]]
-; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
-; DUMP: AllocTypes: NotColdCold
-; DUMP: ContextIds: 1 2
-
-; DUMP: Node [[C:0x[a-z0-9]+]]
-; DUMP: null Call
-; DUMP: AllocTypes: NotColdCold
-; DUMP: ContextIds: 1 2 3 4
-; DUMP: CalleeEdges:
-; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2
-; DUMP: Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4
-
-; DUMP: Node [[D2]]
-; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
-; DUMP: AllocTypes: NotColdCold
-; DUMP: ContextIds: 3 4
-
-
-;; After updating for callsite metadata, we should have duplicated the context
-;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A,
-;; and used those on new nodes for those callers. Note that while in reality
-;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4,
-;; due to the pruning we have lost this information and thus end up duplicating
-;; both of A's contexts to all of the new nodes (which could result in some
-;; unnecessary cloning).
-
-; DUMP: CCG before cloning:
-; DUMP: Callsite Context Graph:
-; DUMP: Node [[D1]]
-; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
-; DUMP: AllocTypes: NotColdCold
-; DUMP: ContextIds: 1 2 5 7 9 11
-; DUMP: CalleeEdges:
-; DUMP: CallerEdges:
-; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
-
-; DUMP: Node [[C]]
-; DUMP: %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool) (clone 0)
-; DUMP: AllocTypes: NotColdCold
-; DUMP: ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12
-; DUMP: CalleeEdges:
-; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
-; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
-; DUMP: CallerEdges:
-; DUMP: Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
-; DUMP: Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
-
-; DUMP: Node [[B1]]
-; DUMP: %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true) (clone 0)
-; DUMP: AllocTypes: NotColdCold
-; DUMP: ContextIds: 1 2 5 7 9 11
-; DUMP: CalleeEdges:
-; DUMP: Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
-; DUMP: CallerEdges:
-; DUMP: Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
-; DUMP: Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5
-; DUMP: Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7
-; DUMP: Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9
-; DUMP: Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11
-; DUMP: Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
-
-; DUMP: Node [[E]]
-; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true) (clone 0)
-; DUMP: AllocTypes: NotCold
-; DUMP: ContextIds: 1
-; DUMP: CalleeEdges:
-; DUMP: Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1
-; DUMP: CallerEdges:
-
-; DUMP: Node [[D2]]
-; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 3 4 6 8 10 12 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 - -; DUMP: Node [[B2]] -; DUMP: %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 3 4 6 8 10 12 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 -; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 -; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 -; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 -; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 -; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 - -; DUMP: Node [[F]] -; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false) (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 4 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4 -; DUMP: CallerEdges: - -; DUMP: Node [[A2]] -; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 5 6 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5 -; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 -; DUMP: CallerEdges: - -; DUMP: Node [[A3]] -; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 7 8 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7 -; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 -; DUMP: CallerEdges: - -; DUMP: Node [[A1]] -; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 9 10 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9 -; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 -; DUMP: CallerEdges: - -; DUMP: Node [[A4]] -; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 11 12 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11 -; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 -; DUMP: CallerEdges: - -; DUMP: Node [[A]] -; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 2 3 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2 -; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 -; DUMP: CallerEdges: diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll deleted file mode 100644 index 9ebf219dd37a0..0000000000000 --- 
a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll +++ /dev/null @@ -1,261 +0,0 @@ -;; Tests callsite context graph generation for call graph containing indirect -;; calls. Currently this should result in conservative behavior, such that the -;; indirect call receives a null call in its graph node, to prevent subsequent -;; cloning. -;; -;; Original code looks like: -;; -;; char *foo() { -;; return new char[10]; -;; } -;; class A { -;; public: -;; virtual char *x() { return foo(); } -;; }; -;; class B : public A { -;; public: -;; char *x() final { return foo(); } -;; }; -;; char *bar(A *a) { -;; return a->x(); -;; } -;; int main(int argc, char **argv) { -;; char *x = foo(); -;; char *y = foo(); -;; B b; -;; char *z = bar(&b); -;; char *w = bar(&b); -;; A a; -;; char *r = bar(&a); -;; char *s = bar(&a); -;; memset(x, 0, 10); -;; memset(y, 0, 10); -;; memset(z, 0, 10); -;; memset(w, 0, 10); -;; memset(r, 0, 10); -;; memset(s, 0, 10); -;; delete[] x; -;; delete[] w; -;; delete[] r; -;; sleep(10); -;; delete[] y; -;; delete[] z; -;; delete[] s; -;; return 0; -;; } -;; -;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the -;; memory freed after sleep(10) results in cold lifetimes. -;; -;; Compiled without optimization to prevent inlining and devirtualization. -;; -;; The IR was then reduced using llvm-reduce with the expected FileCheck input. - -; RUN: opt -passes=memprof-context-disambiguation \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ -; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP - -; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT - - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -declare ptr @_Z3barP1A(ptr) - -define i32 @main(ptr %b, ptr %a) #0 { -entry: - %call = call noundef ptr @_Z3foov(), !callsite !0 - %call1 = call noundef ptr @_Z3foov(), !callsite !1 - %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !2 - %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !3 - %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !4 - %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !5 - ret i32 0 -} - -; Function Attrs: noinline -declare void @_ZN1BC2Ev() #1 - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 - -; Function Attrs: nobuiltin -declare void @_ZdaPv() #3 - -define internal ptr @_ZN1A1xEv() #4 { -entry: - %call = call noundef ptr @_Z3foov(), !callsite !6 - ret ptr null -} - -; Function Attrs: mustprogress uwtable -define internal ptr @_ZN1B1xEv() #5 { -entry: - %call = call noundef ptr @_Z3foov(), !callsite !7 - ret ptr null -} - -; Function Attrs: mustprogress uwtable -define internal ptr @_Z3foov() #5 { -entry: - %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !8, !callsite !21 - ret ptr null -} - -declare ptr @_Znam(i64) #6 - -; uselistorder directives -uselistorder ptr @_Z3foov, { 3, 2, 1, 0 } - -attributes #0 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } -attributes #1 = { noinline } -attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #3 = { nobuiltin } -attributes #4 = { "tune-cpu"="generic" } -attributes #5 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" 
"min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #6 = { "disable-tail-calls"="true" } -attributes #7 = { builtin } - -!0 = !{i64 8632435727821051414} -!1 = !{i64 -3421689549917153178} -!2 = !{i64 6792096022461663180} -!3 = !{i64 -2709642582978494015} -!4 = !{i64 748269490701775343} -!5 = !{i64 -5747251260480066785} -!6 = !{i64 8256774051149711748} -!7 = !{i64 -4831879094954754638} -!8 = !{!9, !11, !13, !15, !17, !19} -!9 = !{!10, !"notcold"} -!10 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 748269490701775343} -!11 = !{!12, !"cold"} -!12 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 -5747251260480066785} -!13 = !{!14, !"notcold"} -!14 = !{i64 2732490490862098848, i64 8632435727821051414} -!15 = !{!16, !"cold"} -!16 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 6792096022461663180} -!17 = !{!18, !"notcold"} -!18 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 -2709642582978494015} -!19 = !{!20, !"cold"} -!20 = !{i64 2732490490862098848, i64 -3421689549917153178} -!21 = !{i64 2732490490862098848} - - -; DUMP: CCG before cloning: -; DUMP: Callsite Context Graph: -; DUMP: Node [[FOO:0x[a-z0-9]+]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 3 4 5 6 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[AX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 -; DUMP: Edge from Callee [[FOO]] to Caller: [[BX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 4 5 -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 6 - -; DUMP: Node [[AX]] -; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[AX]] to Caller: [[BAR:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 - -;; Bar contains an indirect call, with multiple targets. It's call should be null. 
-; DUMP: Node [[BAR]] -; DUMP: null Call -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 4 5 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 5 - -; DUMP: Node [[MAIN3]] -; DUMP: %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 1 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1 -; DUMP: CallerEdges: - -; DUMP: Node [[MAIN4]] -; DUMP: %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2 -; DUMP: CallerEdges: - -; DUMP: Node [[MAIN1]] -; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 3 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 -; DUMP: CallerEdges: - -; DUMP: Node [[BX]] -; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 4 5 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 - -; DUMP: Node [[MAIN5]] -; DUMP: %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 4 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4 -; DUMP: CallerEdges: - -; DUMP: Node [[MAIN6]] -; DUMP: %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 5 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5 -; DUMP: CallerEdges: - -; DUMP: Node [[MAIN2]] -; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 6 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 -; DUMP: CallerEdges: - - -; DOT: digraph "postbuild" { -; DOT: label="postbuild"; -; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5 6",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"]; -; DOT: Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"]; -; DOT: Node[[AX]] -> Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; -; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"]; -; DOT: Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 
2",fillcolor="mediumorchid1"]; -; DOT: Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; -; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"]; -; DOT: Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; -; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"]; -; DOT: Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"]; -; DOT: Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; -; DOT: Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"]; -; DOT: Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"]; -; DOT: Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; -; DOT: Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"]; -; DOT: Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"]; -; DOT: Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"]; -; DOT: Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"]; -; DOT: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; -; DOT: Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"]; -; DOT: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll deleted file mode 100644 index 59f135ca06627..0000000000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll +++ /dev/null @@ -1,189 +0,0 @@ -;; Test callsite context graph generation for call graph with two memprof -;; contexts and partial inlining, requiring generation of a new fused node to -;; represent the inlined sequence while matching callsite nodes onto the graph. -;; -;; Original code looks like: -;; -;; char *bar() { -;; return new char[10]; -;; } -;; -;; char *baz() { -;; return bar(); -;; } -;; -;; char *foo() { -;; return baz(); -;; } -;; -;; int main(int argc, char **argv) { -;; char *x = foo(); -;; char *y = foo(); -;; memset(x, 0, 10); -;; memset(y, 0, 10); -;; delete[] x; -;; sleep(10); -;; delete[] y; -;; return 0; -;; } -;; -;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the -;; memory freed after sleep(10) results in cold lifetimes. -;; -;; The code below was created by forcing inlining of baz into foo, and -;; bar into baz. Due to the inlining of bar we will initially have two -;; allocation nodes in the graph. This tests that we correctly match -;; foo (with baz inlined) onto the graph nodes first, and generate a new -;; fused node for it. 
We should then not match baz (with bar inlined) as that -;; is not reached by the MIB contexts (since all calls from main will look -;; like main -> foo(+baz) -> bar after the inlining reflected in this IR). -;; -;; The IR was then reduced using llvm-reduce with the expected FileCheck input. - -; RUN: opt -passes=memprof-context-disambiguation \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ -; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP - -; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT - - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define internal ptr @_Z3barv() { -entry: - %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5 - ret ptr null -} - -; Function Attrs: nobuiltin -declare ptr @_Znam(i64) #0 - -; Function Attrs: mustprogress -define internal ptr @_Z3bazv() #1 { -entry: - %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !6 - ret ptr null -} - -; Function Attrs: noinline -define internal ptr @_Z3foov() #2 { -entry: - %call.i = call noundef ptr @_Z3barv(), !callsite !7 - ret ptr null -} - -define i32 @main() #3 { -entry: - %call = call noundef ptr @_Z3foov(), !callsite !8 - %call1 = call noundef ptr @_Z3foov(), !callsite !9 - ret i32 0 -} - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 - -; Function Attrs: nounwind -declare void @_ZdaPv() #5 - -declare i32 @sleep() #6 - -attributes #0 = { nobuiltin } -attributes #1 = { mustprogress } -attributes #2 = { noinline } -attributes #3 = { "tune-cpu"="generic" } -attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #5 = { nounwind } -attributes #6 = { "disable-tail-calls"="true" } -attributes #7 = { builtin } - -!0 = !{!1, !3} -!1 = !{!2, !"notcold"} -!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!3 = !{!4, !"cold"} -!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} -!5 = !{i64 9086428284934609951} -!6 = !{i64 9086428284934609951, i64 -5964873800580613432} -!7 = !{i64 -5964873800580613432, i64 2732490490862098848} -!8 = !{i64 8632435727821051414} -!9 = !{i64 -3421689549917153178} - - -; DUMP: CCG before cloning: -; DUMP: Callsite Context Graph: -; DUMP: Node [[BAR:0x[a-z0-9]+]] -; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 - -;; This is leftover from the MIB on the alloc inlined into baz. It is not -;; matched with any call, since there is no such node in the IR. Due to the -;; null call it will not participate in any context transformations. 
-; DUMP: Node [[FOO2:0x[a-z0-9]+]] -; DUMP: null Call -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 3 4 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAZ:0x[a-z0-9]+]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 -; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 - -; DUMP: Node [[MAIN1]] -; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 1 3 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 -; DUMP: CallerEdges: - -; DUMP: Node [[MAIN2]] -; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 2 4 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 -; DUMP: CallerEdges: - -; DUMP: Node [[BAZ]] -; DUMP: %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 3 4 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4 - -;; This is the node synthesized for the call to bar in foo that was created -;; by inlining baz into foo. -; DUMP: Node [[FOO]] -; DUMP: %call.i = call noundef ptr @_Z3barv() (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2 -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 -; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 - - -; DOT: digraph "postbuild" { -; DOT: label="postbuild"; -; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; -; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"]; -; DOT: Node[[FOO]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"]; -; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; -; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"]; -; DOT: Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"]; -; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; -; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 4",fillcolor="cyan"]; -; DOT: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"]; -; DOT: Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"]; -; DOT: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 
2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; -; DOT: Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; -; DOT: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll deleted file mode 100644 index a3a056ade8c49..0000000000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll +++ /dev/null @@ -1,135 +0,0 @@ -;; Test callsite context graph generation for call graph with two memprof -;; contexts and multiple levels of inlining, requiring generation of new -;; fused nodes to represent the inlined sequence while matching callsite -;; nodes onto the graph. In particular this tests the case where a function -;; has inlined a callee containing an inlined callee. -;; -;; Original code looks like: -;; -;; char *bar() __attribute__((noinline)) { -;; return new char[10]; -;; } -;; -;; char *baz() { -;; return bar(); -;; } -;; -;; char *foo() { -;; return baz(); -;; } -;; -;; int main(int argc, char **argv) { -;; char *x = foo(); -;; char *y = foo(); -;; memset(x, 0, 10); -;; memset(y, 0, 10); -;; delete[] x; -;; sleep(10); -;; delete[] y; -;; return 0; -;; } -;; -;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the -;; memory freed after sleep(10) results in cold lifetimes. -;; -;; Both foo and baz are inlined into main, at both foo callsites. -;; We should update the graph for new fused nodes for both of those inlined -;; callsites to bar. -;; -;; Note that baz and bar are both dead due to the inlining, but have been left -;; in the input IR to ensure that the MIB call chain is matched to the longer -;; inline sequences from main. -;; -;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
- -; RUN: opt -passes=memprof-context-disambiguation \ -; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ -; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP - - -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define ptr @_Z3barv() #0 { -entry: - %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !memprof !7, !callsite !12, !heapallocsite !13 - ret ptr null -} - -; Function Attrs: nobuiltin -declare ptr @_Znam(i64) #1 - -; Function Attrs: mustprogress -declare ptr @_Z3bazv() #2 - -define i32 @main() #3 { -delete.end5: - %call.i.i = call noundef ptr @_Z3barv(), !callsite !14 - %call.i.i8 = call noundef ptr @_Z3barv(), !callsite !15 - ret i32 0 -} - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 - -declare void @_ZdaPv() #5 - -declare i32 @sleep() #6 - -attributes #0 = { "stack-protector-buffer-size"="8" } -attributes #1 = { nobuiltin } -attributes #2 = { mustprogress } -attributes #3 = { "tune-cpu"="generic" } -attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } -attributes #6 = { "disable-tail-calls"="true" } -attributes #7 = { builtin } - -!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6} - -!0 = !{i32 7, !"Dwarf Version", i32 5} -!1 = !{i32 2, !"Debug Info Version", i32 3} -!2 = !{i32 1, !"wchar_size", i32 4} -!3 = !{i32 8, !"PIC Level", i32 2} -!4 = !{i32 7, !"PIE Level", i32 2} -!5 = !{i32 7, !"uwtable", i32 2} -!6 = !{i32 7, !"frame-pointer", i32 2} -!7 = !{!8, !10} -!8 = !{!9, !"notcold"} -!9 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!10 = !{!11, !"cold"} -!11 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} -!12 = !{i64 9086428284934609951} -!13 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) -!14 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!15 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} - - -; DUMP: CCG before cloning: -; DUMP: Callsite Context Graph: -; DUMP: Node [[BAR:0x[a-z0-9]+]] -; DUMP: %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !heapallocsite !7 (clone 0) -; DUMP: AllocTypes: NotColdCold -; DUMP: ContextIds: 1 2 -; DUMP: CalleeEdges: -; DUMP: CallerEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 - -;; This is the node synthesized for the first inlined call chain of main->foo->baz -; DUMP: Node [[MAIN1]] -; DUMP: %call.i.i = call noundef ptr @_Z3barv() (clone 0) -; DUMP: AllocTypes: NotCold -; DUMP: ContextIds: 1 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 -; DUMP: CallerEdges: - -;; This is the node synthesized for the second inlined call chain of main->foo->baz -; DUMP: Node [[MAIN2]] -; DUMP: %call.i.i8 = call noundef ptr @_Z3barv() (clone 0) -; DUMP: AllocTypes: Cold -; DUMP: ContextIds: 2 -; DUMP: CalleeEdges: -; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 -; DUMP: CallerEdges: diff --git 
a/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll b/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll deleted file mode 100644 index fede5fe96eccd..0000000000000 --- a/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll +++ /dev/null @@ -1,41 +0,0 @@ -;; Test that MemProfContextDisambiguation is enabled under the expected conditions -;; and in the expected position. - -;; Pass is not currently enabled by default at any opt level. -; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" -; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" -; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" -; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" - -;; Pass should not run even under option at O0/O1. -; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \ -; RUN: -enable-memprof-context-disambiguation \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" -; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \ -; RUN: -enable-memprof-context-disambiguation \ -; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" - -;; Pass should be enabled under option at O2/O3. -; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \ -; RUN: -enable-memprof-context-disambiguation \ -; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED -; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \ -; RUN: -enable-memprof-context-disambiguation \ -; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED - -;; When enabled, MemProfContextDisambiguation runs just after inlining.
-; ENABLED: Running pass: InlinerPass -; ENABLED: Invalidating analysis: InlineAdvisorAnalysis -; ENABLED: Running pass: MemProfContextDisambiguation - -define noundef ptr @_Z3barv() { -entry: - %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) - ret ptr %call -} - -declare noundef nonnull ptr @_Znam(i64 noundef) From 553bff0e9c571c4b53520126e8c0f6fe2ed966a0 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 22 Mar 2023 14:45:00 +0000 Subject: [PATCH 306/691] [gn build] Port 883dbb9c86be --- llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn index 0dbeb793e40eb..644d30f10854e 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn @@ -48,7 +48,6 @@ static_library("IPO") { "Internalize.cpp", "LoopExtractor.cpp", "LowerTypeTests.cpp", - "MemProfContextDisambiguation.cpp", "MergeFunctions.cpp", "ModuleInliner.cpp", "OpenMPOpt.cpp", From 65a0d669b4625c34775436a6d3643d15bbc2465a Mon Sep 17 00:00:00 2001 From: Doru Bercea Date: Wed, 22 Feb 2023 11:58:48 -0500 Subject: [PATCH 307/691] Fix accessing of aligned arrays in offloaded target regions --- clang/lib/Sema/SemaOpenMP.cpp | 8 +- .../amdgpu_target_with_aligned_attribute.c | 305 + .../OpenMP/parallel_firstprivate_codegen.cpp | 328 +- ...l_master_taskloop_firstprivate_codegen.cpp | 2562 +++- ...ter_taskloop_simd_firstprivate_codegen.cpp | 2664 +++- .../OpenMP/target_firstprivate_codegen.cpp | 12203 +++++++++++++++- .../OpenMP/target_is_device_ptr_codegen.cpp | 5825 +++++++- .../OpenMP/teams_firstprivate_codegen.cpp | 752 +- 8 files changed, 22804 insertions(+), 1843 deletions(-) create mode 100644 clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index e193fa3d19d5c..1cd263b8a5b1c 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -2273,10 +2273,10 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level, // and alignment, because the runtime library only deals with uintptr types. // If it does not fit the uintptr size, we need to pass the data by reference // instead. 
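A minimal sketch of the capture rule this hunk adjusts, assuming a typical 64-bit host where uintptr_t is 8 bytes wide and 8-byte aligned; the variable names below are illustrative and not taken from the patch. The old check consulted getDeclAlign(D), so a declaration-level aligned attribute could push a pointer-sized firstprivate capture over the uintptr alignment limit and force a by-reference capture; getAlignOfGlobalVarInChars(Ty) looks only at the type, which appears to be the point of the fix for the 64-byte-aligned pointer in the new test further below.

#include <stdint.h>

/* Sketch only (hypothetical names): both pointers are sizeof(uintptr_t) bytes wide. */
int *p;                              /* alignof(int *) == alignof(uintptr_t) on typical
                                        64-bit targets, so p fits a by-value uintptr
                                        firstprivate slot.                              */
int *q __attribute__((aligned(64))); /* Declaration alignment 64 > alignof(uintptr_t):
                                        the old getDeclAlign(D) check made q a
                                        by-reference capture; the type-based check
                                        keeps it by value, like p.                      */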
- if (!IsByRef && - (Ctx.getTypeSizeInChars(Ty) > - Ctx.getTypeSizeInChars(Ctx.getUIntPtrType()) || - Ctx.getDeclAlign(D) > Ctx.getTypeAlignInChars(Ctx.getUIntPtrType()))) { + if (!IsByRef && (Ctx.getTypeSizeInChars(Ty) > + Ctx.getTypeSizeInChars(Ctx.getUIntPtrType()) || + Ctx.getAlignOfGlobalVarInChars(Ty) > + Ctx.getTypeAlignInChars(Ctx.getUIntPtrType()))) { IsByRef = true; } diff --git a/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c new file mode 100644 index 0000000000000..e33ad0b353f51 --- /dev/null +++ b/clang/test/OpenMP/amdgpu_target_with_aligned_attribute.c @@ -0,0 +1,305 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ +// REQUIRES: amdgpu-registered-target + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// RUN: %clang_cc1 -verify -fopenmp -x c -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host-amd.bc +// RUN: %clang_cc1 -verify -fopenmp -x c -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host-amd.bc -o - | FileCheck %s --check-prefix=CHECK-AMD + + +void write_to_aligned_array(int *a, int N) { + int *aptr __attribute__ ((aligned(64))) = a; + #pragma omp target teams distribute parallel for map(tofrom: aptr[0:N]) + for(int i = 0; i < N; i++) { + aptr[i] = i; + } +} + +#endif +// CHECK-AMD-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_write_to_aligned_array_l14 +// CHECK-AMD-SAME: (i64 noundef [[N:%.*]], ptr noundef [[APTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-AMD-NEXT: entry: +// CHECK-AMD-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[APTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// CHECK-AMD-NEXT: [[APTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[APTR_ADDR]] to ptr +// CHECK-AMD-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// CHECK-AMD-NEXT: [[DOTZERO_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTZERO_ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTTHREADID_TEMP__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTTHREADID_TEMP_]] to ptr +// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @[[GLOB1:[0-9]+]] to ptr), i8 2, i1 false) +// CHECK-AMD-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 +// CHECK-AMD-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] +// CHECK-AMD: user_code.entry: +// CHECK-AMD-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr)) +// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP2]], ptr [[N_CASTED_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP3:%.*]] 
= load i64, ptr [[N_CASTED_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP4:%.*]] = load ptr, ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i32 0, ptr [[DOTZERO_ADDR_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP__ASCAST]], align 4 +// CHECK-AMD-NEXT: call void @__omp_outlined__(ptr [[DOTTHREADID_TEMP__ASCAST]], ptr [[DOTZERO_ADDR_ASCAST]], i64 [[TMP3]], ptr [[TMP4]]) #[[ATTR2:[0-9]+]] +// CHECK-AMD-NEXT: call void @__kmpc_target_deinit(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i8 2) +// CHECK-AMD-NEXT: ret void +// CHECK-AMD: worker.exit: +// CHECK-AMD-NEXT: ret void +// +// +// CHECK-AMD-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-AMD-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[APTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-AMD-NEXT: entry: +// CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[APTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[I3:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// CHECK-AMD-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// CHECK-AMD-NEXT: [[APTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[APTR_ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// CHECK-AMD-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr +// CHECK-AMD-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_COMB_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_LB]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_COMB_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_COMB_UB]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// CHECK-AMD-NEXT: [[I3_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[I3]] to ptr +// CHECK-AMD-NEXT: [[N_CASTED_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_CASTED]] to ptr +// CHECK-AMD-NEXT: [[CAPTURED_VARS_ADDRS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[CAPTURED_VARS_ADDRS]] to ptr +// CHECK-AMD-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-AMD-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-AMD-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-AMD-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-AMD-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-AMD: omp.precond.then: +// CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @__kmpc_get_hardware_num_threads_in_block() +// CHECK-AMD-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB2:[0-9]+]] to ptr), i32 [[TMP5]], i32 91, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_COMB_LB_ASCAST]], ptr [[DOTOMP_COMB_UB_ASCAST]], ptr [[DOTOMP_STRIDE_ASCAST]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK-AMD-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP6]], [[TMP7]] +// CHECK-AMD-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK-AMD: cond.true: +// CHECK-AMD-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[COND_END:%.*]] +// CHECK-AMD: cond.false: +// CHECK-AMD-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[COND_END]] +// CHECK-AMD: cond.end: +// CHECK-AMD-NEXT: [[COND:%.*]] = phi i32 [ [[TMP8]], [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK-AMD-NEXT: store i32 [[COND]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP10]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-AMD: omp.inner.for.cond: +// CHECK-AMD-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK-AMD-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP11]], [[ADD]] +// CHECK-AMD-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-AMD: omp.inner.for.body: +// CHECK-AMD-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP14:%.*]] = zext i32 [[TMP13]] to i64 +// CHECK-AMD-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 +// CHECK-AMD-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP17]], ptr [[N_CASTED_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP18:%.*]] = load i64, ptr [[N_CASTED_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP19:%.*]] = load ptr, ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 0 +// CHECK-AMD-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP14]] to ptr +// CHECK-AMD-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 +// CHECK-AMD-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 1 +// CHECK-AMD-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP16]] to ptr +// CHECK-AMD-NEXT: store ptr [[TMP23]], ptr [[TMP22]], align 8 +// CHECK-AMD-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 2 +// CHECK-AMD-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP18]] to ptr +// CHECK-AMD-NEXT: store ptr [[TMP25]], ptr [[TMP24]], align 8 +// CHECK-AMD-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 0, i64 3 +// CHECK-AMD-NEXT: store ptr [[TMP19]], ptr [[TMP26]], align 8 +// CHECK-AMD-NEXT: [[TMP27:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_parallel_51(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), i32 [[TMP28]], i32 1, i32 -1, i32 -1, ptr @__omp_outlined__.1, ptr null, ptr [[CAPTURED_VARS_ADDRS_ASCAST]], i64 4) +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-AMD: omp.inner.for.inc: +// CHECK-AMD-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK-AMD-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP31:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP32:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK-AMD-NEXT: store i32 [[ADD7]], ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP33:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP34:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK-AMD-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP35:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP36:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK-AMD-NEXT: br i1 [[CMP9]], label 
[[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK-AMD: cond.true10: +// CHECK-AMD-NEXT: [[TMP37:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[COND_END12:%.*]] +// CHECK-AMD: cond.false11: +// CHECK-AMD-NEXT: [[TMP38:%.*]] = load i32, ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[COND_END12]] +// CHECK-AMD: cond.end12: +// CHECK-AMD-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK-AMD-NEXT: store i32 [[COND13]], ptr [[DOTOMP_COMB_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTOMP_COMB_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP39]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-AMD: omp.inner.for.end: +// CHECK-AMD-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-AMD: omp.loop.exit: +// CHECK-AMD-NEXT: [[TMP40:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP41:%.*]] = load i32, ptr [[TMP40]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP41]]) +// CHECK-AMD-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-AMD: omp.precond.end: +// CHECK-AMD-NEXT: ret void +// +// +// CHECK-AMD-LABEL: define {{[^@]+}}@__omp_outlined__.1 +// CHECK-AMD-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[DOTPREVIOUS_LB_:%.*]], i64 noundef [[DOTPREVIOUS_UB_:%.*]], i64 noundef [[N:%.*]], ptr noundef [[APTR:%.*]]) #[[ATTR1]] { +// CHECK-AMD-NEXT: entry: +// CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[APTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[I4:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-AMD-NEXT: [[DOTGLOBAL_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTGLOBAL_TID__ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTBOUND_TID__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTBOUND_TID__ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTPREVIOUS_LB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_LB__ADDR]] to ptr +// CHECK-AMD-NEXT: [[DOTPREVIOUS_UB__ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTPREVIOUS_UB__ADDR]] to ptr +// CHECK-AMD-NEXT: [[N_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[N_ADDR]] to ptr +// CHECK-AMD-NEXT: [[APTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[APTR_ADDR]] to ptr +// 
CHECK-AMD-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr +// CHECK-AMD-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR__ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_]] to ptr +// CHECK-AMD-NEXT: [[DOTCAPTURE_EXPR_1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTCAPTURE_EXPR_1]] to ptr +// CHECK-AMD-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_LB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_LB]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_UB_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_UB]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_STRIDE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_STRIDE]] to ptr +// CHECK-AMD-NEXT: [[DOTOMP_IS_LAST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IS_LAST]] to ptr +// CHECK-AMD-NEXT: [[I4_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I4]] to ptr +// CHECK-AMD-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i64 [[DOTPREVIOUS_LB_]], ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i64 [[DOTPREVIOUS_UB_]], ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store i64 [[N]], ptr [[N_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: store ptr [[APTR]], ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP0:%.*]] = load i32, ptr [[N_ADDR_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP0]], ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP1:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP1]], 0 +// CHECK-AMD-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK-AMD-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK-AMD-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 0, ptr [[I_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP2]] +// CHECK-AMD-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK-AMD: omp.precond.then: +// CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP3]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP4:%.*]] = load i64, ptr [[DOTPREVIOUS_LB__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[CONV:%.*]] = trunc i64 [[TMP4]] to i32 +// CHECK-AMD-NEXT: [[TMP5:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK-AMD-NEXT: store i32 [[CONV]], ptr [[DOTOMP_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[CONV3]], ptr [[DOTOMP_UB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 1, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_for_static_init_4(ptr addrspacecast (ptr addrspace(1) @[[GLOB3:[0-9]+]] to ptr), i32 [[TMP7]], i32 33, ptr [[DOTOMP_IS_LAST_ASCAST]], ptr [[DOTOMP_LB_ASCAST]], ptr [[DOTOMP_UB_ASCAST]], ptr 
[[DOTOMP_STRIDE_ASCAST]], i32 1, i32 1) +// CHECK-AMD-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_LB_ASCAST]], align 4 +// CHECK-AMD-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK-AMD: omp.inner.for.cond: +// CHECK-AMD-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[CONV5:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK-AMD-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTPREVIOUS_UB__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[CMP6:%.*]] = icmp ule i64 [[CONV5]], [[TMP10]] +// CHECK-AMD-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK-AMD: omp.inner.for.body: +// CHECK-AMD-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK-AMD-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-AMD-NEXT: store i32 [[ADD]], ptr [[I4_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP12:%.*]] = load i32, ptr [[I4_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP13:%.*]] = load ptr, ptr [[APTR_ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP14:%.*]] = load i32, ptr [[I4_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK-AMD-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i64 [[IDXPROM]] +// CHECK-AMD-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX]], align 4 +// CHECK-AMD-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK-AMD: omp.body.continue: +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK-AMD: omp.inner.for.inc: +// CHECK-AMD-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_STRIDE_ASCAST]], align 4 +// CHECK-AMD-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK-AMD-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV_ASCAST]], align 4 +// CHECK-AMD-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK-AMD: omp.inner.for.end: +// CHECK-AMD-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK-AMD: omp.loop.exit: +// CHECK-AMD-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR_ASCAST]], align 8 +// CHECK-AMD-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK-AMD-NEXT: call void @__kmpc_distribute_static_fini(ptr addrspacecast (ptr addrspace(1) @[[GLOB2]] to ptr), i32 [[TMP18]]) +// CHECK-AMD-NEXT: br label [[OMP_PRECOND_END]] +// CHECK-AMD: omp.precond.end: +// CHECK-AMD-NEXT: ret void +// diff --git a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp index 2c70e05feafd0..845888dd32d19 100644 --- a/clang/test/OpenMP/parallel_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_firstprivate_codegen.cpp @@ -467,6 +467,8 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 // CHECK1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK1-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK1-NEXT: call void @_ZN3SSTIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[SST]]) // CHECK1-NEXT: store i32 0, ptr [[T_VAR]], align 128 @@ -476,23 +478,29 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr 
inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK1-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[T_VAR]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], i32 [[TMP1]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK1-NEXT: store i32 [[TMP2]], ptr [[T_VAR_CASTED1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_CASTED1]], align 4 +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i32 [[TMP3]]) // CHECK1-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done1: +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done2: // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP1]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK1-NEXT: ret i32 [[TMP5]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -713,67 +721,63 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias 
[[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK1-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK1-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK1-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 +// CHECK1-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK1-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK1-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK1-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK1-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 // CHECK1-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK1-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i32 8, i1 false) -// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i32 8, i1 false) +// CHECK1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK1: omp.arraycpy.body: -// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ 
[[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK1-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK1-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK1-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK1: omp.arraycpy.done4: -// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i32 0, i32 0 -// CHECK1-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i32 4, i1 false) -// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK1-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK1-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK1: omp.arraycpy.done3: +// CHECK1-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) +// CHECK1-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) +// CHECK1-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 0 +// CHECK1-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i32 4, i1 false) +// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] +// CHECK1-NEXT: 
[[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK1: arraydestroy.body: -// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK1: arraydestroy.done9: +// CHECK1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK1: arraydestroy.done8: // CHECK1-NEXT: ret void // // @@ -803,18 +807,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK1-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK1-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK1-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK1-NEXT: ret void // // @@ -1123,27 +1123,23 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR3]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK3-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK3-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -1193,33 +1189,33 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[G_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK4-NEXT: store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr @g, i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 +// CHECK4-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_CASTED]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK4-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i32 [[TMP1]], i32 [[TMP3]]) // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK4-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK4-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK4-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, align 128 // CHECK4-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 // CHECK4-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK4-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK4-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK4-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK4-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 // CHECK4-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK4-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 @@ -1232,14 +1228,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK4-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK4-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 16 // CHECK4-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK4-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[G1]], align 128 -// CHECK4-NEXT: store volatile i32 [[TMP2]], ptr [[BLOCK_CAPTURED]], align 128 -// CHECK4-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[BLOCK_CAPTURED2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK4-NEXT: call void [[TMP5]](ptr [[BLOCK]]) +// CHECK4-NEXT: [[TMP0:%.*]] = load volatile i32, ptr [[G_ADDR]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP0]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK4-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [104 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], ptr [[BLOCK_CAPTURED1]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK4-NEXT: call void [[TMP3]](ptr [[BLOCK]]) // 
CHECK4-NEXT: ret void // // @@ -1675,6 +1671,8 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i64, align 8 // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: call void @_ZN3SSTIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[SST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 128 @@ -1684,23 +1682,29 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], ptr [[T_VAR]], ptr [[S_ARR]], ptr [[VAR]]) -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, ptr [[T_VAR]]) +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..3, ptr [[VEC]], i64 [[TMP1]], ptr [[S_ARR]], ptr [[VAR]]) +// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: store i32 [[TMP2]], ptr [[T_VAR_CASTED1]], align 4 +// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[T_VAR_CASTED1]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..4, i64 [[TMP3]]) // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done1: +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done2: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP1]] +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP5]] // // // CHECK9-LABEL: define {{[^@]+}}@_ZN2SSC2ERi @@ -1921,67 +1925,63 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK9-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 +// CHECK9-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK9-NEXT: [[AGG_TMP6:%.*]] = alloca 
[[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK9-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i64 8, i1 false) +// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label 
[[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done4: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i64 4, i1 false) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i64 0, i64 0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i64 4, i1 false) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done9: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 
[[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // @@ -2011,18 +2011,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -2331,27 +2327,23 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK11-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK11-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK11-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK11-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_1:%.*]], align 8 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK11-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK11-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK11-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK11-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK11-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK11-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[G1]], ptr [[TMP2]], align 8 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK11-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// 
CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 8 // CHECK11-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK11-NEXT: ret void // @@ -2401,33 +2393,33 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[BLOCK_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBLOCK_DESCRIPTOR]], ptr [[BLOCK_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 -// CHECK12-NEXT: store i32 [[TMP0]], ptr [[SIVAR_CASTED]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr @g, i64 [[TMP1]]) +// CHECK12-NEXT: [[TMP0:%.*]] = load volatile i32, ptr @g, align 128 +// CHECK12-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZZ4mainE5sivar, align 4 +// CHECK12-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP3]]) // CHECK12-NEXT: ret void // // // CHECK12-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK12-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK12-NEXT: entry: // CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK12-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK12-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK12-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK12-NEXT: [[BLOCK:%.*]] = alloca <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, align 128 // CHECK12-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK12-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK12-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK12-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK12-NEXT: [[TMP1:%.*]] = load volatile i32, ptr [[TMP0]], align 128 -// CHECK12-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK12-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK12-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK12-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 // CHECK12-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 0 // CHECK12-NEXT: store ptr @_NSConcreteStackBlock, ptr [[BLOCK_ISA]], align 128 @@ -2440,14 +2432,14 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK12-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 4 // CHECK12-NEXT: store ptr @__block_descriptor_tmp, ptr [[BLOCK_DESCRIPTOR]], align 8 // CHECK12-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 7 -// CHECK12-NEXT: [[TMP2:%.*]] = load volatile i32, ptr [[G1]], align 128 -// CHECK12-NEXT: store volatile i32 [[TMP2]], ptr [[BLOCK_CAPTURED]], align 128 -// CHECK12-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP3]], ptr [[BLOCK_CAPTURED2]], align 32 -// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK12-NEXT: call void [[TMP5]](ptr [[BLOCK]]) +// CHECK12-NEXT: [[TMP0:%.*]] = load volatile i32, ptr [[G_ADDR]], align 4 +// CHECK12-NEXT: store volatile i32 [[TMP0]], ptr [[BLOCK_CAPTURED]], align 128 +// CHECK12-NEXT: [[BLOCK_CAPTURED1:%.*]] = getelementptr inbounds <{ ptr, i32, i32, ptr, ptr, i32, [92 x i8], i32 }>, ptr [[BLOCK]], i32 0, i32 5 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], ptr [[BLOCK_CAPTURED1]], align 32 +// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], ptr [[BLOCK]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK12-NEXT: 
call void [[TMP3]](ptr [[BLOCK]]) // CHECK12-NEXT: ret void // // diff --git a/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp index a120b8bfb1904..d316ee8b3e411 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_firstprivate_codegen.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s @@ -7,11 +8,10 @@ // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY4 %s // expected-no-diagnostics #ifndef ARRAY @@ -30,15 +30,6 @@ struct S { volatile double g; -// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } -// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } -// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] -// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]* } -// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] } -// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 } -// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* } -// 
CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] } -// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] } template T tmain() { S ttt; @@ -58,48 +49,14 @@ T tmain() { int main() { static int sivar; #ifdef LAMBDA - // LAMBDA: [[G:@.+]] ={{.*}} global double - // LAMBDA: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, - // LAMBDA-LABEL: @main - // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( [&]() { - // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( -// LAMBDA: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( -// LAMBDA-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 -// LAMBDA-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] -// LAMBDA: [[THEN]] -// LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 -// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// LAMBDA: [[G_VAL:%.+]] = load volatile double, double* @{{.+}}, -// LAMBDA: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] - -// LAMBDA: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// LAMBDA: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* @{{.+}}, -// LAMBDA: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]] - -// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) -// LAMBDA: call {{.*}}void @__kmpc_end_master( -// LAMBDA-NEXT: br label {{%?}}[[EXIT]] -// LAMBDA: [[EXIT]] -// LAMBDA: ret + + #pragma omp parallel master taskloop firstprivate(g, sivar) for (int i = 0; i < 10; ++i) { - // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* {{[^,]*}} [[ARG_PTR:%.+]]) - // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], - // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] - // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] - // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] - - // LAMBDA: store double* %{{.+}}, double** %{{.+}}, - // LAMBDA: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, %{{.+}}* noalias noundef %1) + g = 1; sivar = 11; - // LAMBDA: store double 1.0{{.+}}, double* %{{.+}}, - // LAMBDA: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, - // LAMBDA: call void [[INNER_LAMBDA]](% - // LAMBDA: ret [&]() { g = 2; sivar = 22; @@ -108,51 +65,13 @@ int main() { }(); return 0; #elif defined(BLOCKS) - // BLOCKS: [[G:@.+]] ={{.*}} global double - // BLOCKS: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, - // BLOCKS-LABEL: @main - // BLOCKS: call void {{%.+}}(i8 ^{ - // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* - // BLOCKS: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( - // BLOCKS-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 - // BLOCKS-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] - // BLOCKS: [[THEN]] - // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast 
(i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) - // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 - // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 - // BLOCKS: [[G_VAL:%.+]] = load volatile double, double* @{{.+}}, - // BLOCKS: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] - - // BLOCKS: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 - // BLOCKS: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* @{{.+}}, - // BLOCKS: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]] - // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) - // BLOCKS: call {{.*}}void @__kmpc_end_master( - // BLOCKS-NEXT: br label {{%?}}[[EXIT]] - // BLOCKS: [[EXIT]] - // BLOCKS: ret + #pragma omp parallel master taskloop firstprivate(g, sivar) for (int i = 0; i < 10; ++i) { - // BLOCKS: define {{.+}} void {{@.+}}(i8* - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS: store double 2.0{{.+}}, double* - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: store i{{[0-9]+}} 22, i{{[0-9]+}}* - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: ret - - // BLOCKS: store double* %{{.+}}, double** %{{.+}}, - // BLOCKS: store i{{[0-9]+}}* %{{.+}}, i{{[0-9]+}}** %{{.+}}, - // BLOCKS: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, %{{.+}}* noalias noundef %1) + g = 1; sivar = 11; - // BLOCKS: store double 1.0{{.+}}, double* %{{.+}}, - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: call void {{%.+}}(i8 ^{ g = 2; sivar = 22; @@ -177,303 +96,86 @@ int main() { #endif } -// CHECK: [[SIVAR:.+]] = internal global i{{[0-9]+}} 0, -// CHECK: define{{.*}} i{{[0-9]+}} @main() -// CHECK: alloca [[S_DOUBLE_TY]], -// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]], -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]], -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]], - -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR:@.+]]([[S_DOUBLE_TY]]* {{[^,]*}} [[TEST]], - -// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( -// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 -// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] -// CHECK: [[THEN]] + + // Store original variables in capture struct. -// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: store [2 x [[S_DOUBLE_TY]]]* %{{.+}}, [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]], -// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]], // Allocate task. 
// Returns struct kmp_task_t { // [[KMP_TASK_T]] task_data; // [[KMP_TASK_MAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], -// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 16, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // Also copy address of private copy to the corresponding shareds reference. -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 // Constructors for s_arr and var. // s_arr; -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: bitcast [2 x [[S_DOUBLE_TY]]]* %{{.+}} to [[S_DOUBLE_TY]]* -// CHECK: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* {{[^,]*}} [[S_ARR_CUR:%[^,]+]], -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1 -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 1 -// CHECK: icmp eq -// CHECK: br i1 // var; -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK-NEXT: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]], [[S_DOUBLE_TY]]* {{.*}}, // t_var; -// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 -// CHECK-NEXT: [[T_VAR:%.+]] = load i32, i32* %{{.+}}, -// CHECK-NEXT: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], // vec; -// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK-NEXT: bitcast [2 x i32]* [[PRIVATE_VEC_REF]] to i8* -// CHECK-NEXT: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64( // sivar; -// CHECK: [[PRIVATE_SIVAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 4 -// CHECK-NEXT: [[SIVAR:%.+]] = load i32, i32* @{{.+}}, -// CHECK-NEXT: store i32 [[SIVAR]], i32* [[PRIVATE_SIVAR_REF]], // Provide pointer to destructor function, which will destroy private variables at the end of the task. -// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 -// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** -// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], // Start task. 
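// Editorial sketch: the record that all of the above builds up, rendered as a
// hedged, self-contained C++ struct before the __kmpc_taskloop launch below.
// The shapes are read off the removed CHECK-DAG type lines (kmp_task_t is
// { i8*, i32 (i32, i8*)*, i32, %union, %union, i64, i64, i64, i32, i8* },
// followed by the privates block); every field name is a readability guess,
// not the runtime's real declaration.

#include <cstdint>

typedef int32_t (*kmp_routine_entry_t)(int32_t gtid, void *task);

struct kmp_task_t {
  void *shareds;               // i8*: points at the capture struct (CAP_MAIN_TY)
  kmp_routine_entry_t routine; // i32 (i32, i8*)*: the task entry point
  int32_t part_id;             // i32
  kmp_routine_entry_t data1;   // first %union: the destructor thunk is stored here
  kmp_routine_entry_t data2;   // second %union
  int64_t lb, ub, st;          // taskloop lower bound, upper bound, stride
  int32_t liter;               // last-iteration flag
  void *reductions;            // trailing i8*
};

struct S_double { double f; }; // S_DOUBLE_TY = type { double }

struct kmp_task_main_t {       // KMP_TASK_MAIN_TY = { KMP_TASK_T_TY, PRIVATES_MAIN_TY }
  kmp_task_t task_data;
  struct {                     // PRIVATES_MAIN_TY, fields in the order the checks visit them
    S_double s_arr[2];
    S_double var;
    int32_t t_var;
    int32_t vec[2];
    int32_t sivar;
  } privates;
};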
-// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) -// CHECK: call {{.*}}void @__kmpc_end_master( -// CHECK-NEXT: br label {{%?}}[[EXIT]] -// CHECK: [[EXIT]] - -// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias noundef %0, [[S_DOUBLE_TY]]** noalias noundef %1, i32** noalias noundef %2, [2 x [[S_DOUBLE_TY]]]** noalias noundef %3, [2 x i32]** noalias noundef %4, i32** noalias noundef %5) -// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]** -// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0 -// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}}, -// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]], -// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1 -// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}}, -// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]], -// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2 -// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}}, -// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]], -// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3 -// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, -// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]], -// CHECK: [[PRIV_SIVAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 4 -// CHECK: [[ARG5:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** %{{.+}}, -// CHECK: store i{{[0-9]+}}* [[PRIV_SIVAR]], i{{[0-9]+}}** [[ARG5]], -// CHECK: ret void - -// CHECK: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, [[KMP_TASK_MAIN_TY]]* noalias noundef %1) - -// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, -// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, -// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], -// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], - -// CHECK: [[FN:%.+]] = bitcast void (i8*, ...)* [[MAP_FN]] to void (i8*, -// CHECK: call void [[FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) - -// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], -// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], -// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], -// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], -// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]], + + + + + // Privates actually are used. 
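// The launch deleted above is __kmpc_taskloop, and its argument list can be
// read straight off the call. A hedged C++ declaration; parameter names are
// chosen for readability, and only the types and the literal values
// (if_val 1, nogroup 1, sched 0, grainsize 0) come from the assertions.

#include <cstdint>

extern "C" void __kmpc_taskloop(
    void *loc,         // %struct.ident_t*: source-location descriptor
    int32_t gtid,      // global thread id
    void *task,        // the i8* RES returned by __kmpc_omp_task_alloc
    int32_t if_val,    // 1: no if() clause on the directive
    int64_t *lb,       // pointer to the lower-bound slot inside the task record
    int64_t *ub,       // pointer to the upper-bound slot inside the task record
    int64_t st,        // stride
    int32_t nogroup,   // 1: do not open an implicit taskgroup here
    int32_t sched,     // 0: no grainsize/num_tasks schedule requested
    int64_t grainsize, // 0
    void *task_dup);   // the MAIN_DUP thunk that copy-constructs privates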
-// CHECK-DAG: [[PRIV_VAR]] -// CHECK-DAG: [[PRIV_T_VAR]] -// CHECK-DAG: [[PRIV_S_ARR]] -// CHECK-DAG: [[PRIV_VEC]] -// CHECK-DAG: [[PRIV_SIVAR]] - -// CHECK: ret - -// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]* noundef %0, [[KMP_TASK_MAIN_TY]]* noundef %1, i32 noundef %2) -// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1 -// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2 -// CHECK: br i1 % - -// CHECK: phi [[S_DOUBLE_TY]]* -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i32 1 -// CHECK: icmp eq [[S_DOUBLE_TY]]* % -// CHECK: br i1 % - -// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1 -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* -// CHECK: ret void - -// CHECK: define internal noundef i32 [[DESTRUCTORS]](i32 noundef %{{.+}}, [[KMP_TASK_MAIN_TY]]* noalias noundef %{{.+}}) -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]]) -// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1 -// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_S_ARR_ELEM_REF]]) -// CHECK: icmp eq -// CHECK: br i1 -// CHECK: ret i32 - -// CHECK: alloca [[S_INT_TY]], -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, align 128 -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]], - -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* {{[^,]*}} [[TEST]], + + + + + + + // Store original variables in capture struct. -// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: store [2 x [[S_INT_TY]]]* %{{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_REF]], -// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store [[S_INT_TY]]* %{{.+}}, [[S_INT_TY]]** [[VAR_REF]], // Allocate task. 
// Returns struct kmp_task_t { // [[KMP_TASK_T_TY]] task_data; // [[KMP_TASK_TMAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], -// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 16, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 // t_var; -// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[T_VAR:%.+]] = load i32, i32* %{{.+}}, align 128 -// CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], align 128 // vec; -// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK-NEXT: bitcast [2 x i32]* [[PRIVATE_VEC_REF]] to i8* -// CHECK-NEXT: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64( // Constructors for s_arr and var. // a_arr; -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: bitcast [2 x [[S_INT_TY]]]* %{{.+}} to [[S_INT_TY]]* -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* {{[^,]*}} [[S_ARR_CUR:%[^,]+]], -// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1 -// CHECK: icmp eq -// CHECK: br i1 // var; -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK-NEXT: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]], // Provide pointer to destructor function, which will destroy private variables at the end of the task. -// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 -// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** -// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], // Start task. 
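// tmain<int> repeats the pattern, and the allocation call is worth spelling
// out once. A hedged C++ declaration of __kmpc_omp_task_alloc; parameter
// names are illustrative, while the literals asserted here say the tmain<int>
// task record is 256 bytes (kmp_task_t plus the padding the [{{[0-9]+}} x i8]
// field adds before the 128-byte-aligned privates) and the shareds struct is
// 16 bytes (CAP_TMAIN_TY, two pointers).

#include <cstddef>
#include <cstdint>

extern "C" void *__kmpc_omp_task_alloc(
    void *loc,             // %struct.ident_t*
    int32_t gtid,          // global thread id
    int32_t flags,         // 9: presumably tied (1) | destructors_thunk (8)
    size_t sizeof_task,    // 256 for tmain<int>, 120 for main above
    size_t sizeof_shareds, // 16: two pointers (s_arr and var)
    int32_t (*task_entry)(int32_t, void *)); // TASK_ENTRY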
-// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*)) - -// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias noundef %{{.+}}, i32** noalias noundef %{{.+}}, [2 x i32]** noalias noundef %{{.+}}, [2 x [[S_INT_TY]]]** noalias noundef %{{.+}}, [[S_INT_TY]]** noalias noundef %{{.+}}) -// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]** -// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0 -// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}}, -// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]], -// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1 -// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, -// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]], -// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2 -// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}}, -// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]], -// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3 -// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}}, -// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]], -// CHECK: ret void - -// CHECK: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, [[KMP_TASK_TMAIN_TY]]* noalias noundef %1) -// CHECK: alloca i32*, -// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], -// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], -// CHECK: [[FN:%.+]] = bitcast void (i8*, ...)* [[MAP_FN]] to void (i8*, -// CHECK: call void [[FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) -// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], -// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], -// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], -// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]], + + // Privates actually are used. 
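// The PRIVATES_MAP_FN assertions dropped above describe a small thunk that
// the task entry calls to turn the privates block into one pointer per
// firstprivate. A minimal, self-contained sketch under assumed names (the
// real function is the anonymous mapper these checks matched; S here is a
// stand-in for the test's template):

#include <cstdint>

template <typename T> struct S { T f; };

struct PrivatesTmain {  // PRIVATES_TMAIN_TY = { i32, [2 x i32],
  int32_t t_var;        //   [2 x S<int>], S<int>, [104 x i8] }
  int32_t vec[2];
  S<int> s_arr[2];
  S<int> var;
  char pad[104];        // pads the record out for the 128-byte alignment
};

static void privates_map(PrivatesTmain *p, int32_t **t_var,
                         int32_t (**vec)[2], S<int> (**s_arr)[2],
                         S<int> **var) {
  *t_var = &p->t_var; // store i32* [[PRIV_T_VAR]], i32** [[ARG1]]
  *vec   = &p->vec;   // store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]]
  *s_arr = &p->s_arr; // store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], ... [[ARG3]]
  *var   = &p->var;   // store [[S_INT_TY]]* [[PRIV_VAR]], ... [[ARG4]]
}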
-// CHECK-DAG: [[PRIV_VAR]] -// CHECK-DAG: [[PRIV_T_VAR]] -// CHECK-DAG: [[PRIV_S_ARR]] -// CHECK-DAG: [[PRIV_VEC]] - -// CHECK: ret - -// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]* noundef %0, [[KMP_TASK_TMAIN_TY]]* noundef %1, i32 noundef %2) -// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 -// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2 -// CHECK: br i1 % - -// CHECK: phi [[S_INT_TY]]* -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* -// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i32 1 -// CHECK: icmp eq [[S_INT_TY]]* % -// CHECK: br i1 % - -// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3 -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* -// CHECK: ret void - -// CHECK: define internal noundef i32 [[DESTRUCTORS]](i32 noundef %0, [[KMP_TASK_TMAIN_TY]]* noalias noundef %1) -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]]) -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1 -// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_S_ARR_ELEM_REF]]) -// CHECK: icmp eq -// CHECK: br i1 -// CHECK: ret i32 + + + + + #endif #else -// ARRAY-LABEL: array_func struct St { int a, b; St() : a(0), b(0) {} @@ -482,13 +184,2207 @@ struct St { }; void array_func(int n, float a[n], St s[2]) { -// ARRAY: call i8* @__kmpc_omp_task_alloc( -// ARRAY: call void @__kmpc_taskloop( -// ARRAY: store float** %{{.+}}, float*** %{{.+}}, -// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}}, #pragma omp parallel master taskloop firstprivate(a, s) for (int i = 0; i < 10; ++i) ; } #endif +// CHECK-LABEL: define {{[^@]+}}@main +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 +// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) +// 
CHECK-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false) +// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00) +// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* +// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]]) +// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done1: +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP4]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void 
@_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
+// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 +// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 +// CHECK-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP8]], align 8 +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates* +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false) +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 1 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]], double noundef 0.000000e+00) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done1: +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 1 +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP19]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP2]], double noundef 0.000000e+00) +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 2 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 3 +// CHECK-NEXT: [[TMP23:%.*]] = bitcast [2 x i32]* [[TMP22]] to i8* +// CHECK-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 8, i1 false) +// CHECK-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 4 +// CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3 +// CHECK-NEXT: [[TMP28:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP27]] to i32 (i32, i8*)** +// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5 +// CHECK-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6 +// CHECK-NEXT: store i64 9, i64* [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7 +// CHECK-NEXT: store i64 1, i64* [[TMP31]], align 8 +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9 +// CHECK-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 8, i1 false) +// CHECK-NEXT: [[TMP34:%.*]] = load i64, i64* [[TMP31]], align 8 +// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP29]], i64* [[TMP30]], i64 [[TMP34]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_privates_map. 
+// CHECK-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], %struct.S** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]], [2 x %struct.S]** noalias noundef [[TMP3:%.*]], [2 x i32]** noalias noundef [[TMP4:%.*]], i32** noalias noundef [[TMP5:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.S**, align 8 +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S]**, align 8 +// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca [2 x i32]**, align 8 +// CHECK-NEXT: [[DOTADDR5:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-NEXT: store %struct.S** [[TMP1]], %struct.S*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store [2 x %struct.S]** [[TMP3]], [2 x %struct.S]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x i32]** [[TMP4]], [2 x i32]*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store i32** [[TMP5]], i32*** [[DOTADDR5]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 0 +// CHECK-NEXT: [[TMP8:%.*]] = load [2 x %struct.S]**, [2 x %struct.S]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x %struct.S]* [[TMP7]], [2 x %struct.S]** [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 1 +// CHECK-NEXT: [[TMP10:%.*]] = load %struct.S**, %struct.S*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store %struct.S* [[TMP9]], %struct.S** [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 2 +// CHECK-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store i32* [[TMP11]], i32** [[TMP12]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 3 +// CHECK-NEXT: [[TMP14:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store [2 x i32]* [[TMP13]], [2 x i32]** [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32**, i32*** [[DOTADDR5]], align 8 +// CHECK-NEXT: store i32* [[TMP15]], i32** [[TMP16]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_entry. 
+// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR4_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// CHECK-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// CHECK-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* +// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], %struct.S** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP26:%.*]] = load %struct.S*, %struct.S** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP29:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR4_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP31]] to i32 +// CHECK-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// CHECK: omp.inner.for.cond.i: +// CHECK-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: [[CONV5_I:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, 
!noalias !14 +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV5_I]], [[TMP33]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// CHECK: omp.inner.for.body.i: +// CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: store i32 [[TMP34]], i32* [[I_I]], align 4, !noalias !14 +// CHECK-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP29]], i64 0, i64 0 +// CHECK-NEXT: store i32 [[TMP35]], i32* [[ARRAYIDX_I]], align 4 +// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP28]], i64 0, i64 0 +// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[ARRAYIDX6_I]] to i8* +// CHECK-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP26]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP36]], i8* align 8 [[TMP37]], i64 8, i1 false) +// CHECK-NEXT: store i32 33, i32* [[TMP30]], align 4 +// CHECK-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP38]], 1 +// CHECK-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I]] +// CHECK: .omp_outlined..1.exit: +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_dup. +// CHECK-SAME: (%struct.kmp_task_t_with_privates* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP0]], %struct.kmp_task_t_with_privates** [[DOTADDR]], align 8 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 0 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP9]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP12]] to %struct.S* +// CHECK-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]], double noundef 0.000000e+00) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done3: +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 1 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP9]], i32 0, i32 1 +// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP16]], align 8 +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP15]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP17]], double noundef 0.000000e+00) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_destructor. 
+// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP2]], i32 0, i32 1 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done2: +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP7]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// CHECK-SAME: () #[[ATTR9:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// CHECK-NEXT: call void 
@_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// CHECK-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) +// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 128 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* +// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]]) +// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done1: +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP4]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: 
[[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: store double 0.000000e+00, double* [[F]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0 +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[T_ADDR]], align 8 +// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]] +// CHECK-NEXT: store double [[ADD]], double* [[F]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// CHECK-NEXT: store double [[TMP0]], double* [[F]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i +// CHECK-SAME: (%struct.S.0* noundef nonnull 
align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// CHECK-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2 +// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8 +// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 +// CHECK-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: 
[[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP8]], align 8 +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..5 to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates.2* +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 128 +// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false) +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 2 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 0 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 128 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 1 +// CHECK-NEXT: [[TMP19:%.*]] = bitcast [2 x i32]* [[TMP18]] to i8* +// CHECK-NEXT: [[TMP20:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 8, i1 false) +// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 2 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP21]], i32 0, i32 0 +// CHECK-NEXT: [[TMP22:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0* +// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP23]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP22]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: 
[[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done1: +// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 3 +// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP24]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]], i32 noundef 0) +// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3 +// CHECK-NEXT: [[TMP26:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP25]] to i32 (i32, i8*)** +// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP26]], align 8 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5 +// CHECK-NEXT: store i64 0, i64* [[TMP27]], align 8 +// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6 +// CHECK-NEXT: store i64 9, i64* [[TMP28]], align 16 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7 +// CHECK-NEXT: store i64 1, i64* [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9 +// CHECK-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 8, i1 false) +// CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[TMP29]], align 8 +// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP27]], i64* [[TMP28]], i64 [[TMP32]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2*, i32)* @.omp_task_dup..6 to i8*)) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_privates_map..4 +// CHECK-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.0]** noalias noundef [[TMP3:%.*]], %struct.S.0** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.3*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: 
[[DOTADDR2:%.*]] = alloca [2 x i32]**, align 8 +// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.0]**, align 8 +// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.0**, align 8 +// CHECK-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store [2 x i32]** [[TMP2]], [2 x i32]*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store [2 x %struct.S.0]** [[TMP3]], [2 x %struct.S.0]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store %struct.S.0** [[TMP4]], %struct.S.0*** [[DOTADDR4]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 1 +// CHECK-NEXT: [[TMP9:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store [2 x i32]* [[TMP8]], [2 x i32]** [[TMP9]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 2 +// CHECK-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.0]**, [2 x %struct.S.0]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP10]], [2 x %struct.S.0]** [[TMP11]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 3 +// CHECK-NEXT: [[TMP13:%.*]] = load %struct.S.0**, %struct.S.0*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store %struct.S.0* [[TMP12]], %struct.S.0** [[TMP13]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_entry..5 +// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.0]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.0*, align 8 +// CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store 
%struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1*
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8*
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8*
+// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5
+// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6
+// CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 16
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7
+// CHECK-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
+// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8
+// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 64
+// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9
+// CHECK-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]])
+// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !28
+// CHECK-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !28
+// CHECK-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)*
+// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR4]]
+// CHECK-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: [[TMP27:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !28
+// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !28
+// CHECK-NEXT: [[TMP29:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !28
+// CHECK-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP30]] to i32
+// CHECK-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28
+// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]]
+// CHECK: omp.inner.for.cond.i:
+// CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28
+// CHECK-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP31]] to i64
+// CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !28
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP32]]
+// CHECK-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]]
+// CHECK: omp.inner.for.body.i:
+// CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28
+// CHECK-NEXT: store i32 [[TMP33]], i32* [[I_I]], align 4, !noalias !28
+// CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP26]], align 128
+// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP27]], i64 0, i64 0
+// CHECK-NEXT: store i32 [[TMP34]], i32* [[ARRAYIDX_I]], align 4
+// CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP28]], i64 0, i64 0
+// CHECK-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] to i8*
+// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP29]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false)
+// CHECK-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28
+// CHECK-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP37]], 1
+// CHECK-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28
+// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I]]
+// CHECK: .omp_outlined..3.exit:
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_task_dup..6
+// CHECK-SAME: (%struct.kmp_task_t_with_privates.2* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8
+// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP0]], %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8
+// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1*
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 2
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP9]], i32 0, i32 0
+// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP11]], align 8
+// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP10]], i32 0, i32 0
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP12]] to %struct.S.0*
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]]
+// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK: omp.arraycpy.body:
+// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0)
+// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]]
+// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK: omp.arraycpy.done3:
+// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 3
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP9]], i32 0, i32 1
+// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP16]], align 8
+// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP17]], i32 noundef 0)
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_task_destructor..7
+// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8
+// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
+// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP2]], i32 0, i32 2
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 2
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 3
+// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]]
+// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK: arraydestroy.body:
+// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
+// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
+// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK: arraydestroy.done2:
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP7]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]]
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0
+// CHECK-NEXT: store i32 0, i32* [[F]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8
+// CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8
+// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+// CHECK-NEXT: store i32 [[ADD]], i32* [[F]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP0]], i32* [[F]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret void
+//
+//
+// LAMBDA-LABEL: define {{[^@]+}}@main
+// LAMBDA-SAME: () #[[ATTR0:[0-9]+]] {
+// LAMBDA-NEXT: entry:
+// LAMBDA-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1
+// LAMBDA-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// LAMBDA-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]])
+// LAMBDA-NEXT: ret i32 0
+//
+//
+// LAMBDA-LABEL: define {{[^@]+}}@.omp_outlined.
+// LAMBDA-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
+// LAMBDA-NEXT: entry:
+// LAMBDA-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// LAMBDA-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// LAMBDA-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// LAMBDA-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// LAMBDA-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// LAMBDA-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// LAMBDA-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// LAMBDA-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]])
+// LAMBDA-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// LAMBDA-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// LAMBDA: omp_if.then:
+// LAMBDA-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// LAMBDA-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// LAMBDA-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates*
+// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0
+// LAMBDA-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1
+// LAMBDA-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0
+// LAMBDA-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8
+// LAMBDA-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8
+// LAMBDA-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1
+// LAMBDA-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4
+// LAMBDA-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8
+// LAMBDA-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5
+// LAMBDA-NEXT: store i64 0, i64* [[TMP12]], align 8
+// LAMBDA-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6
+// LAMBDA-NEXT: store i64 9, i64* [[TMP13]], align 8
+// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7
+// LAMBDA-NEXT: store i64 1, i64* [[TMP14]], align 8
+// LAMBDA-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9
+// LAMBDA-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8*
+// LAMBDA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false)
+// LAMBDA-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8
+// LAMBDA-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null)
+// LAMBDA-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// LAMBDA-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// LAMBDA-NEXT: br label [[OMP_IF_END]]
+// LAMBDA: omp_if.end:
+// LAMBDA-NEXT: ret void
+//
+//
+// LAMBDA-LABEL: define {{[^@]+}}@.omp_task_privates_map.
+// LAMBDA-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR5:[0-9]+]] {
+// LAMBDA-NEXT: entry:
+// LAMBDA-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
+// LAMBDA-NEXT: [[DOTADDR1:%.*]] = alloca double**, align 8
+// LAMBDA-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8
+// LAMBDA-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8
+// LAMBDA-NEXT: store double** [[TMP1]], double*** [[DOTADDR1]], align 8
+// LAMBDA-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8
+// LAMBDA-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8
+// LAMBDA-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0
+// LAMBDA-NEXT: [[TMP5:%.*]] = load double**, double*** [[DOTADDR1]], align 8
+// LAMBDA-NEXT: store double* [[TMP4]], double** [[TMP5]], align 8
+// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1
+// LAMBDA-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8
+// LAMBDA-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8
+// LAMBDA-NEXT: ret void
+//
+//
+// LAMBDA-LABEL: define {{[^@]+}}@.omp_task_entry.
+// LAMBDA-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] {
+// LAMBDA-NEXT: entry:
+// LAMBDA-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
+// LAMBDA-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
+// LAMBDA-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
+// LAMBDA-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
+// LAMBDA-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8
+// LAMBDA-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8
+// LAMBDA-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8
+// LAMBDA-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8
+// LAMBDA-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
+// LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8
+// LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8
+// LAMBDA-NEXT: [[I_I:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
+// LAMBDA-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// LAMBDA-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
+// LAMBDA-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// LAMBDA-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
+// LAMBDA-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// LAMBDA-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
+// LAMBDA-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
+// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
+// LAMBDA-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// LAMBDA-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
+// LAMBDA-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1
+// LAMBDA-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8*
+// LAMBDA-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
+// LAMBDA-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5
+// LAMBDA-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
+// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6
+// LAMBDA-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
+// LAMBDA-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7
+// LAMBDA-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
+// LAMBDA-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8
+// LAMBDA-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8
+// LAMBDA-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9
+// LAMBDA-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
+// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
+// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
+// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
+// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
+// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
+// LAMBDA-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
+// LAMBDA-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14
+// LAMBDA-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)*
+// LAMBDA-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]]
+// LAMBDA-NEXT: [[TMP26:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP28]] to i32
+// LAMBDA-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// LAMBDA-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]]
+// LAMBDA: omp.inner.for.cond.i:
+// LAMBDA-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// LAMBDA-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP29]] to i64
+// LAMBDA-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP30]]
+// LAMBDA-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
+// LAMBDA: omp.inner.for.body.i:
+// LAMBDA-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// LAMBDA-NEXT: store i32 [[TMP31]], i32* [[I_I]], align 4, !noalias !14
+// LAMBDA-NEXT: store double 1.000000e+00, double* [[TMP26]], align 8
+// LAMBDA-NEXT: store i32 11, i32* [[TMP27]], align 4
+// LAMBDA-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 0
+// LAMBDA-NEXT: store double* [[TMP26]], double** [[TMP32]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 1
+// LAMBDA-NEXT: store i32* [[TMP27]], i32** [[TMP33]], align 8, !noalias !14
+// LAMBDA-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]])
+// LAMBDA-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// LAMBDA-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP34]], 1
+// LAMBDA-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// LAMBDA-NEXT: br label [[OMP_INNER_FOR_COND_I]]
+// LAMBDA: .omp_outlined..1.exit:
+// LAMBDA-NEXT: ret i32 0
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@main
+// BLOCKS-SAME: () #[[ATTR1:[0-9]+]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// BLOCKS-NEXT: [[TMP0:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to %struct.__block_literal_generic*), i32 0, i32 3), align 8
+// BLOCKS-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to void (i8*)*
+// BLOCKS-NEXT: call void [[TMP1]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to i8*))
+// BLOCKS-NEXT: ret i32 0
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@__main_block_invoke
+// BLOCKS-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8
+// BLOCKS-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8
+// BLOCKS-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
+// BLOCKS-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*
+// BLOCKS-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8
+// BLOCKS-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+// BLOCKS-NEXT: ret void
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@.omp_outlined.
+// BLOCKS-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// BLOCKS-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// BLOCKS-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// BLOCKS-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// BLOCKS-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// BLOCKS-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// BLOCKS-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// BLOCKS-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// BLOCKS-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// BLOCKS-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// BLOCKS: omp_if.then:
+// BLOCKS-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// BLOCKS-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// BLOCKS-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates*
+// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0
+// BLOCKS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1
+// BLOCKS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0
+// BLOCKS-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8
+// BLOCKS-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8
+// BLOCKS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1
+// BLOCKS-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4
+// BLOCKS-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8
+// BLOCKS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5
+// BLOCKS-NEXT: store i64 0, i64* [[TMP12]], align 8
+// BLOCKS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6
+// BLOCKS-NEXT: store i64 9, i64* [[TMP13]], align 8
+// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7
+// BLOCKS-NEXT: store i64 1, i64* [[TMP14]], align 8
+// BLOCKS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9
+// BLOCKS-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8*
+// BLOCKS-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false)
+// BLOCKS-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8
+// BLOCKS-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null)
+// BLOCKS-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// BLOCKS-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// BLOCKS-NEXT: br label [[OMP_IF_END]]
+// BLOCKS: omp_if.end:
+// BLOCKS-NEXT: ret void
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@_block_invoke
+// BLOCKS-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8
+// BLOCKS-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>*, align 8
+// BLOCKS-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
+// BLOCKS-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>*
+// BLOCKS-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>** [[BLOCK_ADDR]], align 8
+// BLOCKS-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 5
+// BLOCKS-NEXT: store double 2.000000e+00, double* [[BLOCK_CAPTURE_ADDR]], align 8
+// BLOCKS-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 6
+// BLOCKS-NEXT: store i32 22, i32* [[BLOCK_CAPTURE_ADDR1]], align 8
+// BLOCKS-NEXT: ret void
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@.omp_task_privates_map.
+// BLOCKS-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR6:[0-9]+]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
+// BLOCKS-NEXT: [[DOTADDR1:%.*]] = alloca double**, align 8
+// BLOCKS-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8
+// BLOCKS-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8
+// BLOCKS-NEXT: store double** [[TMP1]], double*** [[DOTADDR1]], align 8
+// BLOCKS-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8
+// BLOCKS-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8
+// BLOCKS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0
+// BLOCKS-NEXT: [[TMP5:%.*]] = load double**, double*** [[DOTADDR1]], align 8
+// BLOCKS-NEXT: store double* [[TMP4]], double** [[TMP5]], align 8
+// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1
+// BLOCKS-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8
+// BLOCKS-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8
+// BLOCKS-NEXT: ret void
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@.omp_task_entry.
+// BLOCKS-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
+// BLOCKS-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
+// BLOCKS-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
+// BLOCKS-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
+// BLOCKS-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8
+// BLOCKS-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8
+// BLOCKS-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8
+// BLOCKS-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8
+// BLOCKS-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
+// BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8
+// BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8
+// BLOCKS-NEXT: [[I_I:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: [[BLOCK_I:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, align 8
+// BLOCKS-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// BLOCKS-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
+// BLOCKS-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// BLOCKS-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
+// BLOCKS-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// BLOCKS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
+// BLOCKS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
+// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
+// BLOCKS-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// BLOCKS-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
+// BLOCKS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1
+// BLOCKS-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8*
+// BLOCKS-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
+// BLOCKS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5
+// BLOCKS-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
+// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6
+// BLOCKS-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
+// BLOCKS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7
+// BLOCKS-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
+// BLOCKS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8
+// BLOCKS-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8
+// BLOCKS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9
+// BLOCKS-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
+// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
+// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
+// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
+// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
+// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
+// BLOCKS-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
+// BLOCKS-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14
+// BLOCKS-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)*
+// BLOCKS-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]]
+// BLOCKS-NEXT: [[TMP26:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP28]] to i32
+// BLOCKS-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// BLOCKS-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]]
+// BLOCKS: omp.inner.for.cond.i:
+// BLOCKS-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// BLOCKS-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP29]] to i64
+// BLOCKS-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP30]]
+// BLOCKS-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
+// BLOCKS: omp.inner.for.body.i:
+// BLOCKS-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// BLOCKS-NEXT: store i32 [[TMP31]], i32* [[I_I]], align 4, !noalias !14
+// BLOCKS-NEXT: store double 1.000000e+00, double* [[TMP26]], align 8
+// BLOCKS-NEXT: store i32 11, i32* [[TMP27]], align 4
+// BLOCKS-NEXT: [[BLOCK_ISA_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 0
+// BLOCKS-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[BLOCK_FLAGS_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 1
+// BLOCKS-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[BLOCK_RESERVED_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 2
+// BLOCKS-NEXT: store i32 0, i32* [[BLOCK_RESERVED_I]], align 4, !noalias !14
+// BLOCKS-NEXT: [[BLOCK_INVOKE_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 3
+// BLOCKS-NEXT: store i8* bitcast (void (i8*)* @_block_invoke to i8*), i8** [[BLOCK_INVOKE_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[BLOCK_DESCRIPTOR_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 4
+// BLOCKS-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.2 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[BLOCK_CAPTURED_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 5
+// BLOCKS-NEXT: [[TMP32:%.*]] = load volatile double, double* [[TMP26]], align 8
+// BLOCKS-NEXT: store volatile double [[TMP32]], double* [[BLOCK_CAPTURED_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[BLOCK_CAPTURED3_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 6
+// BLOCKS-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP27]], align 4
+// BLOCKS-NEXT: store i32 [[TMP33]], i32* [[BLOCK_CAPTURED3_I]], align 8, !noalias !14
+// BLOCKS-NEXT: [[TMP34:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]] to void ()*
+// BLOCKS-NEXT: [[BLOCK_LITERAL_I:%.*]] = bitcast void ()* [[TMP34]] to %struct.__block_literal_generic*
+// BLOCKS-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL_I]], i32 0, i32 3
+// BLOCKS-NEXT: [[TMP36:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL_I]] to i8*
+// BLOCKS-NEXT: [[TMP37:%.*]] = load i8*, i8** [[TMP35]], align 8, !noalias !14
+// BLOCKS-NEXT: [[TMP38:%.*]] = bitcast i8* [[TMP37]] to void (i8*)*
+// BLOCKS-NEXT: call void [[TMP38]](i8* noundef [[TMP36]]) #[[ATTR4]]
+// BLOCKS-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// BLOCKS-NEXT: [[ADD4_I:%.*]] = add nsw i32 [[TMP39]], 1
+// BLOCKS-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// BLOCKS-NEXT: br label [[OMP_INNER_FOR_COND_I]]
+// BLOCKS: .omp_outlined..1.exit:
+// BLOCKS-NEXT: ret i32 0
+//
+//
+// ARRAY-LABEL: define {{[^@]+}}@_Z10array_funciPfP2St
+// ARRAY-SAME: (i32 noundef [[N:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR0:[0-9]+]] {
+// ARRAY-NEXT: entry:
+// ARRAY-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8
+// ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8
+// ARRAY-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
+// ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8
+// ARRAY-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// ARRAY-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// ARRAY-NEXT: [[TMP2:%.*]] = load float*, float** [[A_ADDR]], align 8
+// ARRAY-NEXT: [[TMP3:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8
+// ARRAY-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, float*, %struct.St*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], float* [[TMP2]], %struct.St* [[TMP3]])
+// ARRAY-NEXT: ret void
+//
+//
+// ARRAY-LABEL: define {{[^@]+}}@.omp_outlined.
+// ARRAY-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR1:[0-9]+]] {
+// ARRAY-NEXT: entry:
+// ARRAY-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// ARRAY-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// ARRAY-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8
+// ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8
+// ARRAY-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
+// ARRAY-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// ARRAY-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// ARRAY-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// ARRAY-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
+// ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8
+// ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8
+// ARRAY-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
+// ARRAY-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// ARRAY-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
+// ARRAY-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
+// ARRAY-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+// ARRAY-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// ARRAY: omp_if.then:
+// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0
+// ARRAY-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8
+// ARRAY-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
+// ARRAY-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// ARRAY-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates*
+// ARRAY-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 0
+// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0
+// ARRAY-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8
+// ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8*
+// ARRAY-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 8, i1 false)
+// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 1
+// ARRAY-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 0
+// ARRAY-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8
+// ARRAY-NEXT: store float* [[TMP14]], float** [[TMP13]], align 8
+// ARRAY-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 1
+// ARRAY-NEXT: [[TMP16:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8
+// ARRAY-NEXT: store %struct.St* [[TMP16]], %struct.St** [[TMP15]], align 8
+// ARRAY-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 5
+// ARRAY-NEXT: store i64 0, i64* [[TMP17]], align 8
+// ARRAY-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 6
+// ARRAY-NEXT: store i64 9, i64* [[TMP18]], align 8
+// ARRAY-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 7
+// ARRAY-NEXT: store i64 1, i64* [[TMP19]], align 8
+// ARRAY-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 9
+// ARRAY-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i8*
+// ARRAY-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP21]], i8 0, i64 8, i1 false)
+// ARRAY-NEXT: [[TMP22:%.*]] = load i64, i64* [[TMP19]], align 8
+// ARRAY-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP6]], i32 1, i64* [[TMP17]], i64* [[TMP18]], i64 [[TMP22]], i32 1, i32 0, i64 0, i8* null)
+// ARRAY-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
+// ARRAY-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]])
+// ARRAY-NEXT: br label [[OMP_IF_END]]
+// ARRAY: omp_if.end:
+// ARRAY-NEXT: ret void
+//
+//
+// ARRAY-LABEL: define {{[^@]+}}@.omp_task_privates_map.
+// ARRAY-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], float*** noalias noundef [[TMP1:%.*]], %struct.St*** noalias noundef [[TMP2:%.*]]) #[[ATTR4:[0-9]+]] {
+// ARRAY-NEXT: entry:
+// ARRAY-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
+// ARRAY-NEXT: [[DOTADDR1:%.*]] = alloca float***, align 8
+// ARRAY-NEXT: [[DOTADDR2:%.*]] = alloca %struct.St***, align 8
+// ARRAY-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8
+// ARRAY-NEXT: store float*** [[TMP1]], float**** [[DOTADDR1]], align 8
+// ARRAY-NEXT: store %struct.St*** [[TMP2]], %struct.St**** [[DOTADDR2]], align 8
+// ARRAY-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8
+// ARRAY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0
+// ARRAY-NEXT: [[TMP5:%.*]] = load float***, float**** [[DOTADDR1]], align 8
+// ARRAY-NEXT: store float** [[TMP4]], float*** [[TMP5]], align 8
+// ARRAY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1
+// ARRAY-NEXT: [[TMP7:%.*]] = load %struct.St***, %struct.St**** [[DOTADDR2]], align 8
+// ARRAY-NEXT: store %struct.St** [[TMP6]], %struct.St*** [[TMP7]], align 8
+// ARRAY-NEXT: ret void
+//
+//
+// ARRAY-LABEL: define {{[^@]+}}@.omp_task_entry.
+// ARRAY-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] {
+// ARRAY-NEXT: entry:
+// ARRAY-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
+// ARRAY-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
+// ARRAY-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
+// ARRAY-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
+// ARRAY-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
+// ARRAY-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8
+// ARRAY-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8
+// ARRAY-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8
+// ARRAY-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4
+// ARRAY-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8
+// ARRAY-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
+// ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca float**, align 8
+// ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca %struct.St**, align 8
+// ARRAY-NEXT: [[I_I:%.*]] = alloca i32, align 4
+// ARRAY-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4
+// ARRAY-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// ARRAY-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// ARRAY-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
+// ARRAY-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// ARRAY-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
+// ARRAY-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// ARRAY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
+// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
+// ARRAY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
+// ARRAY-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// ARRAY-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
+// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1
+// ARRAY-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8*
+// ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
+// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5
+// ARRAY-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
+// ARRAY-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6
+// ARRAY-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
+// ARRAY-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7
+// ARRAY-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
+// ARRAY-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8
+// ARRAY-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8
+// ARRAY-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9
+// ARRAY-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
+// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
+// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
+// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
+// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
+// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
+// ARRAY-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
+// ARRAY-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, float***, %struct.St***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14
+// ARRAY-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0
+// ARRAY-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8
+// ARRAY-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, float***, %struct.St***)*
+// ARRAY-NEXT: call void [[TMP27]](i8* [[TMP26]], float*** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.St*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]]
+// ARRAY-NEXT: [[TMP28:%.*]] = load float**, float*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: [[TMP29:%.*]] = load %struct.St**, %struct.St*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14
+// ARRAY-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP30]] to i32
+// ARRAY-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// ARRAY-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]]
+// ARRAY: omp.inner.for.cond.i:
+// ARRAY-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// ARRAY-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP31]] to i64
+// ARRAY-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14
+// ARRAY-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP32]]
+// ARRAY-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
+// ARRAY: omp.inner.for.body.i:
+// ARRAY-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// ARRAY-NEXT: store i32 [[TMP33]], i32* [[I_I]], align 4, !noalias !14
+// ARRAY-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// ARRAY-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP34]], 1
+// ARRAY-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// ARRAY-NEXT: br label [[OMP_INNER_FOR_COND_I]]
+// ARRAY: .omp_outlined..1.exit:
+// ARRAY-NEXT: ret i32 0
+//
+//
+// SIMD-ONLY0-LABEL: define {{[^@]+}}@main
+// SIMD-ONLY0-SAME: () #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY0-NEXT: entry:
+// SIMD-ONLY0-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8
+// SIMD-ONLY0-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8
+// SIMD-ONLY0-NEXT: [[T_VAR:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
+// SIMD-ONLY0-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16
+// SIMD-ONLY0-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8
+// SIMD-ONLY0-NEXT: [[I:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]])
+// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00)
+// SIMD-ONLY0-NEXT: store i32 0, i32* [[T_VAR]], align 4
+// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
+// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false)
+// SIMD-ONLY0-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0
+// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00)
+// SIMD-ONLY0-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1
+// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00)
+// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00)
+// SIMD-ONLY0-NEXT: store i32 0, i32* [[I]], align 4
+// SIMD-ONLY0-NEXT: br label [[FOR_COND:%.*]]
+// SIMD-ONLY0: for.cond:
+// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4
+// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10
+// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+// SIMD-ONLY0: for.body:
+// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4
+// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0
+// SIMD-ONLY0-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4
+// SIMD-ONLY0-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0
+// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8*
+// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = bitcast %struct.S* [[VAR]] to i8*
+// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 8 [[TMP4]], i64 8, i1 false)
+// SIMD-ONLY0-NEXT: store i32 33, i32* @_ZZ4mainE5sivar, align 4
+// SIMD-ONLY0-NEXT: br label [[FOR_INC:%.*]]
+// SIMD-ONLY0: for.inc:
+// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4
+// SIMD-ONLY0-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1
+// SIMD-ONLY0-NEXT: store i32 [[INC]], i32* [[I]], align 4
+// SIMD-ONLY0-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
+// SIMD-ONLY0: for.end:
+// SIMD-ONLY0-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v()
+// SIMD-ONLY0-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4
+// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]]
+// SIMD-ONLY0-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0
+// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2
+// SIMD-ONLY0-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
+// SIMD-ONLY0: arraydestroy.body:
+// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENT]]
= getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY0-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY0: arraydestroy.done2: +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP7]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define 
{{[^@]+}}@_Z5tmainIiET_v +// SIMD-ONLY0-SAME: () #[[ATTR3:[0-9]+]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// SIMD-ONLY0-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY0-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// SIMD-ONLY0-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY0-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// SIMD-ONLY0-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY0-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// SIMD-ONLY0-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// SIMD-ONLY0-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY0: for.cond: +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY0: for.body: +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 128 +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = bitcast %struct.S.0* [[ARRAYIDX1]] to i8* +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 4, i1 false) +// SIMD-ONLY0-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY0: for.inc: +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 +// SIMD-ONLY0-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// SIMD-ONLY0: for.end: +// SIMD-ONLY0-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY0-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY0: arraydestroy.body: +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY0-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY0: arraydestroy.done2: +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP7]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store double 0.000000e+00, double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 
8 +// SIMD-ONLY0-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: store double [[ADD]], double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP0]], double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// 
SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[F]], align 4 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[F]], align 4 +// 
SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@main +// SIMD-ONLY1-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// SIMD-ONLY1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 +// SIMD-ONLY1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY1-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY1: for.cond: +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], 
align 4 +// SIMD-ONLY1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// SIMD-ONLY1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY1: for.body: +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 4 +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = bitcast %struct.S* [[ARRAYIDX1]] to i8* +// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP3]], i8* align 8 [[TMP4]], i64 8, i1 false) +// SIMD-ONLY1-NEXT: store i32 33, i32* @_ZZ4mainE5sivar, align 4 +// SIMD-ONLY1-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY1: for.inc: +// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 +// SIMD-ONLY1-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY1: for.end: +// SIMD-ONLY1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// SIMD-ONLY1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] +// SIMD-ONLY1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY1: arraydestroy.body: +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY1: arraydestroy.done2: +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP7]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// SIMD-ONLY1-SAME: (%struct.S* noundef 
nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// SIMD-ONLY1-SAME: () #[[ATTR3:[0-9]+]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// SIMD-ONLY1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY1-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// SIMD-ONLY1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// SIMD-ONLY1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY1-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds 
[[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY1: for.cond: +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 +// SIMD-ONLY1-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY1: for.body: +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_VAR]], align 128 +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: store i32 [[TMP2]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = bitcast %struct.S.0* [[ARRAYIDX1]] to i8* +// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP3]], i8* align 4 [[TMP4]], i64 4, i1 false) +// SIMD-ONLY1-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY1: for.inc: +// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP5]], 1 +// SIMD-ONLY1-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// SIMD-ONLY1: for.end: +// SIMD-ONLY1-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY1: arraydestroy.body: +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY1: arraydestroy.done2: +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP7]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* 
[[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: store double 0.000000e+00, double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]] +// SIMD-ONLY1-NEXT: store double [[ADD]], double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* 
[[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[TMP0]], double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// 
SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@main +// SIMD-ONLY2-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: 
[[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// SIMD-ONLY2-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY2-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// SIMD-ONLY2-NEXT: ret i32 0 +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@main +// SIMD-ONLY3-SAME: () #[[ATTR1:[0-9]+]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY3-NEXT: [[TMP0:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to %struct.__block_literal_generic*), i32 0, i32 3), align 8 +// SIMD-ONLY3-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to void (i8*)* +// SIMD-ONLY3-NEXT: call void [[TMP1]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to i8*)) +// SIMD-ONLY3-NEXT: ret i32 0 +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@__main_block_invoke +// SIMD-ONLY3-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// SIMD-ONLY3-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: [[BLOCK1:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, align 8 +// SIMD-ONLY3-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* +// SIMD-ONLY3-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 +// SIMD-ONLY3-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY3-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY3: for.cond: +// SIMD-ONLY3-NEXT: [[TMP0:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10 +// SIMD-ONLY3-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY3: for.body: +// SIMD-ONLY3-NEXT: store double 1.000000e+00, double* @g, align 8 +// SIMD-ONLY3-NEXT: store i32 11, i32* @_ZZ4mainE5sivar, align 4 +// SIMD-ONLY3-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 0 +// SIMD-ONLY3-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 1 +// SIMD-ONLY3-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_RESERVED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 2 +// SIMD-ONLY3-NEXT: store i32 0, i32* [[BLOCK_RESERVED]], align 4 +// SIMD-ONLY3-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds <{ i8*, 
i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 3 +// SIMD-ONLY3-NEXT: store i8* bitcast (void (i8*)* @__main_block_invoke_2 to i8*), i8** [[BLOCK_INVOKE]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 4 +// SIMD-ONLY3-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 5 +// SIMD-ONLY3-NEXT: [[TMP1:%.*]] = load volatile double, double* @g, align 8 +// SIMD-ONLY3-NEXT: store volatile double [[TMP1]], double* [[BLOCK_CAPTURED]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURED2:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]], i32 0, i32 6 +// SIMD-ONLY3-NEXT: [[TMP2:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// SIMD-ONLY3-NEXT: store i32 [[TMP2]], i32* [[BLOCK_CAPTURED2]], align 8 +// SIMD-ONLY3-NEXT: [[TMP3:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK1]] to void ()* +// SIMD-ONLY3-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP3]] to %struct.__block_literal_generic* +// SIMD-ONLY3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// SIMD-ONLY3-NEXT: [[TMP5:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// SIMD-ONLY3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP4]], align 8 +// SIMD-ONLY3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to void (i8*)* +// SIMD-ONLY3-NEXT: call void [[TMP7]](i8* noundef [[TMP5]]) +// SIMD-ONLY3-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY3: for.inc: +// SIMD-ONLY3-NEXT: [[TMP8:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// SIMD-ONLY3-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY3-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY3: for.end: +// SIMD-ONLY3-NEXT: ret void +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@__main_block_invoke_2 +// SIMD-ONLY3-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>*, align 8 +// SIMD-ONLY3-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* +// SIMD-ONLY3-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>** [[BLOCK_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, 
i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 5 +// SIMD-ONLY3-NEXT: store double 2.000000e+00, double* [[BLOCK_CAPTURE_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 6 +// SIMD-ONLY3-NEXT: store i32 22, i32* [[BLOCK_CAPTURE_ADDR1]], align 8 +// SIMD-ONLY3-NEXT: ret void +// +// +// SIMD-ONLY4-LABEL: define {{[^@]+}}@_Z10array_funciPfP2St +// SIMD-ONLY4-SAME: (i32 noundef [[N:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY4-NEXT: entry: +// SIMD-ONLY4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// SIMD-ONLY4-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// SIMD-ONLY4-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// SIMD-ONLY4-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 +// SIMD-ONLY4-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY4-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// SIMD-ONLY4-NEXT: store i32 0, i32* [[I]], align 4 +// SIMD-ONLY4-NEXT: br label [[FOR_COND:%.*]] +// SIMD-ONLY4: for.cond: +// SIMD-ONLY4-NEXT: [[TMP2:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10 +// SIMD-ONLY4-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] +// SIMD-ONLY4: for.body: +// SIMD-ONLY4-NEXT: br label [[FOR_INC:%.*]] +// SIMD-ONLY4: for.inc: +// SIMD-ONLY4-NEXT: [[TMP3:%.*]] = load i32, i32* [[I]], align 4 +// SIMD-ONLY4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP3]], 1 +// SIMD-ONLY4-NEXT: store i32 [[INC]], i32* [[I]], align 4 +// SIMD-ONLY4-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]] +// SIMD-ONLY4: for.end: +// SIMD-ONLY4-NEXT: ret void +// diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp index f4ded3c7797e9..31e610d0ddffa 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_firstprivate_codegen.cpp @@ -1,3 +1,4 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s @@ -7,11 +8,10 @@ // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple x86_64-apple-darwin10 -emit-pch -o %t %s -// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck 
--check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple x86_64-apple-darwin10 -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DLAMBDA -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY2 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -fblocks -DBLOCKS -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY3 %s +// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -std=c++11 -DARRAY -triple x86_64-apple-darwin10 -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY4 %s // expected-no-diagnostics #ifndef ARRAY @@ -30,15 +30,6 @@ struct S { volatile double g; -// CHECK-DAG: [[KMP_TASK_T_TY:%.+]] = type { i8*, i32 (i32, i8*)*, i32, %union{{.+}}, %union{{.+}}, i64, i64, i64, i32, i8* } -// CHECK-DAG: [[S_DOUBLE_TY:%.+]] = type { double } -// CHECK-DAG: [[PRIVATES_MAIN_TY:%.+]] = type {{.?}}{ [2 x [[S_DOUBLE_TY]]], [[S_DOUBLE_TY]], i32, [2 x i32] -// CHECK-DAG: [[CAP_MAIN_TY:%.+]] = type { [2 x [[S_DOUBLE_TY]]]*, [[S_DOUBLE_TY]]* } -// CHECK-DAG: [[KMP_TASK_MAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [[PRIVATES_MAIN_TY]] } -// CHECK-DAG: [[S_INT_TY:%.+]] = type { i32 } -// CHECK-DAG: [[CAP_TMAIN_TY:%.+]] = type { [2 x [[S_INT_TY]]]*, [[S_INT_TY]]* } -// CHECK-DAG: [[PRIVATES_TMAIN_TY:%.+]] = type { i32, [2 x i32], [2 x [[S_INT_TY]]], [[S_INT_TY]], [104 x i8] } -// CHECK-DAG: [[KMP_TASK_TMAIN_TY:%.+]] = type { [[KMP_TASK_T_TY]], [{{[0-9]+}} x i8], [[PRIVATES_TMAIN_TY]] } template T tmain() { S ttt; @@ -58,48 +49,14 @@ T tmain() { int main() { static int sivar; #ifdef LAMBDA - // LAMBDA: [[G:@.+]] ={{.*}} global double - // LAMBDA: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, - // LAMBDA-LABEL: @main - // LAMBDA: call{{( x86_thiscallcc)?}} void [[OUTER_LAMBDA:@.+]]( [&]() { - // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( -// LAMBDA: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( -// LAMBDA-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 -// LAMBDA-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] -// LAMBDA: [[THEN]] -// LAMBDA: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// LAMBDA: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 -// LAMBDA: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// LAMBDA: [[G_VAL:%.+]] = load volatile double, double* @{{.+}}, -// LAMBDA: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] - -// LAMBDA: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// LAMBDA: [[SIVAR_VAL:%.+]] = load 
i{{[0-9]+}}, i{{[0-9]+}}* @{{.+}}, -// LAMBDA: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]] - -// LAMBDA: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) -// LAMBDA: call {{.*}}void @__kmpc_end_master( -// LAMBDA-NEXT: br label {{%?}}[[EXIT]] -// LAMBDA: [[EXIT]] -// LAMBDA: ret + + #pragma omp parallel master taskloop simd firstprivate(g, sivar) for (int i = 0; i < 10; ++i) { - // LAMBDA: define {{.+}} void [[INNER_LAMBDA:@.+]](%{{.+}}* {{[^,]*}} [[ARG_PTR:%.+]]) - // LAMBDA: store %{{.+}}* [[ARG_PTR]], %{{.+}}** [[ARG_PTR_REF:%.+]], - // LAMBDA: [[ARG_PTR:%.+]] = load %{{.+}}*, %{{.+}}** [[ARG_PTR_REF]] - // LAMBDA: [[G_PTR_REF:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[ARG_PTR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // LAMBDA: [[G_REF:%.+]] = load double*, double** [[G_PTR_REF]] - // LAMBDA: store double 2.0{{.+}}, double* [[G_REF]] - - // LAMBDA: store double* %{{.+}}, double** %{{.+}}, - // LAMBDA: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, %{{.+}}* noalias noundef %1) + g = 1; sivar = 11; - // LAMBDA: store double 1.0{{.+}}, double* %{{.+}}, - // LAMBDA: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, - // LAMBDA: call void [[INNER_LAMBDA]](% - // LAMBDA: ret [&]() { g = 2; sivar = 22; @@ -108,51 +65,13 @@ int main() { }(); return 0; #elif defined(BLOCKS) - // BLOCKS: [[G:@.+]] ={{.*}} global double - // BLOCKS: [[SIVAR:@.+]] = internal global i{{[0-9]+}} 0, - // BLOCKS-LABEL: @main - // BLOCKS: call void {{%.+}}(i8 ^{ - // BLOCKS: define{{.*}} internal{{.*}} void {{.+}}(i8* - // BLOCKS: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( - // BLOCKS-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 - // BLOCKS-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] - // BLOCKS: [[THEN]] - // BLOCKS: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%{{[^ ]+}} @{{[^,]+}}, i32 %{{[^,]+}}, i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %{{[^*]+}}*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) - // BLOCKS: [[PRIVATES:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i{{.+}} 0, i{{.+}} 1 - // BLOCKS: [[G_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 - // BLOCKS: [[G_VAL:%.+]] = load volatile double, double* @{{.+}}, - // BLOCKS: store volatile double [[G_VAL]], double* [[G_PRIVATE_ADDR]] - - // BLOCKS: [[SIVAR_PRIVATE_ADDR:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 - // BLOCKS: [[SIVAR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* @{{.+}}, - // BLOCKS: store i{{[0-9]+}} [[SIVAR_VAL]], i{{[0-9]+}}* [[SIVAR_PRIVATE_ADDR]] - // BLOCKS: call void @__kmpc_taskloop(%{{.+}}* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* null) - // BLOCKS: call {{.*}}void @__kmpc_end_master( - // BLOCKS-NEXT: br label {{%?}}[[EXIT]] - // BLOCKS: [[EXIT]] - // BLOCKS: ret + #pragma omp parallel master taskloop simd firstprivate(g, sivar) for (int i = 0; i < 10; ++i) { - // BLOCKS: define {{.+}} void {{@.+}}(i8* - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS: store double 2.0{{.+}}, double* - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: store i{{[0-9]+}} 22, i{{[0-9]+}}* - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: ret - - // BLOCKS: store double* %{{.+}}, double** %{{.+}}, - // BLOCKS: store i{{[0-9]+}}* %{{.+}}, i{{[0-9]+}}** %{{.+}}, - // 
BLOCKS: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, %{{.+}}* noalias noundef %1) + g = 1; sivar = 11; - // BLOCKS: store double 1.0{{.+}}, double* %{{.+}}, - // BLOCKS-NOT: [[G]]{{[[^:word:]]}} - // BLOCKS: store i{{[0-9]+}} 11, i{{[0-9]+}}* %{{.+}}, - // BLOCKS-NOT: [[SIVAR]]{{[[^:word:]]}} - // BLOCKS: call void {{%.+}}(i8 ^{ g = 2; sivar = 22; @@ -177,303 +96,86 @@ int main() { #endif } -// CHECK: [[SIVAR:.+]] = internal global i{{[0-9]+}} 0, -// CHECK: define{{.*}} i{{[0-9]+}} @main() -// CHECK: alloca [[S_DOUBLE_TY]], -// CHECK: [[TEST:%.+]] = alloca [[S_DOUBLE_TY]], -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]], -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]], - -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR:@.+]]([[S_DOUBLE_TY]]* {{[^,]*}} [[TEST]], - -// CHECK: [[RES:%.+]] = call {{.*}}i32 @__kmpc_master( -// CHECK-NEXT: [[IS_MASTER:%.+]] = icmp ne i32 [[RES]], 0 -// CHECK-NEXT: br i1 [[IS_MASTER]], label {{%?}}[[THEN:.+]], label {{%?}}[[EXIT:.+]] -// CHECK: [[THEN]] + + // Store original variables in capture struct. -// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: store [2 x [[S_DOUBLE_TY]]]* %{{.+}}, [2 x [[S_DOUBLE_TY]]]** [[S_ARR_REF]], -// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]], [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store [[S_DOUBLE_TY]]* %{{.+}}, [[S_DOUBLE_TY]]** [[VAR_REF]], // Allocate task. // Returns struct kmp_task_t { // [[KMP_TASK_T]] task_data; // [[KMP_TASK_MAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_MAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], -// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_MAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 16, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). // Also copy address of private copy to the corresponding shareds reference. -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 // Constructors for s_arr and var. 
// s_arr; -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: bitcast [2 x [[S_DOUBLE_TY]]]* %{{.+}} to [[S_DOUBLE_TY]]* -// CHECK: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* {{[^,]*}} [[S_ARR_CUR:%[^,]+]], -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* [[S_ARR_CUR]], i{{.+}} 1 -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 1 -// CHECK: icmp eq -// CHECK: br i1 // var; -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK-NEXT: call void [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]], [[S_DOUBLE_TY]]* {{.*}}, // t_var; -// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 -// CHECK-NEXT: [[T_VAR:%.+]] = load i32, i32* %{{.+}}, -// CHECK-NEXT: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], // vec; -// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK-NEXT: bitcast [2 x i32]* [[PRIVATE_VEC_REF]] to i8* -// CHECK-NEXT: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64( // sivar; -// CHECK: [[PRIVATE_SIVAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 4 -// CHECK-NEXT: [[SIVAR:%.+]] = load i{{.+}}, i{{.+}}* @{{.+}}, -// CHECK-NEXT: store i32 [[SIVAR]], i32* [[PRIVATE_SIVAR_REF]], // Provide pointer to destructor function, which will destroy private variables at the end of the task. -// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 -// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** -// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_MAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], // Start task. 
-// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_MAIN_TY]]*, [[KMP_TASK_MAIN_TY]]*, i32)* [[MAIN_DUP:@.+]] to i8*)) -// CHECK: call {{.*}}void @__kmpc_end_master( -// CHECK-NEXT: br label {{%?}}[[EXIT]] -// CHECK: [[EXIT]] - -// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_MAIN_TY]]* noalias noundef %0, [[S_DOUBLE_TY]]** noalias noundef %1, i32** noalias noundef %2, [2 x [[S_DOUBLE_TY]]]** noalias noundef %3, [2 x i32]** noalias noundef %4, i32** noalias noundef %5) -// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_MAIN_TY]]*, [[PRIVATES_MAIN_TY]]** -// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 0 -// CHECK: [[ARG3:%.+]] = load [2 x [[S_DOUBLE_TY]]]**, [2 x [[S_DOUBLE_TY]]]*** %{{.+}}, -// CHECK: store [2 x [[S_DOUBLE_TY]]]* [[PRIV_S_VAR]], [2 x [[S_DOUBLE_TY]]]** [[ARG3]], -// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 1 -// CHECK: [[ARG1:%.+]] = load [[S_DOUBLE_TY]]**, [[S_DOUBLE_TY]]*** {{.+}}, -// CHECK: store [[S_DOUBLE_TY]]* [[PRIV_VAR]], [[S_DOUBLE_TY]]** [[ARG1]], -// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 2 -// CHECK: [[ARG2:%.+]] = load i32**, i32*** %{{.+}}, -// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG2]], -// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 3 -// CHECK: [[ARG4:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, -// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG4]], -// CHECK: [[PRIV_SIVAR:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i32 0, i32 4 -// CHECK: [[ARG5:%.+]] = load i{{[0-9]+}}**, i{{[0-9]+}}*** %{{.+}}, -// CHECK: store i{{[0-9]+}}* [[PRIV_SIVAR]], i{{[0-9]+}}** [[ARG5]], -// CHECK: ret void - -// CHECK: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, [[KMP_TASK_MAIN_TY]]* noalias noundef %1) - -// CHECK: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_DOUBLE_TY]]*, -// CHECK: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_DOUBLE_TY]]]*, -// CHECK: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK: [[PRIV_SIVAR_ADDR:%.+]] = alloca i32*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_MAIN_TY]]*, [[S_DOUBLE_TY]]**, i32**, [2 x [[S_DOUBLE_TY]]]**, [2 x i32]**, i32**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], -// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], - -// CHECK: [[FN:%.+]] = bitcast void (i8*, ...)* [[MAP_FN]] to void (i8*, -// CHECK: call void [[FN]](i8* %{{.+}}, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], i32** [[PRIV_T_VAR_ADDR]], [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], i32** [[PRIV_SIVAR_ADDR]]) - -// CHECK: [[PRIV_VAR:%.+]] = load [[S_DOUBLE_TY]]*, [[S_DOUBLE_TY]]** [[PRIV_VAR_ADDR]], -// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], -// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_DOUBLE_TY]]]*, [2 x [[S_DOUBLE_TY]]]** [[PRIV_S_ARR_ADDR]], -// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], -// CHECK: [[PRIV_SIVAR:%.+]] = load i32*, i32** [[PRIV_SIVAR_ADDR]], + + + + + // Privates actually are used. 
-// CHECK-DAG: [[PRIV_VAR]] -// CHECK-DAG: [[PRIV_T_VAR]] -// CHECK-DAG: [[PRIV_S_ARR]] -// CHECK-DAG: [[PRIV_VEC]] -// CHECK-DAG: [[PRIV_SIVAR]] - -// CHECK: ret - -// CHECK: define internal void [[MAIN_DUP]]([[KMP_TASK_MAIN_TY]]* noundef %0, [[KMP_TASK_MAIN_TY]]* noundef %1, i32 noundef %2) -// CHECK: getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* %{{.+}}, i32 0, i32 1 -// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i64 2 -// CHECK: br i1 % - -// CHECK: phi [[S_DOUBLE_TY]]* -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* -// CHECK: getelementptr [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i32 1 -// CHECK: icmp eq [[S_DOUBLE_TY]]* % -// CHECK: br i1 % - -// CHECK: getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* %{{.+}}, i32 0, i32 1 -// CHECK: call {{.*}} [[S_DOUBLE_TY_COPY_CONSTR]]([[S_DOUBLE_TY]]* -// CHECK: ret void - -// CHECK: define internal noundef i32 [[DESTRUCTORS]](i32 noundef %{{.+}}, [[KMP_TASK_MAIN_TY]]* noalias noundef %{{.+}}) -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_MAIN_TY]], [[KMP_TASK_MAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_MAIN_TY]], [[PRIVATES_MAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]]) -// CHECK: getelementptr inbounds [2 x [[S_DOUBLE_TY]]], [2 x [[S_DOUBLE_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_DOUBLE_TY]], [[S_DOUBLE_TY]]* %{{.+}}, i{{.+}} -1 -// CHECK: call void @_ZN1SIdED1Ev([[S_DOUBLE_TY]]* {{[^,]*}} [[PRIVATE_S_ARR_ELEM_REF]]) -// CHECK: icmp eq -// CHECK: br i1 -// CHECK: ret i32 - -// CHECK: alloca [[S_INT_TY]], -// CHECK: [[TEST:%.+]] = alloca [[S_INT_TY]], -// CHECK: [[T_VAR_ADDR:%.+]] = alloca i32, align 128 -// CHECK: [[VEC_ADDR:%.+]] = alloca [2 x i32], -// CHECK: [[S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]], -// CHECK: [[VAR_ADDR:%.+]] = alloca [[S_INT_TY]], - -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR:@.+]]([[S_INT_TY]]* {{[^,]*}} [[TEST]], + + + + + + + // Store original variables in capture struct. -// CHECK: [[S_ARR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: store [2 x [[S_INT_TY]]]* %{{.+}}, [2 x [[S_INT_TY]]]** [[S_ARR_REF]], -// CHECK: [[VAR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]], [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: store [[S_INT_TY]]* %{{.+}}, [[S_INT_TY]]** [[VAR_REF]], // Allocate task. 
// Returns struct kmp_task_t { // [[KMP_TASK_T_TY]] task_data; // [[KMP_TASK_TMAIN_TY]] privates; // }; -// CHECK: [[RES:%.+]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[TASK_ENTRY:@[^ ]+]] to i32 (i32, i8*)*)) -// CHECK: [[RES_KMP_TASK:%.+]] = bitcast i8* [[RES]] to [[KMP_TASK_TMAIN_TY]]* // Fill kmp_task_t->shareds by copying from original capture argument. -// CHECK: [[TASK:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF_ADDR:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[SHAREDS_REF:%.+]] = load i8*, i8** [[SHAREDS_REF_ADDR]], -// CHECK: [[CAPTURES_ADDR:%.+]] = bitcast [[CAP_TMAIN_TY]]* %{{.+}} to i8* -// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[SHAREDS_REF]], i8* align 8 [[CAPTURES_ADDR]], i64 16, i1 false) // Initialize kmp_task_t->privates with default values (no init for simple types, default constructors for classes). -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 // t_var; -// CHECK: [[PRIVATE_T_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 0 -// CHECK: [[T_VAR:%.+]] = load i32, i32* %{{.+}}, align 128 -// CHECK: store i32 [[T_VAR]], i32* [[PRIVATE_T_VAR_REF]], align 128 // vec; -// CHECK: [[PRIVATE_VEC_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 1 -// CHECK-NEXT: bitcast [2 x i32]* [[PRIVATE_VEC_REF]] to i8* -// CHECK-NEXT: bitcast [2 x i32]* %{{.+}} to i8* -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64( // Constructors for s_arr and var. // a_arr; -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: bitcast [2 x [[S_INT_TY]]]* %{{.+}} to [[S_INT_TY]]* -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* {{[^,]*}} [[S_ARR_CUR:%[^,]+]], -// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* [[S_ARR_CUR]], i{{.+}} 1 -// CHECK: icmp eq -// CHECK: br i1 // var; -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK-NEXT: call void [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]], // Provide pointer to destructor function, which will destroy private variables at the end of the task. -// CHECK: [[DESTRUCTORS_REF:%.+]] = getelementptr inbounds [[KMP_TASK_T_TY]], [[KMP_TASK_T_TY]]* [[TASK]], i{{.+}} 0, i{{.+}} 3 -// CHECK: [[DESTRUCTORS_PTR:%.+]] = bitcast %union{{.+}}* [[DESTRUCTORS_REF]] to i32 (i32, i8*)** -// CHECK: store i32 (i32, i8*)* bitcast (i32 (i32, [[KMP_TASK_TMAIN_TY]]*)* [[DESTRUCTORS:@.+]] to i32 (i32, i8*)*), i32 (i32, i8*)** [[DESTRUCTORS_PTR]], // Start task. 
-// CHECK: call void @__kmpc_taskloop(%struct.ident_t* @{{.+}}, i32 %{{.+}}, i8* [[RES]], i32 1, i64* %{{.+}}, i64* %{{.+}}, i64 %{{.+}}, i32 1, i32 0, i64 0, i8* bitcast (void ([[KMP_TASK_TMAIN_TY]]*, [[KMP_TASK_TMAIN_TY]]*, i32)* [[TMAIN_DUP:@.+]] to i8*)) - -// CHECK: define internal void [[PRIVATES_MAP_FN:@.+]]([[PRIVATES_TMAIN_TY]]* noalias noundef %{{.+}}, i32** noalias noundef %{{.+}}, [2 x i32]** noalias noundef %{{.+}}, [2 x [[S_INT_TY]]]** noalias noundef %{{.+}}, [[S_INT_TY]]** noalias noundef %{{.+}}) -// CHECK: [[PRIVATES:%.+]] = load [[PRIVATES_TMAIN_TY]]*, [[PRIVATES_TMAIN_TY]]** -// CHECK: [[PRIV_T_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 0 -// CHECK: [[ARG1:%.+]] = load i32**, i32*** %{{.+}}, -// CHECK: store i32* [[PRIV_T_VAR]], i32** [[ARG1]], -// CHECK: [[PRIV_VEC:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 1 -// CHECK: [[ARG2:%.+]] = load [2 x i32]**, [2 x i32]*** %{{.+}}, -// CHECK: store [2 x i32]* [[PRIV_VEC]], [2 x i32]** [[ARG2]], -// CHECK: [[PRIV_S_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 2 -// CHECK: [[ARG3:%.+]] = load [2 x [[S_INT_TY]]]**, [2 x [[S_INT_TY]]]*** %{{.+}}, -// CHECK: store [2 x [[S_INT_TY]]]* [[PRIV_S_VAR]], [2 x [[S_INT_TY]]]** [[ARG3]], -// CHECK: [[PRIV_VAR:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i32 0, i32 3 -// CHECK: [[ARG4:%.+]] = load [[S_INT_TY]]**, [[S_INT_TY]]*** {{.+}}, -// CHECK: store [[S_INT_TY]]* [[PRIV_VAR]], [[S_INT_TY]]** [[ARG4]], -// CHECK: ret void - -// CHECK: define internal noundef i32 [[TASK_ENTRY]](i32 noundef %0, [[KMP_TASK_TMAIN_TY]]* noalias noundef %1) -// CHECK: alloca i32*, -// CHECK-DAG: [[PRIV_T_VAR_ADDR:%.+]] = alloca i32*, -// CHECK-DAG: [[PRIV_VEC_ADDR:%.+]] = alloca [2 x i32]*, -// CHECK-DAG: [[PRIV_S_ARR_ADDR:%.+]] = alloca [2 x [[S_INT_TY]]]*, -// CHECK-DAG: [[PRIV_VAR_ADDR:%.+]] = alloca [[S_INT_TY]]*, -// CHECK: store void (i8*, ...)* bitcast (void ([[PRIVATES_TMAIN_TY]]*, i32**, [2 x i32]**, [2 x [[S_INT_TY]]]**, [[S_INT_TY]]**)* [[PRIVATES_MAP_FN]] to void (i8*, ...)*), void (i8*, ...)** [[MAP_FN_ADDR:%.+]], -// CHECK: [[MAP_FN:%.+]] = load void (i8*, ...)*, void (i8*, ...)** [[MAP_FN_ADDR]], -// CHECK: [[FN:%.+]] = bitcast void (i8*, ...)* [[MAP_FN]] to void (i8*, -// CHECK: call void [[FN]](i8* %{{.+}}, i32** [[PRIV_T_VAR_ADDR]], [2 x i32]** [[PRIV_VEC_ADDR]], [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], [[S_INT_TY]]** [[PRIV_VAR_ADDR]]) -// CHECK: [[PRIV_T_VAR:%.+]] = load i32*, i32** [[PRIV_T_VAR_ADDR]], -// CHECK: [[PRIV_VEC:%.+]] = load [2 x i32]*, [2 x i32]** [[PRIV_VEC_ADDR]], -// CHECK: [[PRIV_S_ARR:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[PRIV_S_ARR_ADDR]], -// CHECK: [[PRIV_VAR:%.+]] = load [[S_INT_TY]]*, [[S_INT_TY]]** [[PRIV_VAR_ADDR]], + + // Privates actually are used. 
-// CHECK-DAG: [[PRIV_VAR]] -// CHECK-DAG: [[PRIV_T_VAR]] -// CHECK-DAG: [[PRIV_S_ARR]] -// CHECK-DAG: [[PRIV_VEC]] - -// CHECK: ret - -// CHECK: define internal void [[TMAIN_DUP]]([[KMP_TASK_TMAIN_TY]]* noundef %0, [[KMP_TASK_TMAIN_TY]]* noundef %1, i32 noundef %2) -// CHECK: getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 -// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 2 -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* %{{.+}}, i32 0, i32 0 -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i64 2 -// CHECK: br i1 % - -// CHECK: phi [[S_INT_TY]]* -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* -// CHECK: getelementptr [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i32 1 -// CHECK: icmp eq [[S_INT_TY]]* % -// CHECK: br i1 % - -// CHECK: getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* %{{.+}}, i32 0, i32 3 -// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* -// CHECK: ret void - -// CHECK: define internal noundef i32 [[DESTRUCTORS]](i32 noundef %0, [[KMP_TASK_TMAIN_TY]]* noalias noundef %1) -// CHECK: [[PRIVATES:%.+]] = getelementptr inbounds [[KMP_TASK_TMAIN_TY]], [[KMP_TASK_TMAIN_TY]]* [[RES_KMP_TASK:%.+]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 -// CHECK: [[PRIVATE_S_ARR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 2 -// CHECK: [[PRIVATE_VAR_REF:%.+]] = getelementptr inbounds [[PRIVATES_TMAIN_TY]], [[PRIVATES_TMAIN_TY]]* [[PRIVATES]], i{{.+}} 0, i{{.+}} 3 -// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_VAR_REF]]) -// CHECK: getelementptr inbounds [2 x [[S_INT_TY]]], [2 x [[S_INT_TY]]]* [[PRIVATE_S_ARR_REF]], i{{.+}} 0, i{{.+}} 0 -// CHECK: getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} 2 -// CHECK: [[PRIVATE_S_ARR_ELEM_REF:%.+]] = getelementptr inbounds [[S_INT_TY]], [[S_INT_TY]]* %{{.+}}, i{{.+}} -1 -// CHECK: call void @_ZN1SIiED1Ev([[S_INT_TY]]* {{[^,]*}} [[PRIVATE_S_ARR_ELEM_REF]]) -// CHECK: icmp eq -// CHECK: br i1 -// CHECK: ret i32 + + + + + #endif #else -// ARRAY-LABEL: array_func struct St { int a, b; St() : a(0), b(0) {} @@ -482,13 +184,2309 @@ struct St { }; void array_func(int n, float a[n], St s[2]) { -// ARRAY: call i8* @__kmpc_omp_task_alloc( -// ARRAY: call void @__kmpc_taskloop( -// ARRAY: store float** %{{.+}}, float*** %{{.+}}, -// ARRAY: store %struct.St** %{{.+}}, %struct.St*** %{{.+}}, #pragma omp parallel master taskloop simd firstprivate(a, s) for (int i = 0; i < 10; ++i) ; } #endif +// CHECK-LABEL: define {{[^@]+}}@main +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 +// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) 
+// CHECK-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false) +// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00) +// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) +// CHECK-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 4 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32* +// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8 +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S]*, %struct.S*)* @.omp_outlined. to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S]* [[S_ARR]], %struct.S* [[VAR]]) +// CHECK-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// CHECK-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// CHECK: arraydestroy.body: +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK: arraydestroy.done1: +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4 +// CHECK-NEXT: ret i32 [[TMP4]] +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: call void 
@_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// CHECK-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_outlined. 
+// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S]* noundef nonnull align 8 dereferenceable(16) [[S_ARR:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR:%.*]]) #[[ATTR3:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S]*, align 8 +// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8 +// CHECK-NEXT: store [2 x %struct.S]* [[S_ARR]], [2 x %struct.S]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: store %struct.S* [[VAR]], %struct.S** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32* +// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[S_ARR_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load %struct.S*, %struct.S** [[VAR_ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0 +// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// CHECK: omp_if.then: +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK-NEXT: store [2 x %struct.S]* [[TMP1]], [2 x %struct.S]** [[TMP7]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 1 +// CHECK-NEXT: store %struct.S* [[TMP2]], %struct.S** [[TMP8]], align 8 +// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 120, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates* +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 0 +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0 +// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false) +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP10]], i32 0, i32 1 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 0 +// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP16]], i32 0, i32 0 +// CHECK-NEXT: [[TMP17:%.*]] = bitcast [2 x %struct.S]* [[TMP1]] to %struct.S* +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP18]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK: omp.arraycpy.body: +// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP17]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]], double noundef 0.000000e+00) +// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 +// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP18]] +// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]] +// CHECK: omp.arraycpy.done1: +// CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 1 +// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP19]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP2]], double noundef 0.000000e+00) +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 2 +// CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK-NEXT: store i32 [[TMP21]], i32* [[TMP20]], align 8 +// CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 3 +// CHECK-NEXT: [[TMP23:%.*]] = bitcast [2 x i32]* [[TMP22]] to i8* +// CHECK-NEXT: [[TMP24:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP23]], i8* align 4 [[TMP24]], i64 8, i1 false) +// CHECK-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP15]], i32 0, i32 4 +// CHECK-NEXT: [[TMP26:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4 +// CHECK-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3 +// CHECK-NEXT: [[TMP28:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP27]] to i32 (i32, i8*)** +// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_destructor. to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP28]], align 8 +// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5 +// CHECK-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6 +// CHECK-NEXT: store i64 9, i64* [[TMP30]], align 8 +// CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7 +// CHECK-NEXT: store i64 1, i64* [[TMP31]], align 8 +// CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9 +// CHECK-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i8* +// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP33]], i8 0, i64 8, i1 false) +// CHECK-NEXT: [[TMP34:%.*]] = load i64, i64* [[TMP31]], align 8 +// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP29]], i64* [[TMP30]], i64 [[TMP34]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates*, i32)* @.omp_task_dup. to i8*)) +// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK-NEXT: br label [[OMP_IF_END]] +// CHECK: omp_if.end: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_privates_map. 
+// CHECK-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], %struct.S** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]], [2 x %struct.S]** noalias noundef [[TMP3:%.*]], [2 x i32]** noalias noundef [[TMP4:%.*]], i32** noalias noundef [[TMP5:%.*]]) #[[ATTR6:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.S**, align 8 +// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S]**, align 8 +// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca [2 x i32]**, align 8 +// CHECK-NEXT: [[DOTADDR5:%.*]] = alloca i32**, align 8 +// CHECK-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-NEXT: store %struct.S** [[TMP1]], %struct.S*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store [2 x %struct.S]** [[TMP3]], [2 x %struct.S]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x i32]** [[TMP4]], [2 x i32]*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store i32** [[TMP5]], i32*** [[DOTADDR5]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 0 +// CHECK-NEXT: [[TMP8:%.*]] = load [2 x %struct.S]**, [2 x %struct.S]*** [[DOTADDR3]], align 8 +// CHECK-NEXT: store [2 x %struct.S]* [[TMP7]], [2 x %struct.S]** [[TMP8]], align 8 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 1 +// CHECK-NEXT: [[TMP10:%.*]] = load %struct.S**, %struct.S*** [[DOTADDR1]], align 8 +// CHECK-NEXT: store %struct.S* [[TMP9]], %struct.S** [[TMP10]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 2 +// CHECK-NEXT: [[TMP12:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8 +// CHECK-NEXT: store i32* [[TMP11]], i32** [[TMP12]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 3 +// CHECK-NEXT: [[TMP14:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR4]], align 8 +// CHECK-NEXT: store [2 x i32]* [[TMP13]], [2 x i32]** [[TMP14]], align 8 +// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP6]], i32 0, i32 4 +// CHECK-NEXT: [[TMP16:%.*]] = load i32**, i32*** [[DOTADDR5]], align 8 +// CHECK-NEXT: store i32* [[TMP15]], i32** [[TMP16]], align 8 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_entry. 
+// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// CHECK-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca %struct.S*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca [2 x i32]*, align 8 +// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR4_I:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// CHECK-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// CHECK-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* +// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], %struct.S** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] +// CHECK-NEXT: [[TMP26:%.*]] = load %struct.S*, %struct.S** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP29:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR4_I]], align 8, !noalias !14 +// CHECK-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP31]] to i32 +// CHECK-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// CHECK: omp.inner.for.cond.i: +// CHECK-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// CHECK-NEXT: [[CONV5_I:%.*]] = sext i32 [[TMP32]] to i64 +// CHECK-NEXT: [[TMP33:%.*]] = load 
i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV5_I]], [[TMP33]] +// CHECK-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// CHECK: omp.inner.for.body.i: +// CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: store i32 [[TMP34]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP29]], i64 0, i64 0 +// CHECK-NEXT: store i32 [[TMP35]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP28]], i64 0, i64 0 +// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S* [[ARRAYIDX6_I]] to i8* +// CHECK-NEXT: [[TMP37:%.*]] = bitcast %struct.S* [[TMP26]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP36]], i8* align 8 [[TMP37]], i64 8, i1 false), !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: store i32 33, i32* [[TMP30]], align 4, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP38]], 1 +// CHECK-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK: .omp_outlined..1.exit: +// CHECK-NEXT: ret i32 0 +// +// +// CHECK-LABEL: define {{[^@]+}}@.omp_task_dup. 
+// CHECK-SAME: (%struct.kmp_task_t_with_privates* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP0]], %struct.kmp_task_t_with_privates** [[DOTADDR]], align 8
+// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 0
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP9]], i32 0, i32 0
+// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8
+// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP10]], i32 0, i32 0
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S]* [[TMP12]] to %struct.S*
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2
+// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S* [[ARRAY_BEGIN]], [[TMP14]]
+// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK: omp.arraycpy.body:
+// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[OMP_ARRAYCPY_SRCELEMENTPAST]], double noundef 0.000000e+00)
+// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S]], %struct.S* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]]
+// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK: omp.arraycpy.done3:
+// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP8]], i32 0, i32 1
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP9]], i32 0, i32 1
+// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[TMP16]], align 8
+// CHECK-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP15]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP17]], double noundef 0.000000e+00)
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_task_destructor.
+// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
+// CHECK-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP2]], i32 0, i32 1
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1
+// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR4]]
+// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[ARRAY_BEGIN]], i64 2
+// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK: arraydestroy.body:
+// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
+// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
+// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK: arraydestroy.done2:
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP7]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev
+// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]]
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_Z5tmainIiET_v
+// CHECK-SAME: () #[[ATTR9:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4
+// CHECK-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4
+// CHECK-NEXT: [[T_VAR:%.*]] = alloca i32, align 128
+// CHECK-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4
+// CHECK-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4
+// CHECK-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4
+// CHECK-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8
+// CHECK-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]])
+// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0)
+// CHECK-NEXT: store i32 0, i32* [[T_VAR]], align 128
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false)
+// CHECK-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0
+// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1)
+// CHECK-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1
+// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2)
+// CHECK-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3)
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_VAR]], align 128
+// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_CASTED]] to i32*
+// CHECK-NEXT: store i32 [[TMP1]], i32* [[CONV]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[T_VAR_CASTED]], align 8
+// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, [2 x i32]*, i64, [2 x %struct.S.0]*, %struct.S.0*)* @.omp_outlined..2 to void (i32*, i32*, ...)*), [2 x i32]* [[VEC]], i64 [[TMP2]], [2 x %struct.S.0]* [[S_ARR]], %struct.S.0* [[VAR]])
+// CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]]
+// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK: arraydestroy.body:
+// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP3]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
+// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
+// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK: arraydestroy.done1:
+// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]]
+// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]]
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP4]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev
+// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK-NEXT: store double 0.000000e+00, double* [[F]], align 8
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d
+// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK-NEXT: [[T_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8
+// CHECK-NEXT: store double [[T]], double* [[T_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8
+// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[T_ADDR]], align 8
+// CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]]
+// CHECK-NEXT: store double [[ADD]], double* [[F]], align 8
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed
+// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK-NEXT: store double [[A]], double* [[A_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8
+// CHECK-NEXT: store double [[TMP0]], double* [[F]], align 8
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev
+// CHECK-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
+// CHECK-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]])
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8
+// CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4
+// CHECK-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]])
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]])
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_outlined..2
+// CHECK-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], [2 x i32]* noundef nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 noundef [[T_VAR:%.*]], [2 x %struct.S.0]* noundef nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca [2 x i32]*, align 8
+// CHECK-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[S_ARR_ADDR:%.*]] = alloca [2 x %struct.S.0]*, align 8
+// CHECK-NEXT: [[VAR_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8
+// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// CHECK-NEXT: store [2 x i32]* [[VEC]], [2 x i32]** [[VEC_ADDR]], align 8
+// CHECK-NEXT: store i64 [[T_VAR]], i64* [[T_VAR_ADDR]], align 8
+// CHECK-NEXT: store [2 x %struct.S.0]* [[S_ARR]], [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8
+// CHECK-NEXT: store %struct.S.0* [[VAR]], %struct.S.0** [[VAR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load [2 x i32]*, [2 x i32]** [[VEC_ADDR]], align 8
+// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[T_VAR_ADDR]] to i32*
+// CHECK-NEXT: [[TMP1:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[S_ARR_ADDR]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load %struct.S.0*, %struct.S.0** [[VAR_ADDR]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
+// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
+// CHECK-NEXT: [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
+// CHECK-NEXT: br i1 [[TMP6]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// CHECK: omp_if.then:
+// CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 0
+// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP1]], [2 x %struct.S.0]** [[TMP7]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[AGG_CAPTURED]], i32 0, i32 1
+// CHECK-NEXT: store %struct.S.0* [[TMP2]], %struct.S.0** [[TMP8]], align 8
+// CHECK-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
+// CHECK-NEXT: [[TMP9:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i32 9, i64 256, i64 16, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_entry..5 to i32 (i32, i8*)*))
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to %struct.kmp_task_t_with_privates.2*
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 0
+// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 0
+// CHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 128
+// CHECK-NEXT: [[TMP14:%.*]] = bitcast %struct.anon.1* [[AGG_CAPTURED]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP13]], i8* align 8 [[TMP14]], i64 16, i1 false)
+// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP10]], i32 0, i32 2
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 0
+// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[CONV]], align 4
+// CHECK-NEXT: store i32 [[TMP17]], i32* [[TMP16]], align 128
+// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 1
+// CHECK-NEXT: [[TMP19:%.*]] = bitcast [2 x i32]* [[TMP18]] to i8*
+// CHECK-NEXT: [[TMP20:%.*]] = bitcast [2 x i32]* [[TMP0]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP19]], i8* align 4 [[TMP20]], i64 8, i1 false)
+// CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 2
+// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP21]], i32 0, i32 0
+// CHECK-NEXT: [[TMP22:%.*]] = bitcast [2 x %struct.S.0]* [[TMP1]] to %struct.S.0*
+// CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP23]]
+// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE1:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK: omp.arraycpy.body:
+// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP22]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[OMP_IF_THEN]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0)
+// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP23]]
+// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE1]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK: omp.arraycpy.done1:
+// CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP15]], i32 0, i32 3
+// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP24]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP2]], i32 noundef 0)
+// CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 3
+// CHECK-NEXT: [[TMP26:%.*]] = bitcast %union.kmp_cmplrdata_t* [[TMP25]] to i32 (i32, i8*)**
+// CHECK-NEXT: store i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.2*)* @.omp_task_destructor..7 to i32 (i32, i8*)*), i32 (i32, i8*)** [[TMP26]], align 8
+// CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 5
+// CHECK-NEXT: store i64 0, i64* [[TMP27]], align 8
+// CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 6
+// CHECK-NEXT: store i64 9, i64* [[TMP28]], align 16
+// CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 7
+// CHECK-NEXT: store i64 1, i64* [[TMP29]], align 8
+// CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP11]], i32 0, i32 9
+// CHECK-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i8*
+// CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP31]], i8 0, i64 8, i1 false)
+// CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[TMP29]], align 8
+// CHECK-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]], i8* [[TMP9]], i32 1, i64* [[TMP27]], i64* [[TMP28]], i64 [[TMP32]], i32 1, i32 0, i64 0, i8* bitcast (void (%struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2*, i32)* @.omp_task_dup..6 to i8*))
+// CHECK-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
+// CHECK-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]])
+// CHECK-NEXT: br label [[OMP_IF_END]]
+// CHECK: omp_if.end:
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_task_privates_map..4
+// CHECK-SAME: (%struct..kmp_privates.t.3* noalias noundef [[TMP0:%.*]], i32** noalias noundef [[TMP1:%.*]], [2 x i32]** noalias noundef [[TMP2:%.*]], [2 x %struct.S.0]** noalias noundef [[TMP3:%.*]], %struct.S.0** noalias noundef [[TMP4:%.*]]) #[[ATTR6]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t.3*, align 8
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32**, align 8
+// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca [2 x i32]**, align 8
+// CHECK-NEXT: [[DOTADDR3:%.*]] = alloca [2 x %struct.S.0]**, align 8
+// CHECK-NEXT: [[DOTADDR4:%.*]] = alloca %struct.S.0**, align 8
+// CHECK-NEXT: store %struct..kmp_privates.t.3* [[TMP0]], %struct..kmp_privates.t.3** [[DOTADDR]], align 8
+// CHECK-NEXT: store i32** [[TMP1]], i32*** [[DOTADDR1]], align 8
+// CHECK-NEXT: store [2 x i32]** [[TMP2]], [2 x i32]*** [[DOTADDR2]], align 8
+// CHECK-NEXT: store [2 x %struct.S.0]** [[TMP3]], [2 x %struct.S.0]*** [[DOTADDR3]], align 8
+// CHECK-NEXT: store %struct.S.0** [[TMP4]], %struct.S.0*** [[DOTADDR4]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load %struct..kmp_privates.t.3*, %struct..kmp_privates.t.3** [[DOTADDR]], align 8
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR1]], align 8
+// CHECK-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 1
+// CHECK-NEXT: [[TMP9:%.*]] = load [2 x i32]**, [2 x i32]*** [[DOTADDR2]], align 8
+// CHECK-NEXT: store [2 x i32]* [[TMP8]], [2 x i32]** [[TMP9]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 2
+// CHECK-NEXT: [[TMP11:%.*]] = load [2 x %struct.S.0]**, [2 x %struct.S.0]*** [[DOTADDR3]], align 8
+// CHECK-NEXT: store [2 x %struct.S.0]* [[TMP10]], [2 x %struct.S.0]** [[TMP11]], align 8
+// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP5]], i32 0, i32 3
+// CHECK-NEXT: [[TMP13:%.*]] = load %struct.S.0**, %struct.S.0*** [[DOTADDR4]], align 8
+// CHECK-NEXT: store %struct.S.0* [[TMP12]], %struct.S.0** [[TMP13]], align 8
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_task_entry..5
+// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
+// CHECK-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
+// CHECK-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8
+// CHECK-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon.1*, align 8
+// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca i32*, align 8
+// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca [2 x i32]*, align 8
+// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR2_I:%.*]] = alloca [2 x %struct.S.0]*, align 8
+// CHECK-NEXT: [[DOTFIRSTPRIV_PTR_ADDR3_I:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: [[I_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8
+// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
+// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 0
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128
+// CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1*
+// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2
+// CHECK-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.3* [[TMP9]] to i8*
+// CHECK-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8*
+// CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5
+// CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6
+// CHECK-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 16
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7
+// CHECK-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
+// CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8
+// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 64
+// CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9
+// CHECK-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]])
+// CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]])
+// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32
+// CHECK-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !32
+// CHECK-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)*
+// CHECK-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR4]]
+// CHECK-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: [[TMP27:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !32
+// CHECK-NEXT: [[TMP28:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !32
+// CHECK-NEXT: [[TMP29:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !32
+// CHECK-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !32
+// CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP30]] to i32
+// CHECK-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !32
+// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]]
+// CHECK: omp.inner.for.cond.i:
+// CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group [[ACC_GRP33:![0-9]+]]
+// CHECK-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP31]] to i64
+// CHECK-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !32, !llvm.access.group [[ACC_GRP33]]
+// CHECK-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP32]]
+// CHECK-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]]
+// CHECK: omp.inner.for.body.i:
+// CHECK-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group [[ACC_GRP33]]
+// CHECK-NEXT: store i32 [[TMP33]], i32* [[I_I]], align 4, !noalias !32, !llvm.access.group [[ACC_GRP33]]
+// CHECK-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP26]], align 128, !llvm.access.group [[ACC_GRP33]]
+// CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP27]], i64 0, i64 0
+// CHECK-NEXT: store i32 [[TMP34]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group [[ACC_GRP33]]
+// CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP28]], i64 0, i64 0
+// CHECK-NEXT: [[TMP35:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] to i8*
+// CHECK-NEXT: [[TMP36:%.*]] = bitcast %struct.S.0* [[TMP29]] to i8*
+// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP35]], i8* align 4 [[TMP36]], i64 4, i1 false), !llvm.access.group [[ACC_GRP33]]
+// CHECK-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group [[ACC_GRP33]]
+// CHECK-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP37]], 1
+// CHECK-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group [[ACC_GRP33]]
+// CHECK-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP34:![0-9]+]]
+// CHECK: .omp_outlined..3.exit:
+// CHECK-NEXT: ret i32 0
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_task_dup..6
+// CHECK-SAME: (%struct.kmp_task_t_with_privates.2* noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noundef [[TMP1:%.*]], i32 noundef [[TMP2:%.*]]) #[[ATTR7]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8
+// CHECK-NEXT: [[DOTADDR2:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP0]], %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8
+// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: store i32 [[TMP2]], i32* [[DOTADDR2]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR]], align 8
+// CHECK-NEXT: [[TMP4:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP5]], i32 0, i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 128
+// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2]], %struct.kmp_task_t_with_privates.2* [[TMP3]], i32 0, i32 2
+// CHECK-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1*
+// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 2
+// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP9]], i32 0, i32 0
+// CHECK-NEXT: [[TMP12:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[TMP11]], align 8
+// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP10]], i32 0, i32 0
+// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x %struct.S.0]* [[TMP12]] to %struct.S.0*
+// CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq %struct.S.0* [[ARRAY_BEGIN]], [[TMP14]]
+// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]]
+// CHECK: omp.arraycpy.body:
+// CHECK-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP13]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi %struct.S.0* [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ]
+// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 noundef 0)
+// CHECK-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1
+// CHECK-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], %struct.S.0* [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1
+// CHECK-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq %struct.S.0* [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP14]]
+// CHECK-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]]
+// CHECK: omp.arraycpy.done3:
+// CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP8]], i32 0, i32 3
+// CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP9]], i32 0, i32 1
+// CHECK-NEXT: [[TMP17:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP16]], align 8
+// CHECK-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP15]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP17]], i32 noundef 0)
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@.omp_task_destructor..7
+// CHECK-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates.2* noalias noundef [[TMP1:%.*]]) #[[ATTR7]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates.2*, align 8
+// CHECK-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
+// CHECK-NEXT: store %struct.kmp_task_t_with_privates.2* [[TMP1]], %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load %struct.kmp_task_t_with_privates.2*, %struct.kmp_task_t_with_privates.2** [[DOTADDR1]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_2:%.*]], %struct.kmp_task_t_with_privates.2* [[TMP2]], i32 0, i32 2
+// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3:%.*]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 2
+// CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T_3]], %struct..kmp_privates.t.3* [[TMP3]], i32 0, i32 3
+// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP5]]) #[[ATTR4]]
+// CHECK-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP4]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[ARRAY_BEGIN]], i64 2
+// CHECK-NEXT: br label [[ARRAYDESTROY_BODY:%.*]]
+// CHECK: arraydestroy.body:
+// CHECK-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP6]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ]
+// CHECK-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1
+// CHECK-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]]
+// CHECK-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]]
+// CHECK-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE2:%.*]], label [[ARRAYDESTROY_BODY]]
+// CHECK: arraydestroy.done2:
+// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 [[TMP7]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]]
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0
+// CHECK-NEXT: store i32 0, i32* [[F]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8
+// CHECK-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8
+// CHECK-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+// CHECK-NEXT: store i32 [[ADD]], i32* [[F]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK-NEXT: store i32 [[TMP0]], i32* [[F]], align 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev
+// CHECK-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8
+// CHECK-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8
+// CHECK-NEXT: ret void
+//
+//
+// LAMBDA-LABEL: define {{[^@]+}}@main
+// LAMBDA-SAME: () #[[ATTR0:[0-9]+]] {
+// LAMBDA-NEXT: entry:
+// LAMBDA-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1
+// LAMBDA-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// LAMBDA-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]])
+// LAMBDA-NEXT: ret i32 0
+//
+//
+// LAMBDA-LABEL: define {{[^@]+}}@.omp_outlined.
+// LAMBDA-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
+// LAMBDA-NEXT: entry:
+// LAMBDA-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// LAMBDA-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// LAMBDA-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// LAMBDA-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// LAMBDA-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// LAMBDA-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// LAMBDA-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// LAMBDA-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]])
+// LAMBDA-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// LAMBDA-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// LAMBDA: omp_if.then:
+// LAMBDA-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// LAMBDA-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// LAMBDA-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates*
+// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0
+// LAMBDA-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1
+// LAMBDA-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0
+// LAMBDA-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8
+// LAMBDA-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8
+// LAMBDA-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1
+// LAMBDA-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4
+// LAMBDA-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8
+// LAMBDA-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5
+// LAMBDA-NEXT: store i64 0, i64* [[TMP12]], align 8
+// LAMBDA-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6
+// LAMBDA-NEXT: store i64 9, i64* [[TMP13]], align 8
+// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7
+// LAMBDA-NEXT: store i64 1, i64* [[TMP14]], align 8
+// LAMBDA-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9
+// LAMBDA-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8*
+// LAMBDA-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false)
+// LAMBDA-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8
+// LAMBDA-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null)
+// LAMBDA-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// LAMBDA-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// LAMBDA-NEXT: br label [[OMP_IF_END]]
+// LAMBDA: omp_if.end:
+// LAMBDA-NEXT: ret void
+//
+//
+// LAMBDA-LABEL: define {{[^@]+}}@.omp_task_privates_map.
+// LAMBDA-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR5:[0-9]+]] {
+// LAMBDA-NEXT: entry:
+// LAMBDA-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
+// LAMBDA-NEXT: [[DOTADDR1:%.*]] = alloca double**, align 8
+// LAMBDA-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8
+// LAMBDA-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8
+// LAMBDA-NEXT: store double** [[TMP1]], double*** [[DOTADDR1]], align 8
+// LAMBDA-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8
+// LAMBDA-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8
+// LAMBDA-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0
+// LAMBDA-NEXT: [[TMP5:%.*]] = load double**, double*** [[DOTADDR1]], align 8
+// LAMBDA-NEXT: store double* [[TMP4]], double** [[TMP5]], align 8
+// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1
+// LAMBDA-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8
+// LAMBDA-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8
+// LAMBDA-NEXT: ret void
+//
+//
+// LAMBDA-LABEL: define {{[^@]+}}@.omp_task_entry.
+// LAMBDA-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR6:[0-9]+]] {
+// LAMBDA-NEXT: entry:
+// LAMBDA-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
+// LAMBDA-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
+// LAMBDA-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
+// LAMBDA-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
+// LAMBDA-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8
+// LAMBDA-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8
+// LAMBDA-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8
+// LAMBDA-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8
+// LAMBDA-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
+// LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8
+// LAMBDA-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8
+// LAMBDA-NEXT: [[I_I:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[REF_TMP_I:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
+// LAMBDA-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// LAMBDA-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// LAMBDA-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
+// LAMBDA-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// LAMBDA-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
+// LAMBDA-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// LAMBDA-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
+// LAMBDA-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2
+// LAMBDA-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0
+// LAMBDA-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8
+// LAMBDA-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon*
+// LAMBDA-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1
+// LAMBDA-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8*
+// LAMBDA-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8*
+// LAMBDA-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5
+// LAMBDA-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
+// LAMBDA-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6
+// LAMBDA-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8
+// LAMBDA-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7
+// LAMBDA-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8
+// LAMBDA-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8
+// LAMBDA-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8
+// LAMBDA-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9
+// LAMBDA-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8
+// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]])
+// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]])
+// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]])
+// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]])
+// LAMBDA-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
+// LAMBDA-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14
+// LAMBDA-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14
+// LAMBDA-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)*
+// LAMBDA-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]]
+// LAMBDA-NEXT: [[TMP26:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14
+// LAMBDA-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP28]] to i32
+// LAMBDA-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14
+// LAMBDA-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]]
+// LAMBDA: omp.inner.for.cond.i:
+// LAMBDA-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15:![0-9]+]]
+// LAMBDA-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP29]] to i64
+// LAMBDA-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]]
+// LAMBDA-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP30]]
+// LAMBDA-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]]
+// LAMBDA: omp.inner.for.body.i:
+// LAMBDA-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]]
+// LAMBDA-NEXT: store i32 [[TMP31]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]]
+// LAMBDA-NEXT: store double 1.000000e+00, double* [[TMP26]], align 8, !llvm.access.group [[ACC_GRP15]]
+// LAMBDA-NEXT: store i32 11, i32* [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]]
+// LAMBDA-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 0
+// LAMBDA-NEXT: store double* [[TMP26]], double** [[TMP32]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]]
+// LAMBDA-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 1
+// LAMBDA-NEXT: store i32* [[TMP27]], i32** [[TMP33]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]]
+// LAMBDA-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* noundef nonnull align 8 dereferenceable(16) [[REF_TMP_I]]), !llvm.access.group [[ACC_GRP15]]
+// LAMBDA-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]]
+// LAMBDA-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP34]], 1
+// LAMBDA-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]]
+// LAMBDA-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]]
+// LAMBDA: .omp_outlined..1.exit:
+// LAMBDA-NEXT: ret i32 0
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@main
+// BLOCKS-SAME: () #[[ATTR1:[0-9]+]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: store i32 0, i32* [[RETVAL]], align 4
+// BLOCKS-NEXT: [[TMP0:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to %struct.__block_literal_generic*), i32 0, i32 3), align 8
+// BLOCKS-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to void (i8*)*
+// BLOCKS-NEXT: call void [[TMP1]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to i8*))
+// BLOCKS-NEXT: ret i32 0
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@__main_block_invoke
+// BLOCKS-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8
+// BLOCKS-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8
+// BLOCKS-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
+// BLOCKS-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*
+// BLOCKS-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8
+// BLOCKS-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*))
+// BLOCKS-NEXT: ret void
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@.omp_outlined.
+// BLOCKS-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR3:[0-9]+]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
+// BLOCKS-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
+// BLOCKS-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 1
+// BLOCKS-NEXT: [[TMP:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// BLOCKS-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
+// BLOCKS-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
+// BLOCKS-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
+// BLOCKS-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// BLOCKS-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+// BLOCKS-NEXT: br i1 [[TMP3]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]]
+// BLOCKS: omp_if.then:
+// BLOCKS-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// BLOCKS-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i32 1, i64 96, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. to i32 (i32, i8*)*))
+// BLOCKS-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct.kmp_task_t_with_privates*
+// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 0
+// BLOCKS-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP5]], i32 0, i32 1
+// BLOCKS-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 0
+// BLOCKS-NEXT: [[TMP9:%.*]] = load volatile double, double* @g, align 8
+// BLOCKS-NEXT: store volatile double [[TMP9]], double* [[TMP8]], align 8
+// BLOCKS-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP7]], i32 0, i32 1
+// BLOCKS-NEXT: [[TMP11:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4
+// BLOCKS-NEXT: store i32 [[TMP11]], i32* [[TMP10]], align 8
+// BLOCKS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 5
+// BLOCKS-NEXT: store i64 0, i64* [[TMP12]], align 8
+// BLOCKS-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 6
+// BLOCKS-NEXT: store i64 9, i64* [[TMP13]], align 8
+// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 7
+// BLOCKS-NEXT: store i64 1, i64* [[TMP14]], align 8
+// BLOCKS-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP6]], i32 0, i32 9
+// BLOCKS-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i8*
+// BLOCKS-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP16]], i8 0, i64 8, i1 false)
+// BLOCKS-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP14]], align 8
+// BLOCKS-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]], i8* [[TMP4]], i32 1, i64* [[TMP12]], i64* [[TMP13]], i64 [[TMP17]], i32 1, i32 0, i64 0, i8* null)
+// BLOCKS-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// BLOCKS-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]])
+// BLOCKS-NEXT: br label [[OMP_IF_END]]
+// BLOCKS: omp_if.end:
+// BLOCKS-NEXT: ret void
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@_block_invoke
+// BLOCKS-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8
+// BLOCKS-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>*, align 8
+// BLOCKS-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8
+// BLOCKS-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>*
+// BLOCKS-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>** [[BLOCK_ADDR]], align 8
+// BLOCKS-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 5
+// BLOCKS-NEXT: store double 2.000000e+00, double* [[BLOCK_CAPTURE_ADDR]], align 8
+// BLOCKS-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 6
+// BLOCKS-NEXT: store i32 22, i32* [[BLOCK_CAPTURE_ADDR1]], align 8
+// BLOCKS-NEXT: ret void
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@.omp_task_privates_map.
+// BLOCKS-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], double** noalias noundef [[TMP1:%.*]], i32** noalias noundef [[TMP2:%.*]]) #[[ATTR6:[0-9]+]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8
+// BLOCKS-NEXT: [[DOTADDR1:%.*]] = alloca double**, align 8
+// BLOCKS-NEXT: [[DOTADDR2:%.*]] = alloca i32**, align 8
+// BLOCKS-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8
+// BLOCKS-NEXT: store double** [[TMP1]], double*** [[DOTADDR1]], align 8
+// BLOCKS-NEXT: store i32** [[TMP2]], i32*** [[DOTADDR2]], align 8
+// BLOCKS-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8
+// BLOCKS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0
+// BLOCKS-NEXT: [[TMP5:%.*]] = load double**, double*** [[DOTADDR1]], align 8
+// BLOCKS-NEXT: store double* [[TMP4]], double** [[TMP5]], align 8
+// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1
+// BLOCKS-NEXT: [[TMP7:%.*]] = load i32**, i32*** [[DOTADDR2]], align 8
+// BLOCKS-NEXT: store i32* [[TMP6]], i32** [[TMP7]], align 8
+// BLOCKS-NEXT: ret void
+//
+//
+// BLOCKS-LABEL: define {{[^@]+}}@.omp_task_entry.
+// BLOCKS-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR7:[0-9]+]] {
+// BLOCKS-NEXT: entry:
+// BLOCKS-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8
+// BLOCKS-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8
+// BLOCKS-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8
+// BLOCKS-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8
+// BLOCKS-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8
+// BLOCKS-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8
+// BLOCKS-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8
+// BLOCKS-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8
+// BLOCKS-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8
+// BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca double*, align 8
+// BLOCKS-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca i32*, align 8
+// BLOCKS-NEXT: [[I_I:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: [[BLOCK_I:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, align 8
+// BLOCKS-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4
+// BLOCKS-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8
+// BLOCKS-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4
+// BLOCKS-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// BLOCKS-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4
+// BLOCKS-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8
+// BLOCKS-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0
+// BLOCKS-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// BLOCKS-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// BLOCKS-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// BLOCKS-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// BLOCKS-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// BLOCKS-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// BLOCKS-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// BLOCKS-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// BLOCKS-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// BLOCKS-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// BLOCKS-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// BLOCKS-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// BLOCKS-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// BLOCKS-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// BLOCKS-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// BLOCKS-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// BLOCKS-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// BLOCKS-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// BLOCKS-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// BLOCKS-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// BLOCKS-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* +// BLOCKS-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]], i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]] +// BLOCKS-NEXT: [[TMP26:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// BLOCKS-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP28]] to i32 +// BLOCKS-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// BLOCKS-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// BLOCKS: omp.inner.for.cond.i: +// BLOCKS-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// BLOCKS-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP29]] to i64 +// BLOCKS-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP30]] +// BLOCKS-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// BLOCKS: omp.inner.for.body.i: +// BLOCKS-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: store i32 [[TMP31]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: store double 1.000000e+00, double* [[TMP26]], align 8, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: store i32 11, i32* [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_ISA_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 0 +// BLOCKS-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_FLAGS_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 1 +// BLOCKS-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_RESERVED_I:%.*]] = getelementptr inbounds <{ i8*, 
i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 2 +// BLOCKS-NEXT: store i32 0, i32* [[BLOCK_RESERVED_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_INVOKE_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 3 +// BLOCKS-NEXT: store i8* bitcast (void (i8*)* @_block_invoke to i8*), i8** [[BLOCK_INVOKE_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_DESCRIPTOR_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 4 +// BLOCKS-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.2 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_CAPTURED_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 5 +// BLOCKS-NEXT: [[TMP32:%.*]] = load volatile double, double* [[TMP26]], align 8, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: store volatile double [[TMP32]], double* [[BLOCK_CAPTURED_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[BLOCK_CAPTURED3_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 6 +// BLOCKS-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP27]], align 4, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: store i32 [[TMP33]], i32* [[BLOCK_CAPTURED3_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[TMP34:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]] to void ()* +// BLOCKS-NEXT: [[BLOCK_LITERAL_I:%.*]] = bitcast void ()* [[TMP34]] to %struct.__block_literal_generic* +// BLOCKS-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL_I]], i32 0, i32 3 +// BLOCKS-NEXT: [[TMP36:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL_I]] to i8* +// BLOCKS-NEXT: [[TMP37:%.*]] = load i8*, i8** [[TMP35]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[TMP38:%.*]] = bitcast i8* [[TMP37]] to void (i8*)* +// BLOCKS-NEXT: call void [[TMP38]](i8* noundef [[TMP36]]) #[[ATTR4]], !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: [[ADD4_I:%.*]] = add nsw i32 [[TMP39]], 1 +// BLOCKS-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// BLOCKS-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] +// BLOCKS: .omp_outlined..1.exit: +// BLOCKS-NEXT: ret i32 0 +// +// +// ARRAY-LABEL: define {{[^@]+}}@_Z10array_funciPfP2St +// ARRAY-SAME: (i32 noundef [[N:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR0:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 
+// ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// ARRAY-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// ARRAY-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// ARRAY-NEXT: [[TMP2:%.*]] = load float*, float** [[A_ADDR]], align 8 +// ARRAY-NEXT: [[TMP3:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, float*, %struct.St*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], float* [[TMP2]], %struct.St* [[TMP3]]) +// ARRAY-NEXT: ret void +// +// +// ARRAY-LABEL: define {{[^@]+}}@.omp_outlined. +// ARRAY-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR1:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// ARRAY-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// ARRAY-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// ARRAY-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// ARRAY-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 +// ARRAY-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// ARRAY-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// ARRAY-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// ARRAY-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// ARRAY-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// ARRAY-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// ARRAY-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// ARRAY-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 +// ARRAY-NEXT: br i1 [[TMP4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_END:%.*]] +// ARRAY: omp_if.then: +// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[AGG_CAPTURED]], i32 0, i32 0 +// ARRAY-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8 +// ARRAY-NEXT: call void @__kmpc_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 1, i64 96, i64 8, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates*)* @.omp_task_entry. 
to i32 (i32, i8*)*)) +// ARRAY-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct.kmp_task_t_with_privates* +// ARRAY-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 +// ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.anon* [[AGG_CAPTURED]] to i8* +// ARRAY-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 8, i1 false) +// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP7]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP14:%.*]] = load float*, float** [[A_ADDR]], align 8 +// ARRAY-NEXT: store float* [[TMP14]], float** [[TMP13]], align 8 +// ARRAY-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP12]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP16:%.*]] = load %struct.St*, %struct.St** [[S_ADDR]], align 8 +// ARRAY-NEXT: store %struct.St* [[TMP16]], %struct.St** [[TMP15]], align 8 +// ARRAY-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 5 +// ARRAY-NEXT: store i64 0, i64* [[TMP17]], align 8 +// ARRAY-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 6 +// ARRAY-NEXT: store i64 9, i64* [[TMP18]], align 8 +// ARRAY-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 7 +// ARRAY-NEXT: store i64 1, i64* [[TMP19]], align 8 +// ARRAY-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP8]], i32 0, i32 9 +// ARRAY-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i8* +// ARRAY-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP21]], i8 0, i64 8, i1 false) +// ARRAY-NEXT: [[TMP22:%.*]] = load i64, i64* [[TMP19]], align 8 +// ARRAY-NEXT: call void @__kmpc_taskloop(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i8* [[TMP6]], i32 1, i64* [[TMP17]], i64* [[TMP18]], i64 [[TMP22]], i32 1, i32 0, i64 0, i8* null) +// ARRAY-NEXT: call void @__kmpc_end_taskgroup(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: call void @__kmpc_end_master(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// ARRAY-NEXT: br label [[OMP_IF_END]] +// ARRAY: omp_if.end: +// ARRAY-NEXT: ret void +// +// +// ARRAY-LABEL: define {{[^@]+}}@.omp_task_privates_map. 
+// ARRAY-SAME: (%struct..kmp_privates.t* noalias noundef [[TMP0:%.*]], float*** noalias noundef [[TMP1:%.*]], %struct.St*** noalias noundef [[TMP2:%.*]]) #[[ATTR4:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[DOTADDR:%.*]] = alloca %struct..kmp_privates.t*, align 8 +// ARRAY-NEXT: [[DOTADDR1:%.*]] = alloca float***, align 8 +// ARRAY-NEXT: [[DOTADDR2:%.*]] = alloca %struct.St***, align 8 +// ARRAY-NEXT: store %struct..kmp_privates.t* [[TMP0]], %struct..kmp_privates.t** [[DOTADDR]], align 8 +// ARRAY-NEXT: store float*** [[TMP1]], float**** [[DOTADDR1]], align 8 +// ARRAY-NEXT: store %struct.St*** [[TMP2]], %struct.St**** [[DOTADDR2]], align 8 +// ARRAY-NEXT: [[TMP3:%.*]] = load %struct..kmp_privates.t*, %struct..kmp_privates.t** [[DOTADDR]], align 8 +// ARRAY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP5:%.*]] = load float***, float**** [[DOTADDR1]], align 8 +// ARRAY-NEXT: store float** [[TMP4]], float*** [[TMP5]], align 8 +// ARRAY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T]], %struct..kmp_privates.t* [[TMP3]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP7:%.*]] = load %struct.St***, %struct.St**** [[DOTADDR2]], align 8 +// ARRAY-NEXT: store %struct.St** [[TMP6]], %struct.St*** [[TMP7]], align 8 +// ARRAY-NEXT: ret void +// +// +// ARRAY-LABEL: define {{[^@]+}}@.omp_task_entry. +// ARRAY-SAME: (i32 noundef [[TMP0:%.*]], %struct.kmp_task_t_with_privates* noalias noundef [[TMP1:%.*]]) #[[ATTR5:[0-9]+]] { +// ARRAY-NEXT: entry: +// ARRAY-NEXT: [[DOTGLOBAL_TID__ADDR_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTPART_ID__ADDR_I:%.*]] = alloca i32*, align 8 +// ARRAY-NEXT: [[DOTPRIVATES__ADDR_I:%.*]] = alloca i8*, align 8 +// ARRAY-NEXT: [[DOTCOPY_FN__ADDR_I:%.*]] = alloca void (i8*, ...)*, align 8 +// ARRAY-NEXT: [[DOTTASK_T__ADDR_I:%.*]] = alloca i8*, align 8 +// ARRAY-NEXT: [[DOTLB__ADDR_I:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[DOTUB__ADDR_I:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[DOTST__ADDR_I:%.*]] = alloca i64, align 8 +// ARRAY-NEXT: [[DOTLITER__ADDR_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTREDUCTIONS__ADDR_I:%.*]] = alloca i8*, align 8 +// ARRAY-NEXT: [[__CONTEXT_ADDR_I:%.*]] = alloca %struct.anon*, align 8 +// ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR_I:%.*]] = alloca float**, align 8 +// ARRAY-NEXT: [[DOTFIRSTPRIV_PTR_ADDR1_I:%.*]] = alloca %struct.St**, align 8 +// ARRAY-NEXT: [[I_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTOMP_IV_I:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTADDR:%.*]] = alloca i32, align 4 +// ARRAY-NEXT: [[DOTADDR1:%.*]] = alloca %struct.kmp_task_t_with_privates*, align 8 +// ARRAY-NEXT: store i32 [[TMP0]], i32* [[DOTADDR]], align 4 +// ARRAY-NEXT: store %struct.kmp_task_t_with_privates* [[TMP1]], %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// ARRAY-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTADDR]], align 4 +// ARRAY-NEXT: [[TMP3:%.*]] = load %struct.kmp_task_t_with_privates*, %struct.kmp_task_t_with_privates** [[DOTADDR1]], align 8 +// ARRAY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 2 +// ARRAY-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 +// 
ARRAY-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* +// ARRAY-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 +// ARRAY-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* +// ARRAY-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* +// ARRAY-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 5 +// ARRAY-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 +// ARRAY-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 6 +// ARRAY-NEXT: [[TMP15:%.*]] = load i64, i64* [[TMP14]], align 8 +// ARRAY-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 7 +// ARRAY-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP16]], align 8 +// ARRAY-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 8 +// ARRAY-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 +// ARRAY-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 +// ARRAY-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// ARRAY-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// ARRAY-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// ARRAY-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, float***, %struct.St***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// ARRAY-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 +// ARRAY-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 +// ARRAY-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, float***, %struct.St***)* +// ARRAY-NEXT: call void [[TMP27]](i8* [[TMP26]], float*** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.St*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]] +// ARRAY-NEXT: [[TMP28:%.*]] = load float**, float*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP29:%.*]] = load %struct.St**, %struct.St*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[TMP30:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// ARRAY-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP30]] to i32 +// ARRAY-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// ARRAY-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] +// ARRAY: omp.inner.for.cond.i: +// ARRAY-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15:![0-9]+]] +// ARRAY-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP31]] to i64 +// ARRAY-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// ARRAY-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP32]] +// ARRAY-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] +// ARRAY: omp.inner.for.body.i: +// ARRAY-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// ARRAY-NEXT: store i32 [[TMP33]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// ARRAY-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// ARRAY-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP34]], 1 +// ARRAY-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group [[ACC_GRP15]] +// ARRAY-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] +// ARRAY: .omp_outlined..1.exit: +// ARRAY-NEXT: ret i32 0 +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@main +// SIMD-ONLY0-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// SIMD-ONLY0-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY0-NEXT: [[T_VAR:%.*]] = alloca i32, 
align 4 +// SIMD-ONLY0-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY0-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 +// SIMD-ONLY0-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY0-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) +// SIMD-ONLY0-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) +// SIMD-ONLY0-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY0-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY0: omp.inner.for.cond: +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// SIMD-ONLY0-NEXT: [[CONV1:%.*]] = sext i32 [[TMP2]] to i64 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP3]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY0: omp.inner.for.body: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = bitcast 
%struct.S* [[ARRAYIDX2]] to i8* +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 8 [[TMP7]], i64 8, i1 false), !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: store i32 33, i32* @_ZZ4mainE5sivar, align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// SIMD-ONLY0: omp.body.continue: +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// SIMD-ONLY0: omp.inner.for.inc: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// SIMD-ONLY0: omp.inner.for.end: +// SIMD-ONLY0-NEXT: store i32 10, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// SIMD-ONLY0-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] +// SIMD-ONLY0-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY0-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY0: arraydestroy.body: +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY0-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY0: arraydestroy.done4: +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP10]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = 
alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// SIMD-ONLY0-SAME: () #[[ATTR3:[0-9]+]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// SIMD-ONLY0-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY0-NEXT: [[T_VAR:%.*]] = alloca i32, align 128 +// SIMD-ONLY0-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY0-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// SIMD-ONLY0-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY0-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// SIMD-ONLY0-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// SIMD-ONLY0-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], 
%struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// SIMD-ONLY0-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY0-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY0: omp.inner.for.cond: +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// SIMD-ONLY0-NEXT: [[CONV1:%.*]] = sext i32 [[TMP2]] to i64 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP3]] +// SIMD-ONLY0-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY0: omp.inner.for.body: +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 128, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = bitcast %struct.S.0* [[ARRAYIDX2]] to i8* +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = bitcast %struct.S.0* [[VAR]] to i8* +// SIMD-ONLY0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// SIMD-ONLY0: omp.body.continue: +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// SIMD-ONLY0: omp.inner.for.inc: +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY0-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// SIMD-ONLY0: omp.inner.for.end: +// SIMD-ONLY0-NEXT: store i32 10, i32* [[I]], align 4 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY0-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY0: arraydestroy.body: +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], 
[[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY0-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY0: arraydestroy.done4: +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP10]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store double 0.000000e+00, double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], 
%struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: store double [[ADD]], double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed +// SIMD-ONLY0-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY0-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store double [[TMP0]], double* [[F]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1ERKS0_i +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca 
%struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[F]], align 4 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[F]], align 4 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca 
%struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// SIMD-ONLY0-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY0-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@main +// SIMD-ONLY1-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S:%.*]], align 8 +// SIMD-ONLY1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY1-NEXT: [[T_VAR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S], align 16 +// SIMD-ONLY1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S]], align 8 +// SIMD-ONLY1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]], double noundef 0.000000e+00) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[T_VAR]], align 4 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const.main.vec to i8*), i64 8, i1 false) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_BEGIN]], double noundef 1.000000e+00) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYINIT_ELEMENT]], double noundef 2.000000e+00) +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC1Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]], double noundef 3.000000e+00) +// SIMD-ONLY1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to 
i32 +// SIMD-ONLY1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY1: omp.inner.for.cond: +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// SIMD-ONLY1-NEXT: [[CONV1:%.*]] = sext i32 [[TMP2]] to i64 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP3]] +// SIMD-ONLY1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY1: omp.inner.for.body: +// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = bitcast %struct.S* [[ARRAYIDX2]] to i8* +// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = bitcast %struct.S* [[VAR]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP6]], i8* align 8 [[TMP7]], i64 8, i1 false), !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: store i32 33, i32* @_ZZ4mainE5sivar, align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// SIMD-ONLY1: omp.body.continue: +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// SIMD-ONLY1: omp.inner.for.inc: +// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// SIMD-ONLY1: omp.inner.for.end: +// SIMD-ONLY1-NEXT: store i32 10, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: [[CALL:%.*]] = call noundef i32 @_Z5tmainIiET_v() +// SIMD-ONLY1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[VAR]]) #[[ATTR4:[0-9]+]] +// SIMD-ONLY1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY1: arraydestroy.body: +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S* [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S* [[ARRAYDESTROY_ELEMENT]], 
[[ARRAY_BEGIN]] +// SIMD-ONLY1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY1: arraydestroy.done4: +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED1Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[TMP10:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP10]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1ERKS0_d +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2ERKS0_d(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[TMP0]], double noundef [[TMP1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC1Ed +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdEC2Ed(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]], double noundef [[TMP0]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z5tmainIiET_v +// SIMD-ONLY1-SAME: () #[[ATTR3:[0-9]+]] { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[TTT:%.*]] = alloca [[STRUCT_S_0:%.*]], align 4 +// SIMD-ONLY1-NEXT: [[TEST:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY1-NEXT: [[T_VAR:%.*]] = alloca 
i32, align 128 +// SIMD-ONLY1-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 4 +// SIMD-ONLY1-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 4 +// SIMD-ONLY1-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 4 +// SIMD-ONLY1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]], i32 noundef 0) +// SIMD-ONLY1-NEXT: store i32 0, i32* [[T_VAR]], align 128 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = bitcast [2 x i32]* [[VEC]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP0]], i8* align 4 bitcast ([2 x i32]* @__const._Z5tmainIiET_v.vec to i8*), i64 8, i1 false) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_BEGIN]], i32 noundef 1) +// SIMD-ONLY1-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYINIT_BEGIN]], i64 1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 noundef 2) +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC1Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]], i32 noundef 3) +// SIMD-ONLY1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY1-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// SIMD-ONLY1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY1: omp.inner.for.cond: +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6:![0-9]+]] +// SIMD-ONLY1-NEXT: [[CONV1:%.*]] = sext i32 [[TMP2]] to i64 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP3]] +// SIMD-ONLY1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY1: omp.inner.for.body: +// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP4]], 1 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[T_VAR]], align 128, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[VEC]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: store i32 [[TMP5]], i32* [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i64 0, i64 0 +// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = bitcast %struct.S.0* [[ARRAYIDX2]] to i8* +// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = 
bitcast %struct.S.0* [[VAR]] to i8* +// SIMD-ONLY1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP6]], i8* align 4 [[TMP7]], i64 4, i1 false), !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// SIMD-ONLY1: omp.body.continue: +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// SIMD-ONLY1: omp.inner.for.inc: +// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP6]] +// SIMD-ONLY1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// SIMD-ONLY1: omp.inner.for.end: +// SIMD-ONLY1-NEXT: store i32 10, i32* [[I]], align 4 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[S_ARR]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAY_BEGIN]], i64 2 +// SIMD-ONLY1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] +// SIMD-ONLY1: arraydestroy.body: +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi %struct.S.0* [ [[TMP9]], [[OMP_INNER_FOR_END]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq %struct.S.0* [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] +// SIMD-ONLY1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4:%.*]], label [[ARRAYDESTROY_BODY]] +// SIMD-ONLY1: arraydestroy.done4: +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED1Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TTT]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: [[TMP10:%.*]] = load i32, i32* [[RETVAL]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP10]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdED1Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIdED2Ev(%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// 
SIMD-ONLY1-NEXT: store double 0.000000e+00, double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdED2Ev +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2ERKS0_d +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], %struct.S* noundef nonnull align 8 dereferenceable(8) [[S:%.*]], double noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[S]], %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[T]], double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S*, %struct.S** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load double, double* [[F2]], align 8 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load double, double* [[T_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[TMP2]] +// SIMD-ONLY1-NEXT: store double [[ADD]], double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIdEC2Ed +// SIMD-ONLY1-SAME: (%struct.S* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], double noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca double, align 8 +// SIMD-ONLY1-NEXT: store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[A]], double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load double, double* [[A_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store double [[TMP0]], double* [[F]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define 
{{[^@]+}}@_ZN1SIiEC1ERKS0_i +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2ERKS0_i(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[TMP0]], i32 noundef [[TMP1]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC1Ei +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiEC2Ei(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]) +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiED1Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: call void @_ZN1SIiED2Ev(%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]] +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2ERKS0_i +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], %struct.S.0* noundef nonnull align 4 dereferenceable(4) [[S:%.*]], i32 noundef [[T:%.*]]) 
unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[S_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[T_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[S]], %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[T]], i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load %struct.S.0*, %struct.S.0** [[S_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F2:%.*]] = getelementptr inbounds [[STRUCT_S_0]], %struct.S.0* [[TMP0]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[F2]], align 4 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[T_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]] +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiEC2Ei +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT_S_0:%.*]], %struct.S.0* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// SIMD-ONLY1-NEXT: store i32 [[TMP0]], i32* [[F]], align 4 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN1SIiED2Ev +// SIMD-ONLY1-SAME: (%struct.S.0* noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR1]] align 2 { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S.0*, align 8 +// SIMD-ONLY1-NEXT: store %struct.S.0* [[THIS]], %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S.0*, %struct.S.0** [[THIS_ADDR]], align 8 +// SIMD-ONLY1-NEXT: ret void +// +// +// SIMD-ONLY2-LABEL: define {{[^@]+}}@main +// SIMD-ONLY2-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY2-NEXT: entry: +// SIMD-ONLY2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY2-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1 +// SIMD-ONLY2-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY2-NEXT: call void @"_ZZ4mainENK3$_0clEv"(%class.anon* noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) +// SIMD-ONLY2-NEXT: ret i32 0 +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@main +// SIMD-ONLY3-SAME: () #[[ATTR1:[0-9]+]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: store i32 0, i32* [[RETVAL]], align 4 +// SIMD-ONLY3-NEXT: [[TMP0:%.*]] = load i8*, i8** getelementptr inbounds ([[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to %struct.__block_literal_generic*), i32 0, 
i32 3), align 8 +// SIMD-ONLY3-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to void (i8*)* +// SIMD-ONLY3-NEXT: call void [[TMP1]](i8* noundef bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor* }* @__block_literal_global to i8*)) +// SIMD-ONLY3-NEXT: ret i32 0 +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@__main_block_invoke +// SIMD-ONLY3-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2:[0-9]+]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>*, align 8 +// SIMD-ONLY3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY3-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY3-NEXT: [[BLOCK2:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, align 8 +// SIMD-ONLY3-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* +// SIMD-ONLY3-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor* }>** [[BLOCK_ADDR]], align 8 +// SIMD-ONLY3-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY3-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY3-NEXT: [[TMP0:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 +// SIMD-ONLY3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY3: omp.inner.for.cond: +// SIMD-ONLY3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// SIMD-ONLY3-NEXT: [[CONV1:%.*]] = sext i32 [[TMP1]] to i64 +// SIMD-ONLY3-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP2]] +// SIMD-ONLY3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY3: omp.inner.for.body: +// SIMD-ONLY3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1 +// SIMD-ONLY3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// SIMD-ONLY3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: store double 1.000000e+00, double* @g, align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: store i32 11, i32* @_ZZ4mainE5sivar, align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_ISA:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 0 +// SIMD-ONLY3-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_FLAGS:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 1 +// SIMD-ONLY3-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS]], align 8, 
!llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_RESERVED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 2 +// SIMD-ONLY3-NEXT: store i32 0, i32* [[BLOCK_RESERVED]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_INVOKE:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 3 +// SIMD-ONLY3-NEXT: store i8* bitcast (void (i8*)* @__main_block_invoke_2 to i8*), i8** [[BLOCK_INVOKE]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_DESCRIPTOR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 4 +// SIMD-ONLY3-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.1 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURED:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 5 +// SIMD-ONLY3-NEXT: [[TMP4:%.*]] = load volatile double, double* @g, align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: store volatile double [[TMP4]], double* [[BLOCK_CAPTURED]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURED3:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]], i32 0, i32 6 +// SIMD-ONLY3-NEXT: [[TMP5:%.*]] = load i32, i32* @_ZZ4mainE5sivar, align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: store i32 [[TMP5]], i32* [[BLOCK_CAPTURED3]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[TMP6:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK2]] to void ()* +// SIMD-ONLY3-NEXT: [[BLOCK_LITERAL:%.*]] = bitcast void ()* [[TMP6]] to %struct.__block_literal_generic* +// SIMD-ONLY3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL]], i32 0, i32 3 +// SIMD-ONLY3-NEXT: [[TMP8:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL]] to i8* +// SIMD-ONLY3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP7]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to void (i8*)* +// SIMD-ONLY3-NEXT: call void [[TMP10]](i8* noundef [[TMP8]]), !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// SIMD-ONLY3: omp.body.continue: +// SIMD-ONLY3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// SIMD-ONLY3: omp.inner.for.inc: +// SIMD-ONLY3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 +// SIMD-ONLY3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// SIMD-ONLY3: omp.inner.for.end: +// SIMD-ONLY3-NEXT: store i32 10, i32* [[I]], align 4 +// SIMD-ONLY3-NEXT: ret 
void +// +// +// SIMD-ONLY3-LABEL: define {{[^@]+}}@__main_block_invoke_2 +// SIMD-ONLY3-SAME: (i8* noundef [[DOTBLOCK_DESCRIPTOR:%.*]]) #[[ATTR2]] { +// SIMD-ONLY3-NEXT: entry: +// SIMD-ONLY3-NEXT: [[DOTBLOCK_DESCRIPTOR_ADDR:%.*]] = alloca i8*, align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_ADDR:%.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>*, align 8 +// SIMD-ONLY3-NEXT: store i8* [[DOTBLOCK_DESCRIPTOR]], i8** [[DOTBLOCK_DESCRIPTOR_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK:%.*]] = bitcast i8* [[DOTBLOCK_DESCRIPTOR]] to <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* +// SIMD-ONLY3-NEXT: store <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>** [[BLOCK_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURE_ADDR:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 5 +// SIMD-ONLY3-NEXT: store double 2.000000e+00, double* [[BLOCK_CAPTURE_ADDR]], align 8 +// SIMD-ONLY3-NEXT: [[BLOCK_CAPTURE_ADDR1:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK]], i32 0, i32 6 +// SIMD-ONLY3-NEXT: store i32 22, i32* [[BLOCK_CAPTURE_ADDR1]], align 8 +// SIMD-ONLY3-NEXT: ret void +// +// +// SIMD-ONLY4-LABEL: define {{[^@]+}}@_Z10array_funciPfP2St +// SIMD-ONLY4-SAME: (i32 noundef [[N:%.*]], float* noundef [[A:%.*]], %struct.St* noundef [[S:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY4-NEXT: entry: +// SIMD-ONLY4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 +// SIMD-ONLY4-NEXT: [[S_ADDR:%.*]] = alloca %struct.St*, align 8 +// SIMD-ONLY4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// SIMD-ONLY4-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// SIMD-ONLY4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: [[I:%.*]] = alloca i32, align 4 +// SIMD-ONLY4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY4-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 +// SIMD-ONLY4-NEXT: store %struct.St* [[S]], %struct.St** [[S_ADDR]], align 8 +// SIMD-ONLY4-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY4-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// SIMD-ONLY4-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY4-NEXT: store i64 9, i64* [[DOTOMP_UB]], align 8 +// SIMD-ONLY4-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// SIMD-ONLY4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP2]] to i32 +// SIMD-ONLY4-NEXT: store i32 [[CONV]], i32* [[DOTOMP_IV]], align 4 +// SIMD-ONLY4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// SIMD-ONLY4: omp.inner.for.cond: +// SIMD-ONLY4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2:![0-9]+]] +// SIMD-ONLY4-NEXT: [[CONV1:%.*]] = sext i32 [[TMP3]] to i64 +// SIMD-ONLY4-NEXT: [[TMP4:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP2]] +// SIMD-ONLY4-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV1]], [[TMP4]] +// SIMD-ONLY4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// SIMD-ONLY4: omp.inner.for.body: +// SIMD-ONLY4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]] +// 
SIMD-ONLY4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP5]], 1
+// SIMD-ONLY4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
+// SIMD-ONLY4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group [[ACC_GRP2]]
+// SIMD-ONLY4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
+// SIMD-ONLY4: omp.body.continue:
+// SIMD-ONLY4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
+// SIMD-ONLY4: omp.inner.for.inc:
+// SIMD-ONLY4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
+// SIMD-ONLY4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP6]], 1
+// SIMD-ONLY4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP2]]
+// SIMD-ONLY4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// SIMD-ONLY4: omp.inner.for.end:
+// SIMD-ONLY4-NEXT: store i32 10, i32* [[I]], align 4
+// SIMD-ONLY4-NEXT: ret void
+//
diff --git a/clang/test/OpenMP/target_firstprivate_codegen.cpp b/clang/test/OpenMP/target_firstprivate_codegen.cpp
index 4f8f5ae37472c..05a6e891204f2 100644
--- a/clang/test/OpenMP/target_firstprivate_codegen.cpp
+++ b/clang/test/OpenMP/target_firstprivate_codegen.cpp
@@ -1,38 +1,37 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
 // Test host codegen.
-// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
+// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK0
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64
-// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK1
+// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK2
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK3
 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY01 %s
+// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY02 %s
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s
-// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
-// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY03 %s
 // Test target codegen - host bc file has to be created first.
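[Editor's aside, not part of the patch. The RUN lines above compile one source file under several configurations, and the renamed prefixes (CHECK0..CHECK3, SIMD-ONLY01..SIMD-ONLY03) give each configuration its own autogenerated check set; the old shared SIMD-ONLY0 prefix carried a single hand-written "SIMD-ONLY0-NOT: {{__kmpc|__tgt}}" line asserting that -fopenmp-simd emits no offloading runtime calls. As a minimal sketch of the construct this test exercises (illustrative names, not the test's own source):

  int main() {
    int t = 7;
    double buf[4] = {0.0, 0.0, 0.0, 0.0};
  #pragma omp target firstprivate(t, buf)
    {
      // The region starts from device-side copies initialized with the
      // host values; writes made here are not copied back to the host.
      buf[0] = t;
      ++t;
    }
    return t; // still 7 on the host: firstprivate is copy-in only
  }

Under -fopenmp this lowers to an offload entry invoked through __tgt_target_kernel with base-pointer/pointer/size arrays (the removed CHECK lines further below walk those arrays); under -fopenmp-simd only the plain loop code remains. The target-codegen RUN lines continue next.]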
 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
-// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK1
 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK2
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
-// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK3
 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc
 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s
-// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY11 %s
 // RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -no-opaque-pointers -verify -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck --check-prefix SIMD-ONLY12 %s
 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s
-// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
-// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// RUN: %clang_cc1 -no-opaque-pointers -fopenmp-simd -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY13 %s
 // expected-no-diagnostics
 #ifndef HEADER
@@ -47,28 +46,9 @@ struct TT {
 int ga = 5;
 #pragma omp end declare target
-// CHECK-DAG: [[TT:%.+]] = type { i64, i8 }
-// CHECK-DAG: [[TTII:%.+]] = type { i32, i32 }
-// CHECK-DAG: [[S1:%.+]] = type { double }
-
-// TCHECK-DAG: [[TT:%.+]] = type { i64, i8 }
-// TCHECK-DAG: [[TTII:%.+]] = type { i32, i32 }
-// TCHECK-DAG: [[S1:%.+]] = type { double }
-
-// CHECK-DAG: [[SIZET:@.+]] = private unnamed_addr constant [3 x i{{32|64}}] [i[[SZ:32|64]] 4, i{{64|32}} {{8|4}}, i[[SZ:32|64]] 4]
-// CHECK-DAG: [[MAPT:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 49, i64 288]
-// CHECK-DAG: [[SIZET2:@.+]] = private unnamed_addr constant [9 x i64] [i64 2, i64 40, i64 {{4|8}}, i64 0, i64 400, i64 {{4|8}}, i64 {{4|8}}, i64 0, i64 {{12|16}}]
-// CHECK-DAG: [[MAPT2:@.+]] = private unnamed_addr constant [9 x i64] [i64 288, i64 161, i64 800, i64 161, i64 161, i64 800, i64 800, i64 161, i64 161]
-// CHECK-DAG: [[SIZET3:@.+]] = private unnamed_addr constant [2 x i{{32|64}}] [i{{32|64}} 0, i{{32|64}} 8]
-// CHECK-DAG: [[MAPT3:@.+]] = private unnamed_addr constant [2 x i64] [i64 32, i64 161]
-// CHECK-DAG: [[SIZET4:@.+]] = private unnamed_addr constant [5 x i64] [i64 8, i64 4, i64 {{4|8}}, i64 {{4|8}}, i64 0]
-// CHECK-DAG: [[MAPT4:@.+]] = private unnamed_addr
constant [5 x i64] [i64 547, i64 288, i64 800, i64 800, i64 161] -// CHECK-DAG: [[SIZET5:@.+]] = private unnamed_addr constant [3 x i{{32|64}}] [i[[SZ]] 4, i[[SZ]] 1, i[[SZ]] 40] -// CHECK-DAG: [[MAPT5:@.+]] = private unnamed_addr constant [3 x i64] [i64 288, i64 288, i64 161] -// CHECK-DAG: [[SIZET6:@.+]] = private unnamed_addr constant [2 x i{{32|64}}] [i[[SZ]] 4, i[[SZ]] 40] -// CHECK-DAG: [[MAPT6:@.+]] = private unnamed_addr constant [2 x i64] [i64 288, i64 161] - -// CHECK: define {{.*}}[[FOO:@.+]]( + + + int foo(int n, double *ptr) { int a = 0; short aa = 0; @@ -85,75 +65,7 @@ int foo(int n, double *ptr) { } // a is passed by value to tgt_target - // CHECK: [[N_ADDR:%.+]] = alloca i{{[0-9]+}}, - // CHECK: [[PTR_ADDR:%.+]] = alloca double*, - // CHECK: [[A:%.+]] = alloca i{{[0-9]+}}, - // CHECK: [[A2:%.+]] = alloca i{{[0-9]+}}, - // CHECK: [[B:%.+]] = alloca [10 x float], - // CHECK: [[SSTACK:%.+]] = alloca i8*, - // CHECK: [[C:%.+]] = alloca [5 x [10 x double]], - // CHECK: [[D:%.+]] = alloca [[TT]], - // CHECK: [[FP_E:%.+]] = alloca [[TTII]], - // CHECK: [[P:%.+]] = alloca i32*, align 64 - // CHECK: [[ACAST:%.+]] = alloca i{{[0-9]+}}, - // CHECK: [[BASE_PTR_ARR:%.+]] = alloca [3 x i8*], - // CHECK: [[PTR_ARR:%.+]] = alloca [3 x i8*], - // CHECK: [[A2CAST:%.+]] = alloca i{{[0-9]+}}, - // CHECK: [[BASE_PTR_ARR2:%.+]] = alloca [9 x i8*], - // CHECK: [[PTR_ARR2:%.+]] = alloca [9 x i8*], - // CHECK: [[SIZET2:%.+]] = alloca [9 x i{{[0-9]+}}], - // CHECK: [[BASE_PTR_ARR3:%.+]] = alloca [2 x i8*], - // CHECK: [[PTR_ARR3:%.+]] = alloca [2 x i8*], - // CHECK: [[N_ADDR_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[N_ADDR]], - // CHECK-64: [[N_EXT:%.+]] = zext i{{[0-9]+}} [[N_ADDR_VAL]] to i{{[0-9]+}} - // CHECK: [[SSAVE_RET:%.+]] = call i8* @llvm.stacksave() - // CHECK: store i8* [[SSAVE_RET]], i8** [[SSTACK]], - // CHECK-64: [[BN_VLA:%.+]] = alloca float, i{{[0-9]+}} [[N_EXT]], - // CHECK-32: [[BN_VLA:%.+]] = alloca float, i{{[0-9]+}} [[N_ADDR_VAL]], - // CHECK: [[N_ADDR_VAL2:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[N_ADDR]], - // CHECK-64: [[N_EXT2:%.+]] = zext i{{[0-9]+}} [[N_ADDR_VAL2]] to i{{[0-9]+}} - // CHECK-64: [[CN_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_EXT2]] - // CHECK-32: [[CN_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_ADDR_VAL2]] - // CHECK: [[CN_VLA:%.+]] = alloca double, i{{[0-9]+}} [[CN_SIZE]], - // CHECK: [[AVAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A]], - // CHECK-64: [[CONV:%.+]] = bitcast i{{[0-9]+}}* [[ACAST]] to i{{[0-9]+}}* - // CHECK-64: store i{{[0-9]+}} [[AVAL]], i{{[0-9]+}}* [[CONV]], - // CHECK-32: store i{{[0-9]+}} [[AVAL]], i{{[0-9]+}}* [[ACAST]], - // CHECK: [[ACAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[ACAST]], - // CHECK: [[P_PTR:%.+]] = load i32*, i32** [[P]], align 64 - // CHECK: [[BASE_PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[ACAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[ACAST_VAL]], i{{[0-9]+}}* [[ACAST_TOPTR]], - // CHECK: [[PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[ACAST_TOPTR2:%.+]] = bitcast i8** [[PTR_GEP]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[ACAST_VAL]], i{{[0-9]+}}* [[ACAST_TOPTR2]], - // CHECK: [[BASE_PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[PCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP]] to i32*** - // CHECK: store i32** [[P]], i32*** 
[[PCAST_TOPTR]], - // CHECK: [[PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[PCAST_TOPTR2:%.+]] = bitcast i8** [[PTR_GEP]] to i32** - // CHECK: store i32* [[P_PTR]], i32** [[PCAST_TOPTR2]], - // CHECK: [[BASE_PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // CHECK: [[PCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP]] to i{{64|32}}* - // CHECK: store i{{64|32}} [[GA_VAL:%.*]], i{{64|32}}* [[PCAST_TOPTR]], - // CHECK: [[PTR_GEP:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // CHECK: [[PCAST_TOPTR2:%.+]] = bitcast i8** [[PTR_GEP]] to i{{64|32}}* - // CHECK: store i{{64|32}} [[GA_VAL]], i{{64|32}}* [[PCAST_TOPTR2]], - // CHECK: [[BASE_PTR_GEP_ARG:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[PTR_GEP_ARG:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTR_ARR]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]]) - - // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i32** noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[P_IN:%.+]], i{{[0-9]+}} noundef [[GA_IN:%.+]]) - // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[P_ADDR:%.+]] = alloca i32**, - // TCHECK: [[GA_ADDR:%.+]] = alloca i{{64|32}}, - // TCHECK: [[P_PRIV:%.+]] = alloca i32*, - // TCHECK-NOT: alloca i{{[0-9]+}} - // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], - // TCHECK: store i32** [[P_IN]], i32*** [[P_ADDR]], - // TCHECK: store i{{[0-9]+}} [[GA_IN]], i{{[0-9]+}}* [[GA_ADDR]], - // TCHECK-NOT: store i{{[0-9]+}} % - // TCHECK: ret void + #pragma omp target firstprivate(aa, b, bn, c, cn, d) { @@ -166,198 +78,44 @@ int foo(int n, double *ptr) { d.Y = 1; } - // CHECK: [[A2VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A2]], - // CHECK: [[A2CASTCONV:%.+]] = bitcast i{{[0-9]+}}* [[A2CAST]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[A2VAL]], i{{[0-9]+}}* [[A2CASTCONV]], - // CHECK: [[A2CAST_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[A2CAST]], - // CHECK-64: [[BN_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} [[N_EXT]], 4 - // CHECK-32: [[BN_SZ_SIZE:%.+]] = mul{{.+}} i{{[0-9]+}} [[N_ADDR_VAL]], 4 - // CHECK-32: [[BN_SIZE:%.+]] = sext i32 [[BN_SZ_SIZE]] to i64 - // CHECK-64: [[CN_SIZE_1:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_EXT2]] - // CHECK-32: [[CN_SIZE_1:%.+]] = mul{{.+}} i{{[0-9]+}} 5, [[N_ADDR_VAL2]] - // CHECK-64: [[CN_SIZE_2:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SIZE_1]], 8 - // CHECK-32: [[CN_SZ_SIZE_2:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SIZE_1]], 8 - // CHECK-32: [[CN_SIZE_2:%.+]] = sext i32 [[CN_SZ_SIZE_2]] to i64 // firstprivate(aa) --> base_ptr = aa, ptr = aa, size = 2 (short) - // CHECK: [[BASE_PTR_GEP2_0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[ACAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_0]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[A2CAST_VAL]], i{{[0-9]+}}* [[ACAST_TOPTR]], - // CHECK: [[PTR_GEP2_0:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[ACAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_0]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[A2CAST_VAL]], i{{[0-9]+}}* [[ACAST_TOPTR]], // 
firstprivate(b): base_ptr = &b[0], ptr = &b[0], size = 40 (sizeof(float)*10) - // CHECK: [[BASE_PTR_GEP2_1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_1]] to [10 x float]** - // CHECK: store [10 x float]* [[B]], [10 x float]** [[BCAST_TOPTR]], - // CHECK: [[PTR_GEP2_1:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_1]] to [10 x float]** - // CHECK: store [10 x float]* [[B]], [10 x float]** [[BCAST_TOPTR]], // firstprivate(bn), 2 entries, n and bn: (1) base_ptr = n, ptr = n, size = 8; (2) base_ptr = &bn[0], ptr = &bn[0], size = n*sizeof(float) - // CHECK: [[BASE_PTR_GEP2_2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_2]] to i{{[0-9]+}}* - // CHECK-64: store i{{[0-9]+}} [[N_EXT]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK-32: store i{{[0-9]+}} [[N_ADDR_VAL]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[PTR_GEP2_2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_2]] to i{{[0-9]+}}* - // CHECK-64: store i{{[0-9]+}} [[N_EXT]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK-32: store i{{[0-9]+}} [[N_ADDR_VAL]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[BASE_PTR_GEP2_3:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_3]] to float** - // CHECK: store float* [[BN_VLA]], float** [[BCAST_TOPTR]], - // CHECK: [[SIZE_GEPBN_3:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 - // CHECK: store i{{[0-9]+}} [[BN_SIZE]], i{{[0-9]+}}* [[SIZE_GEPBN_3]] // firstprivate(c): base_ptr = &c[0], ptr = &c[0], size = 400 (5*10*sizeof(double)) - // CHECK: [[BASE_PTR_GEP2_4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_4]] to [5 x [10 x double]]** - // CHECK: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[BCAST_TOPTR]], - // CHECK: [[PTR_GEP2_4:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_4]] to [5 x [10 x double]]** - // CHECK: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[BCAST_TOPTR]], // firstprivate(cn), 3 entries, 5, n, cn: (1) base_ptr = 5, ptr = 5, size = 8; (2) base_ptr = n, ptr = n, size = 8; (3) base_ptr = &cn[0], ptr = &cn[0], size = 5*n*sizeof(double) - // CHECK: [[BASE_PTR_GEP2_5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 5 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_5]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} 5, i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[PTR_GEP2_5:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 5 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_5]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} 5, i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[BASE_PTR_GEP2_6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 6 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast
i8** [[BASE_PTR_GEP2_6]] to i{{[0-9]+}}* - // CHECK-64: store i{{[0-9]+}} [[N_EXT2]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK-32: store i{{[0-9]+}} [[N_ADDR_VAL2]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[PTR_GEP2_6:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 6 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_6]] to i{{[0-9]+}}* - // CHECK-64: store i{{[0-9]+}} [[N_EXT2]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK-32: store i{{[0-9]+}} [[N_ADDR_VAL2]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[BASE_PTR_GEP2_7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 7 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_7]] to double** - // CHECK: store double* [[CN_VLA]], double** [[BCAST_TOPTR]], - // CHECK: [[PTR_GEP2_7:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 7 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_7]] to double** - // CHECK: store double* [[CN_VLA]], double** [[BCAST_TOPTR]], - // CHECK: [[SIZE_GEPCN_7:%.+]] = getelementptr inbounds [9 x i{{[0-9]+}}], [9 x i{{[0-9]+}}]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 7 - // CHECK: store i{{[0-9]+}} [[CN_SIZE_2]], i{{[0-9]+}}* [[SIZE_GEPCN_7]], // firstprivate(d): base_ptr = &d, ptr = &d, size = 16 - // CHECK: [[BASE_PTR_GEP2_8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 8 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP2_8]] to [[TT]]** - // CHECK: store [[TT]]* [[D]], [[TT]]** [[BCAST_TOPTR]], - // CHECK: [[PTR_GEP2_8:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 8 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP2_8]] to [[TT]]** - // CHECK: store [[TT]]* [[D]], [[TT]]** [[BCAST_TOPTR]], - - // CHECK: [[BASE_PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BASE_PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[PTR_GEP_ARG2:%.+]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[PTR_ARR2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[SIZES_ARG2:%.+]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[SIZET2]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]]) + // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the // target region - // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], i{{[0-9]+}} noundef [[BN_SZ:%.+]], float* {{.+}} [[BN_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], i{{[0-9]+}} noundef [[CN_SZ1:%.+]], i{{[0-9]+}} noundef [[CN_SZ2:%.+]], double* {{.+}} [[CN_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]]) - // TCHECK: [[A2_ADDR:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[B_ADDR:%.+]] = alloca [10 x float]*, - // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[BN_ADDR:%.+]] = alloca float*, - // TCHECK: [[C_ADDR:%.+]] = alloca [5 x [10 x double]]*, - // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[VLA_ADDR4:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[CN_ADDR:%.+]] = alloca double*, - // TCHECK: [[D_ADDR:%.+]] = alloca [[TT]]*, - // TCHECK-NOT: alloca i{{[0-9]+}}, - // TCHECK: [[B_PRIV:%.+]] = alloca [10 x float], - // TCHECK: [[SSTACK:%.+]] = alloca i8*, - // TCHECK: [[C_PRIV:%.+]] 
= alloca [5 x [10 x double]], - // TCHECK: [[D_PRIV:%.+]] = alloca [[TT]], - // TCHECK: store i{{[0-9]+}} [[A2_IN]], i{{[0-9]+}}* [[A2_ADDR]], - // TCHECK: store [10 x float]* [[B_IN]], [10 x float]** [[B_ADDR]], - // TCHECK: store i{{[0-9]+}} [[BN_SZ]], i{{[0-9]+}}* [[VLA_ADDR]], - // TCHECK: store float* [[BN_IN]], float** [[BN_ADDR]], - // TCHECK: store [5 x [10 x double]]* [[C_IN]], [5 x [10 x double]]** [[C_ADDR]], - // TCHECK: store i{{[0-9]+}} [[CN_SZ1]], i{{[0-9]+}}* [[VLA_ADDR2]], - // TCHECK: store i{{[0-9]+}} [[CN_SZ2]], i{{[0-9]+}}* [[VLA_ADDR4]], - // TCHECK: store double* [[CN_IN]], double** [[CN_ADDR]], - // TCHECK: store [[TT]]* [[D_IN]], [[TT]]** [[D_ADDR]], - // TCHECK: [[CONV_A2ADDR:%.+]] = bitcast i{{[0-9]+}}* [[A2_ADDR]] to i{{[0-9]+}}* - // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** [[B_ADDR]], - // TCHECK: [[BN_SZ_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR]], - // TCHECK: [[BN_ADDR_REF:%.+]] = load float*, float** [[BN_ADDR]], - // TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], - // TCHECK: [[CN_SZ1_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR2]], - // TCHECK: [[CN_SZ2_VAL:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR4]], - // TCHECK: [[CN_ADDR_REF:%.+]] = load double*, double** [[CN_ADDR]], - // TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** [[D_ADDR]], // firstprivate(aa): a_priv = a_in - // TCHECK-NOT: store i{{[0-9]+}} % // firstprivate(b): memcpy(b_priv,b_in) - // TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x float]* [[B_PRIV]] to i8* - // TCHECK: [[B_ADDR_REF_BCAST:%.+]] = bitcast [10 x float]* [[B_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_ADDR_REF_BCAST]], {{.+}}) - // TCHECK: [[RET_STACK:%.+]] = call i8* @llvm.stacksave() - // TCHECK: store i8* [[RET_STACK]], i8** [[SSTACK]], // firstprivate(bn) - // TCHECK: [[BN_PRIV:%.+]] = alloca float, i{{[0-9]+}} [[BN_SZ_VAL]], - // TCHECK: [[BN_COPY_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[BN_SZ_VAL]], 4 - // TCHECK: [[BN_PRIV__BCAST:%.+]] = bitcast float* [[BN_PRIV]] to i8* - // TCHECK: [[BN_REF_IN_BCAST:%.+]] = bitcast float* [[BN_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[BN_PRIV__BCAST]], i8* align {{[0-9]+}} [[BN_REF_IN_BCAST]], i{{[0-9]+}} [[BN_COPY_SZ]],{{.+}}) // firstprivate(c) - // TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_PRIV]] to i8* - // TCHECK: [[C_IN_BCAST:%.+]] = bitcast [5 x [10 x double]]* [[C_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[C_PRIV_BCAST]], i8* align {{[0-9]+}} [[C_IN_BCAST]],{{.+}}) // firstprivate(cn) - // TCHECK: [[CN_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ1_VAL]], [[CN_SZ2_VAL]] - // TCHECK: [[CN_PRIV:%.+]] = alloca double, i{{[0-9]+}} [[CN_SZ]], - // TCHECK: [[CN_SZ2:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ1_VAL]], [[CN_SZ2_VAL]] - // TCHECK: [[CN_SZ2_CPY:%.+]] = mul{{.+}} i{{[0-9]+}} [[CN_SZ2]], 8 - // TCHECK: [[CN_PRIV_BCAST:%.+]] = bitcast double* [[CN_PRIV]] to i8* - // TCHECK: [[CN_IN_BCAST:%.+]] = bitcast double* [[CN_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[CN_PRIV_BCAST]], i8* align {{[0-9]+}} [[CN_IN_BCAST]], i{{[0-9]+}} [[CN_SZ2_CPY]],{{.+}}) // firstprivate(d) - // TCHECK: [[D_PRIV_BCAST:%.+]] = bitcast [[TT]]* [[D_PRIV]] to i8* - // TCHECK: [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} 
[[D_PRIV_BCAST]], i8* align {{[0-9]+}} [[D_IN_BCAST]],{{.+}}) #pragma omp target firstprivate(ptr, e) { ptr[0] = e.X; ptr[0]++; } - // CHECK: [[PTR_ADDR_REF:%.+]] = load double*, double** [[PTR_ADDR]], - - // CHECK: [[BASE_PTR_GEP3_0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP3_0]] to double** - // CHECK: store double* [[PTR_ADDR_REF]], double** [[BCAST_TOPTR]], - // CHECK: [[PTR_GEP3_0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP3_0]] to double** - // CHECK: store double* [[PTR_ADDR_REF]], double** [[BCAST_TOPTR]], - // CHECK: [[BASE_PTR_GEP3_1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTR_GEP3_1]] to [[TTII]]** - // CHECK: store [[TTII]]* [[FP_E]], [[TTII]]** [[BCAST_TOPTR]], - // CHECK: [[PTR_GEP3_1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTR_GEP3_1]] to [[TTII]]** - // CHECK: store [[TTII]]* [[FP_E]], [[TTII]]** [[BCAST_TOPTR]], - - // CHECK: [[BASE_PTR_GEP_ARG3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[PTR_GEP_ARG3:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTR_ARR3]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]]) - - // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(double* noundef [[PTR_IN:%.+]], [[TTII]]* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[E:%.+]]) - // TCHECK: [[PTR_ADDR:%.+]] = alloca double*, - // TCHECK-NOT: alloca [[TTII]], - // TCHECK-NOT: alloca double*, - // TCHECK: store double* [[PTR_IN]], double** [[PTR_ADDR]], - // TCHECK-NOT: store double* % + + + return a; } @@ -391,30 +149,13 @@ static int fstatic(int n) { return a; } -// TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], i{{[0-9]+}} noundef [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) -// TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, -// TCHECK: [[A3_ADDR:%.+]] = alloca i{{[0-9]+}}, -// TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, -// TCHECK-NOT: alloca i{{[0-9]+}}, -// TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], -// TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], -// TCHECK: store i{{[0-9]+}} [[A3_IN]], i{{[0-9]+}}* [[A3_ADDR]], -// TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK-64: [[A_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* -// TCHECK: [[A3_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A3_ADDR]] to i8* -// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], // firstprivate(a): a_priv = a_in // firstprivate(aaa) -// TCHECK-NOT: store i{{[0-9]+}} % // firstprivate(b) -// TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* -// TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* -// TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) -// TCHECK: ret void struct S1 { double a; @@ -433,123 +174,33 @@ struct S1 { } // 
on the host side, we first generate r1, then the static function and the template above - // CHECK: define{{.+}} i32 {{.+}}([[S1]]* {{.+}}, i{{[0-9]+}} {{.+}}) - // CHECK: [[BASE_PTRS4:%.+]] = alloca [5 x i8*], - // CHECK: [[PTRS4:%.+]] = alloca [5 x i8*], - // CHECK: [[SIZET4:%.+]] = alloca [5 x i{{[0-9]+}}], // map(this: this ptr is implicitly captured (not firstprivate matter) - // CHECK: [[BP0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[CBP0:%.+]] = bitcast i8** [[BP0]] to %struct.S1** - // CHECK: store %struct.S1* [[THIS:%.+]], %struct.S1** [[CBP0]], - // CHECK: [[P0:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[CP0:%.+]] = bitcast i8** [[P0]] to double** - // CHECK: store double* [[A:%.+]], double** [[CP0]], // firstprivate(b): base_ptr = b, ptr = b, size = 4 (pass by-value) - // CHECK: [[BASE_PTRS_GEP4_1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP4_1]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[B_CAST:%.+]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[PTRS_GEP4_1:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP4_1]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[B_CAST]], i{{[0-9]+}}* [[BCAST_TOPTR]], // firstprivate(c), 3 entries: 2, n, c - // CHECK: [[BASE_PTRS_GEP4_2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP4_2]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[PTRS_GEP4_2:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP4_2]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} 2, i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[BASE_PTRS_GEP4_3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP4_3]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[N:%.+]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[PTRS_GEP4_3:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 3 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP4_3]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[N]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[BASE_PTRS_GEP4_4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BASE_PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP4_4]] to i{{[0-9]+}}** - // CHECK: store i{{[0-9]+}}* [[B:%.+]], i{{[0-9]+}}** [[BCAST_TOPTR]], - // CHECK: [[PTRS_GEP4_4:%.+]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[PTRS4]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP4_4]] to i{{[0-9]+}}** - // CHECK: store i{{[0-9]+}}* [[B]], i{{[0-9]+}}** [[BCAST_TOPTR]], - // CHECK: [[SIZES_GEP4_4:%.+]] = getelementptr inbounds [5 x i{{[0-9]+}}], [5 x i{{[0-9]+}}]* [[SIZET4]], i{{[0-9]+}} 0, i{{[0-9]+}} 4 - // CHECK: store i{{[0-9]+}} [[B_SIZE:%.+]], i{{[0-9]+}}* [[SIZES_GEP4_4]], // only check that we use the map types stored in the global variable - // CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* 
@.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]]) - - // TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}([[S1]]* noundef [[TH:%.+]], i{{[0-9]+}} noundef [[B_IN:%.+]], i{{[0-9]+}} noundef [[VLA:%.+]], i{{[0-9]+}} noundef [[VLA1:%.+]], i{{[0-9]+}}{{.+}} [[C_IN:%.+]]) - // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*, - // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[VLA_ADDR:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[VLA_ADDR2:%.+]] = alloca i{{[0-9]+}}, - // TCHECK: [[C_ADDR:%.+]] = alloca i{{[0-9]+}}*, - // TCHECK-NOT: alloca i{{[0-9]+}}, - // TCHECK: [[SSTACK:%.+]] = alloca i8*, - - // TCHECK: store [[S1]]* [[TH]], [[S1]]** [[TH_ADDR]], - // TCHECK: store i{{[0-9]+}} [[B_IN]], i{{[0-9]+}}* [[B_ADDR]], - // TCHECK: store i{{[0-9]+}} [[VLA]], i{{[0-9]+}}* [[VLA_ADDR]], - // TCHECK: store i{{[0-9]+}} [[VLA1]], i{{[0-9]+}}* [[VLA_ADDR2]], - // TCHECK: store i{{[0-9]+}}* [[C_IN]], i{{[0-9]+}}** [[C_ADDR]], - // TCHECK: [[TH_ADDR_REF:%.+]] = load [[S1]]*, [[S1]]** [[TH_ADDR]], - // TCHECK-64: [[B_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[B_ADDR]] to i{{[0-9]+}}* - // TCHECK: [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR]], - // TCHECK: [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, i{{[0-9]+}}* [[VLA_ADDR2]], - // TCHECK: [[C_ADDR_REF:%.+]] = load i{{[0-9]+}}*, i{{[0-9]+}}** [[C_ADDR]], + + // firstprivate(b) - // TCHECK-NOT: store i{{[0-9]+}} % - // TCHECK: [[RET_STACK:%.+]] = call i8* @llvm.stacksave() - // TCHECK: store i8* [[RET_STACK:%.+]], i8** [[SSTACK]], // firstprivate(c) - // TCHECK: [[C_SZ:%.+]] = mul{{.+}} i{{[0-9]+}} [[VLA_ADDR_REF]], [[VLA_ADDR_REF2]] - // TCHECK: [[C_PRIV:%.+]] = alloca i{{[0-9]+}}, i{{[0-9]+}} [[C_SZ]], - // TCHECK: [[C_SZ2:%.+]] = mul{{.+}} i{{[0-9]+}} [[VLA_ADDR_REF]], [[VLA_ADDR_REF2]] - // TCHECK: [[C_SZ_CPY:%.+]] = mul{{.+}} i{{[0-9]+}} [[C_SZ2]], 2 - // TCHECK: [[C_PRIV_BCAST:%.+]] = bitcast i{{[0-9]+}}* [[C_PRIV]] to i8* - // TCHECK: [[C_IN_BCAST:%.+]] = bitcast i{{[0-9]+}}* [[C_ADDR_REF]] to i8* - // TCHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[C_PRIV_BCAST]], i8* align {{[0-9]+}} [[C_IN_BCAST]],{{.+}}) // finish - // TCHECK: [[RELOAD_SSTACK:%.+]] = load i8*, i8** [[SSTACK]], - // TCHECK: call void @llvm.stackrestore(i8* [[RELOAD_SSTACK]]) - // TCHECK: ret void // static host function - // CHECK: define{{.+}} i32 {{.+}}(i{{[0-9]+}} {{.+}}) - // CHECK: [[BASE_PTRS5:%.+]] = alloca [3 x i8*], - // CHECK: [[PTRS5:%.+]] = alloca [3 x i8*], // firstprivate(a): by value - // CHECK: [[BASE_PTRS_GEP5_0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP5_0]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[A_CAST:%.+]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[PTRS_GEP5_0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP5_0]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[A_CAST]], i{{[0-9]+}}* [[BCAST_TOPTR]], // firstprivate(aaa): by value - // CHECK: [[BASE_PTRS_GEP5_1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP5_1]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[A3_CAST:%.+]], i{{[0-9]+}}* [[BCAST_TOPTR]], - // CHECK: [[PTRS_GEP5_1:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast 
i8** [[PTRS_GEP5_1]] to i{{[0-9]+}}* - // CHECK: store i{{[0-9]+}} [[A3_CAST]], i{{[0-9]+}}* [[BCAST_TOPTR]], // firstprivate(b): base_ptr = &b[0], ptr= &b[0] - // CHECK: [[BASE_PTRS_GEP5_2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BASE_PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP5_2]] to [10 x i{{[0-9]+}}]** - // CHECK: store [10 x i{{[0-9]+}}]* [[B:%.+]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]], - // CHECK: [[PTRS_GEP5_2:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PTRS5]], i{{[0-9]+}} 0, i{{[0-9]+}} 2 - // CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP5_2]] to [10 x i{{[0-9]+}}]** - // CHECK: store [10 x i{{[0-9]+}}]* [[B]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]], // only check that the right sizes and map types are used - // CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]]) }; int bar(int n, double *ptr) { @@ -565,46 +216,11786 @@ int bar(int n, double *ptr) { // template host and device -// CHECK: define{{.+}} i32 {{.+}}(i{{[0-9]+}} {{.+}}) -// CHECK: [[BASE_PTRS6:%.+]] = alloca [2 x i8*], -// CHECK: [[PTRS6:%.+]] = alloca [2 x i8*], // firstprivate(a): by value -// CHECK: [[BASE_PTRS_GEP6_0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP6_0]] to i{{[0-9]+}}* -// CHECK: store i{{[0-9]+}} [[AT_CAST:%.+]], i{{[0-9]+}}* [[BCAST_TOPTR]], -// CHECK: [[PTRS_GEP6_0:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 0 -// CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP6_0]] to i{{[0-9]+}}* -// CHECK: store i{{[0-9]+}} [[AT_CAST]], i{{[0-9]+}}* [[BCAST_TOPTR]], // firstprivate(b): pointer -// CHECK: [[BASE_PTRS_GEP6_1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BASE_PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[BASE_PTRS_GEP6_1]] to [10 x i{{[0-9]+}}]** -// CHECK: store [10 x i{{[0-9]+}}]* [[B:%.+]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]], -// CHECK: [[PTRS_GEP6_1:%.+]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[PTRS6]], i{{[0-9]+}} 0, i{{[0-9]+}} 1 -// CHECK: [[BCAST_TOPTR:%.+]] = bitcast i8** [[PTRS_GEP6_1]] to [10 x i{{[0-9]+}}]** -// CHECK: store [10 x i{{[0-9]+}}]* [[B]], [10 x i{{[0-9]+}}]** [[BCAST_TOPTR]], - -// CHECK: {{.+}} = call i32 @__tgt_target_kernel(%struct.ident_t* @{{.+}}, i64 -1, i32 -1, i32 0, i8* @.{{.+}}.region_id, %struct.__tgt_kernel_arguments* [[ARGS:%.+]]) - -// TCHECK: define weak_odr protected void @__omp_offloading_{{.+}}(i{{[0-9]+}} noundef [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) -// TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, -// TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, -// TCHECK-NOT: alloca i{{[0-9]+}}, -// TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], -// TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], -// TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK-64: [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* -// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], + + // firstprivate(a) -// TCHECK-NOT: store i{{[0-9]+}} % // firstprivate(b) -// TCHECK: [[B_PRIV_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_PRIV]] to i8* -// TCHECK: [[B_IN_BCAST:%.+]] = bitcast [10 x i{{[0-9]+}}]* [[B_ADDR_REF]] to i8* -// TCHECK: call void 
@llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[B_PRIV_BCAST]], i8* align {{[0-9]+}} [[B_IN_BCAST]],{{.+}}) -// TCHECK: ret void #endif +// CHECK-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-64-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK-64-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK-64-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK-64-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8 +// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-64-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-64-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK-64-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK-64-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK-64-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-64-NEXT: store i32 [[TMP7]], 
i32* [[Y]], align 4 +// CHECK-64-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4 +// CHECK-64-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK-64-NEXT: [[TMP11:%.*]] = load i32, i32* @ga, align 4 +// CHECK-64-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32* +// CHECK-64-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64* +// CHECK-64-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8 +// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK-64-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* null, i8** [[TMP17]], align 8 +// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK-64-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8 +// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32** +// CHECK-64-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8 +// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK-64-NEXT: store i8* null, i8** [[TMP22]], align 8 +// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64* +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK-64-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8 +// CHECK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK-64-NEXT: store i8* null, i8** [[TMP27]], align 8 +// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-64-NEXT: store i32 2, i32* [[TMP30]], align 4 +// CHECK-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 3, i32* [[TMP31]], align 4 +// CHECK-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-64-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8 +// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-64-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8 +// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8 +// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8 +// CHECK-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP36]], align 8 +// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP37]], align 8 +// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 0, i64* [[TMP38]], align 8 +// CHECK-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, i64* [[TMP39]], align 8 +// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4 +// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4 +// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, i32* [[TMP42]], align 4 +// CHECK-64-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-64-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK-64-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-64: omp_offload.failed: +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]] +// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-64: omp_offload.cont: +// CHECK-64-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* +// CHECK-64-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2 +// CHECK-64-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK-64-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK-64-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]] +// 
CHECK-64-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8 +// CHECK-64-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false) +// CHECK-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64* +// CHECK-64-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8 +// CHECK-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64* +// CHECK-64-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8 +// CHECK-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* null, i8** [[TMP55]], align 8 +// CHECK-64-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8 +// CHECK-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8 +// CHECK-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1 +// CHECK-64-NEXT: store i8* null, i8** [[TMP60]], align 8 +// CHECK-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64* +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8 +// CHECK-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2 +// CHECK-64-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64* +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8 +// CHECK-64-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2 +// CHECK-64-NEXT: store i8* null, i8** [[TMP65]], align 8 +// CHECK-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3 +// CHECK-64-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK-64-NEXT: store float* [[VLA]], float** [[TMP67]], align 8 +// CHECK-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3 +// CHECK-64-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK-64-NEXT: store float* [[VLA]], float** [[TMP69]], align 8 +// CHECK-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK-64-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8 +// CHECK-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3 +// CHECK-64-NEXT: store i8* null, i8** [[TMP71]], align 8 +// CHECK-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4 +// CHECK-64-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8 +// CHECK-64-NEXT: [[TMP74:%.*]] = 
getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4 +// CHECK-64-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8 +// CHECK-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4 +// CHECK-64-NEXT: store i8* null, i8** [[TMP76]], align 8 +// CHECK-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5 +// CHECK-64-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64* +// CHECK-64-NEXT: store i64 5, i64* [[TMP78]], align 8 +// CHECK-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5 +// CHECK-64-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64* +// CHECK-64-NEXT: store i64 5, i64* [[TMP80]], align 8 +// CHECK-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5 +// CHECK-64-NEXT: store i8* null, i8** [[TMP81]], align 8 +// CHECK-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6 +// CHECK-64-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64* +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8 +// CHECK-64-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6 +// CHECK-64-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64* +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8 +// CHECK-64-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6 +// CHECK-64-NEXT: store i8* null, i8** [[TMP86]], align 8 +// CHECK-64-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7 +// CHECK-64-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK-64-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8 +// CHECK-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7 +// CHECK-64-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK-64-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8 +// CHECK-64-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK-64-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8 +// CHECK-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7 +// CHECK-64-NEXT: store i8* null, i8** [[TMP92]], align 8 +// CHECK-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8 +// CHECK-64-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8 +// CHECK-64-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8 +// CHECK-64-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8 +// CHECK-64-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8 +// CHECK-64-NEXT: store i8* null, i8** [[TMP97]], align 8 +// CHECK-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* 
[[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-64-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK-64-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0 +// CHECK-64-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK-64-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2 +// CHECK-64-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8 +// CHECK-64-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3 +// CHECK-64-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8 +// CHECK-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4 +// CHECK-64-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8 +// CHECK-64-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8 +// CHECK-64-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP107]], align 8 +// CHECK-64-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP108]], align 8 +// CHECK-64-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK-64-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK-64-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10 +// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK-64-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK-64-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]]) +// CHECK-64-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK-64-NEXT: br i1 [[TMP115]], label 
[[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] +// CHECK-64: omp_offload.failed8: +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT9]] +// CHECK-64: omp_offload.cont9: +// CHECK-64-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK-64-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8 +// CHECK-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK-64-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8 +// CHECK-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CHECK-64-NEXT: store i8* null, i8** [[TMP121]], align 8 +// CHECK-64-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8 +// CHECK-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CHECK-64-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8 +// CHECK-64-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CHECK-64-NEXT: store i8* null, i8** [[TMP126]], align 8 +// CHECK-64-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK-64-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0 +// CHECK-64-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK-64-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1 +// CHECK-64-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK-64-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2 +// CHECK-64-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8 +// CHECK-64-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3 +// CHECK-64-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8 +// CHECK-64-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8 +// CHECK-64-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5 +// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8 +// CHECK-64-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP135]], align 8 +// CHECK-64-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7 +// CHECK-64-NEXT: store i8** null, i8*** [[TMP136]], align 8 +// CHECK-64-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8 +// CHECK-64-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK-64-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9 +// CHECK-64-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK-64-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10 +// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK-64-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11 +// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12 +// CHECK-64-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK-64-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]]) +// CHECK-64-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK-64-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]] +// CHECK-64: omp_offload.failed14: +// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT15]] +// CHECK-64: omp_offload.cont15: +// CHECK-64-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-64-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK-64-NEXT: ret i32 [[TMP144]] +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK-64-SAME: 
(i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// CHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// CHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// CHECK-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// CHECK-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// CHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// CHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// CHECK-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// CHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// CHECK-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// CHECK-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast float* 
[[TMP2]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) +// CHECK-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// CHECK-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// CHECK-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// CHECK-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// CHECK-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// CHECK-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// CHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// CHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// CHECK-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// CHECK-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// CHECK-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// CHECK-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// CHECK-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK-64-NEXT: store i64 1, i64* [[X]], align 8 +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK-64-NEXT: store i8 1, i8* [[Y]], align 8 +// CHECK-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK-64-NEXT: ret void +// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// CHECK-64-NEXT: 
[[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// CHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// CHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
+// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
+// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
+// CHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4
+// CHECK-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
+// CHECK-64-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
+// CHECK-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8
+// CHECK-64-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
+// CHECK-64-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
+// CHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
+// CHECK-64-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8
+// CHECK-64-NEXT: ret void
+// CHECK-64-LABEL: define {{[^@]+}}@_Z3bariPd
+// CHECK-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-64-NEXT: entry:
+// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK-64-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
+// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK-64-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// CHECK-64-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
+// CHECK-64-NEXT: store i32 [[ADD]], i32* [[A]], align 4
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK-64-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
+// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK-64-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
+// CHECK-64-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
+// CHECK-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK-64-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
+// CHECK-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
+// CHECK-64-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
+// CHECK-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-64-NEXT: ret i32 [[TMP9]]
+// CHECK-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
+// CHECK-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
+// CHECK-64-NEXT: entry:
+// CHECK-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK-64-NEXT: [[B:%.*]] = alloca i32, align 4
+// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
+// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
+// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
+// CHECK-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
+// CHECK-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK-64-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// CHECK-64-NEXT: store i32 [[ADD]], i32* [[B]], align 4
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK-64-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+// CHECK-64-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave()
+// CHECK-64-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
+// CHECK-64-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
+// CHECK-64-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
+// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4
+// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
+// CHECK-64-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4
+// CHECK-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
+// CHECK-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
+// CHECK-64-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
+// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
+// CHECK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
+// CHECK-64-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
+// CHECK-64-NEXT: store double* [[A]], double** [[TMP13]], align 8
+// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-64-NEXT: store i8* null, i8** [[TMP14]], align 8
+// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK-64-NEXT: store i64 [[TMP6]], i64* [[TMP16]], align 8
+// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-64-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
+// CHECK-64-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8
+// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK-64-NEXT: store i8* null, i8** [[TMP19]], align 8
+// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
+// CHECK-64-NEXT: store i64 2, i64* [[TMP21]], align 8
+// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-64-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
+// CHECK-64-NEXT: store i64 2, i64* [[TMP23]], align 8
+// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-64-NEXT: store i8* null, i8** [[TMP24]], align 8
+// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8
+// CHECK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK-64-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
+// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8
+// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK-64-NEXT: store i8* null, i8** [[TMP29]], align 8
+// CHECK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK-64-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
+// CHECK-64-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8
+// CHECK-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK-64-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
+// CHECK-64-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8
+// CHECK-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK-64-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8
+// CHECK-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK-64-NEXT: store i8* null, i8** [[TMP35]], align 8
+// CHECK-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-64-NEXT: store i32 2, i32* [[TMP39]], align 4
+// CHECK-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-64-NEXT: store i32 5, i32* [[TMP40]], align 4
+// CHECK-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-64-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8
+// CHECK-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-64-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8
+// CHECK-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-64-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 8
+// CHECK-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-64-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
+// CHECK-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-64-NEXT: store i8** null, i8*** [[TMP45]], align 8
+// CHECK-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-64-NEXT: store i8** null, i8*** [[TMP46]], align 8
+// CHECK-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-64-NEXT: store i64 0, i64* [[TMP47]], align 8
+// CHECK-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-64-NEXT: store i64 0, i64* [[TMP48]], align 8
+// CHECK-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
+// CHECK-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
+// CHECK-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-64-NEXT: store i32 0, i32* [[TMP51]], align 4
+// CHECK-64-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK-64-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
+// CHECK-64-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-64: omp_offload.failed:
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
+// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK-64: omp_offload.cont:
+// CHECK-64-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
+// CHECK-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
+// CHECK-64-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK-64-NEXT: [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
+// CHECK-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4
+// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
+// CHECK-64-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP57]])
+// CHECK-64-NEXT: ret i32 [[ADD4]]
+// CHECK-64-LABEL: define {{[^@]+}}@_ZL7fstatici
+// CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
+// CHECK-64-NEXT: entry:
+// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK-64-NEXT: [[AAA:%.*]] = alloca i8, align 1
+// CHECK-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
+// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
+// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
+// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
+// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK-64-NEXT: store i8 0, i8* [[AAA]], align 1
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
+// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
+// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
+// CHECK-64-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
+// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
+// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
+// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8
+// CHECK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-64-NEXT: store i8* null, i8** [[TMP8]], align 8
+// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK-64-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8
+// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-64-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
+// CHECK-64-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8
+// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK-64-NEXT: store i8* null, i8** [[TMP13]], align 8
+// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-64-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
+// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
+// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-64-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
+// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
+// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK-64-NEXT: store i8* null, i8** [[TMP18]], align 8
+// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-64-NEXT: store i32 2, i32* [[TMP21]], align 4
+// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-64-NEXT: store i32 3, i32* [[TMP22]], align 4
+// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-64-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8
+// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-64-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8
+// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
+// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
+// CHECK-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-64-NEXT: store i8** null, i8*** [[TMP27]], align 8
+// CHECK-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-64-NEXT: store i8** null, i8*** [[TMP28]], align 8
+// CHECK-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-64-NEXT: store i64 0, i64* [[TMP29]], align 8
+// CHECK-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-64-NEXT: store i64 0, i64* [[TMP30]], align 8
+// CHECK-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
+// CHECK-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
+// CHECK-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-64-NEXT: store i32 0, i32* [[TMP33]], align 4
+// CHECK-64-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK-64-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
+// CHECK-64-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-64: omp_offload.failed:
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
+// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK-64: omp_offload.cont:
+// CHECK-64-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-64-NEXT: ret i32 [[TMP36]]
+// CHECK-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
+// CHECK-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
+// CHECK-64-NEXT: entry:
+// CHECK-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK-64-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
+// CHECK-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
+// CHECK-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
+// CHECK-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
+// CHECK-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK-64-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
+// CHECK-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
+// CHECK-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
+// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8
+// CHECK-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
+// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK-64-NEXT: store i8* null, i8** [[TMP6]], align 8
+// CHECK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
+// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
+// CHECK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
+// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
+// CHECK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK-64-NEXT: store i8* null, i8** [[TMP11]], align 8
+// CHECK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-64-NEXT: store i32 2, i32* [[TMP14]], align 4
+// CHECK-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-64-NEXT: store i32 2, i32* [[TMP15]], align 4
+// CHECK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-64-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 8
+// CHECK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-64-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8
+// CHECK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
+// CHECK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
+// CHECK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-64-NEXT: store i8** null, i8*** [[TMP20]], align 8
+// CHECK-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-64-NEXT: store i8** null, i8*** [[TMP21]], align 8
+// CHECK-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-64-NEXT: store i64 0, i64* [[TMP22]], align 8
+// CHECK-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-64-NEXT: store i64 0, i64* [[TMP23]], align 8
+// CHECK-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
+// CHECK-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
+// CHECK-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-64-NEXT: store i32 0, i32* [[TMP26]], align 4
+// CHECK-64-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK-64-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-64: omp_offload.failed:
+// CHECK-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
+// CHECK-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK-64: omp_offload.cont:
+// CHECK-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-64-NEXT: ret i32 [[TMP29]]
+// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
+// CHECK-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
+// CHECK-64-NEXT: entry:
+// CHECK-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8
+// CHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// CHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
+// CHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
+// CHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
+// CHECK-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8
+// CHECK-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
+// CHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
+// CHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
+// CHECK-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
+// CHECK-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
+// CHECK-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
+// CHECK-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
+// CHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
+// CHECK-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
+// CHECK-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
+// CHECK-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
+// CHECK-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
+// CHECK-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
+// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
+// CHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
+// CHECK-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
+// CHECK-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
+// CHECK-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
+// CHECK-64-NEXT: store double [[ADD]], double* [[A]], align 8
+// CHECK-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
+// CHECK-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8
+// CHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
+// CHECK-64-NEXT: store double [[INC]], double* [[A5]], align 8
+// CHECK-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16
+// CHECK-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
+// CHECK-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
+// CHECK-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
+// CHECK-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
+// CHECK-64-NEXT: ret void
+// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
+// CHECK-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
+// CHECK-64-NEXT: entry:
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
+// CHECK-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4
+// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// CHECK-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
+// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
+// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// CHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
+// CHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
+// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
+// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
+// CHECK-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
+// CHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
+// CHECK-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
+// CHECK-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1
+// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
+// CHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
+// CHECK-64-NEXT: ret void
+// CHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
+// CHECK-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
+// CHECK-64-NEXT: entry:
+// CHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
+// CHECK-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// CHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// CHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
+// CHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// CHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
+// CHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// CHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// CHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
+// CHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
+// CHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
+// CHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
+// CHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
+// CHECK-64-NEXT: ret void
+// CHECK-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CHECK-64-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK-64-NEXT: entry:
+// CHECK-64-NEXT: call void @__tgt_register_requires(i64 1)
+// CHECK-64-NEXT: ret void
+// CHECK-32-LABEL: define {{[^@]+}}@_Z3fooiPd
+// CHECK-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-32-NEXT: entry:
+// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[AA:%.*]] = alloca i16, align 2
+// CHECK-32-NEXT: [[B:%.*]] = alloca [10 x float], align 4
+// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
+// CHECK-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
+// CHECK-32-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// CHECK-32-NEXT: [[P:%.*]] = alloca i32*, align 64
+// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
+// CHECK-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
+// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
+// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
+// CHECK-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
+// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
+// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
+// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
+// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK-32-NEXT: store i16 0, i16* [[AA]], align 2
+// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave()
+// CHECK-32-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
+// CHECK-32-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
+// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
+// CHECK-32-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
+// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
+// CHECK-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[X]], align 4
+// CHECK-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4
+// CHECK-32-NEXT: store i32* [[A]], i32** [[P]], align 64
+// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-32-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
+// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4
+// CHECK-32-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
+// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4
+// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
+// CHECK-32-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4
+// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK-32-NEXT: store i8* null, i8** [[TMP15]], align 4
+// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
+// CHECK-32-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4
+// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
+// CHECK-32-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4
+// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK-32-NEXT: store i8* null, i8** [[TMP20]], align 4
+// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK-32-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4
+// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK-32-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK-32-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4
+// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK-32-NEXT: store i8* null, i8** [[TMP25]], align 4
+// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK-32-NEXT: store i32 2, i32* [[TMP28]], align 4
+// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK-32-NEXT: store i32 3, i32* [[TMP29]], align 4
+// CHECK-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK-32-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4
+// CHECK-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK-32-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4
+// CHECK-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
+// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
+// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK-32-NEXT: store i8** null, i8*** [[TMP34]], align 4
+// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK-32-NEXT: store i8** null, i8*** [[TMP35]], align 4
+// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK-32-NEXT: store i64 0, i64* [[TMP36]], align 8
+// CHECK-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK-32-NEXT: store i64 0, i64* [[TMP37]], align 8
+// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
+// CHECK-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
+// CHECK-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK-32-NEXT: store i32 0, i32* [[TMP40]], align 4
+// CHECK-32-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK-32-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
+// CHECK-32-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK-32: omp_offload.failed:
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
+// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK-32: omp_offload.cont:
+// CHECK-32-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
+// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
+// CHECK-32-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2
+// CHECK-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
+// CHECK-32-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK-32-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
+// CHECK-32-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
+// CHECK-32-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
+// CHECK-32-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
+// CHECK-32-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
+// CHECK-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
+// CHECK-32-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4
+// CHECK-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
+// CHECK-32-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4
+// CHECK-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
+// CHECK-32-NEXT: store i8* null, i8** [[TMP55]], align 4
+// CHECK-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
+// CHECK-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
+// CHECK-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
+// CHECK-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
+// CHECK-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
+// CHECK-32-NEXT: store i8* null, i8** [[TMP60]], align 4
+// CHECK-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
+// CHECK-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
+// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4
+// CHECK-32-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
+// CHECK-32-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
+// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4
+// CHECK-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
+// CHECK-32-NEXT: store i8* null, i8** [[TMP65]], align 4
+// CHECK-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
+// CHECK-32-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
+// CHECK-32-NEXT: store float* [[VLA]], float** [[TMP67]], align 4
+// CHECK-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
+// CHECK-32-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
+// CHECK-32-NEXT: store float* [[VLA]], float** [[TMP69]], align 4
+// CHECK-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK-32-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4
+// CHECK-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
+// CHECK-32-NEXT: store i8* null, i8** [[TMP71]], align 4
+// CHECK-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
+// CHECK-32-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
+// CHECK-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
+// CHECK-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
+// CHECK-32-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
+// CHECK-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
+// CHECK-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
+// CHECK-32-NEXT: store i8* null, i8** [[TMP76]], align 4
+// CHECK-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
+// CHECK-32-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
+// CHECK-32-NEXT: store i32 5, i32* [[TMP78]], align 4
+// CHECK-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
+// CHECK-32-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK-32-NEXT: store i32 5, i32* [[TMP80]], align 4
+// CHECK-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
+// CHECK-32-NEXT: store i8* null, i8** [[TMP81]], align 4
+// CHECK-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
+// CHECK-32-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
+// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4
+// CHECK-32-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
+// CHECK-32-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
+// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4
+// CHECK-32-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
+// CHECK-32-NEXT: store i8* null, i8** [[TMP86]], align 4
+// CHECK-32-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
+// CHECK-32-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
+// CHECK-32-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4
+// CHECK-32-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
+// CHECK-32-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
+// CHECK-32-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4
+// CHECK-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK-32-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4
+// CHECK-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
+// CHECK-32-NEXT: store i8* null, i8** [[TMP92]], align 4
+// CHECK-32-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
+// CHECK-32-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
+// CHECK-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
+// CHECK-32-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
+// CHECK-32-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
+// CHECK-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
+// CHECK-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
+// CHECK-32-NEXT: store i8* null, i8** [[TMP97]], align 4
+// CHECK-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK-32-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK-32-NEXT: store i32 2, i32* [[TMP101]], align 4
+// CHECK-32-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK-32-NEXT: store i32 9, i32* [[TMP102]], align 4
+// CHECK-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK-32-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4
+// CHECK-32-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK-32-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4
+// CHECK-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK-32-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4
+// CHECK-32-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK-32-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
+// CHECK-32-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK-32-NEXT: store i8** null, i8*** [[TMP107]], align 4
+// CHECK-32-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK-32-NEXT: store i8** null, i8*** [[TMP108]], align 4
+// CHECK-32-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK-32-NEXT: store i64 0, i64* [[TMP109]], align 8
+// CHECK-32-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK-32-NEXT: store i64 0, i64* [[TMP110]], align 8
+// CHECK-32-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
+// CHECK-32-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
+// CHECK-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK-32-NEXT: store i32 0, i32* [[TMP113]], align 4
+// CHECK-32-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
+// CHECK-32-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
+// CHECK-32-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK-32: omp_offload.failed6:
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
+// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT7]]
+// CHECK-32: omp_offload.cont7:
+// CHECK-32-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
+// CHECK-32-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4
+// CHECK-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
+// CHECK-32-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4
+// CHECK-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
+// CHECK-32-NEXT: store i8* null, i8** [[TMP121]], align 4
+// CHECK-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
+// CHECK-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
+// CHECK-32-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
+// CHECK-32-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
+// CHECK-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
+// CHECK-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
+// CHECK-32-NEXT: store i8* null, i8** [[TMP126]], align 4
+// CHECK-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
+// CHECK-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
+// CHECK-32-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK-32-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
+// CHECK-32-NEXT: store i32 2, i32* [[TMP129]], align 4
+// CHECK-32-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
+// CHECK-32-NEXT: store i32 2, i32* [[TMP130]], align 4
+// CHECK-32-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
+// CHECK-32-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4
+// CHECK-32-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
+// CHECK-32-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4
+// CHECK-32-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
+// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
+// CHECK-32-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
+// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
+// CHECK-32-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
+// CHECK-32-NEXT: store i8** null, i8*** [[TMP135]], align 4
+// CHECK-32-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
+// CHECK-32-NEXT: store i8** null, i8*** [[TMP136]], align 4
+// CHECK-32-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
+// CHECK-32-NEXT: store i64 0, i64* [[TMP137]], align 8
+// CHECK-32-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
+// CHECK-32-NEXT: store i64 0, i64* [[TMP138]], align 8
+// CHECK-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
+// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
+// CHECK-32-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
+// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
+// CHECK-32-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
+// CHECK-32-NEXT: store i32 0, i32* [[TMP141]], align 4
+// CHECK-32-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
+// CHECK-32-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
+// CHECK-32-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
+// CHECK-32: omp_offload.failed12:
+// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
+// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT13]]
+// CHECK-32: omp_offload.cont13:
+// CHECK-32-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4
+// CHECK-32-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP145]])
+// CHECK-32-NEXT: ret i32 [[TMP144]]
+// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
+// CHECK-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-32-NEXT: entry:
+// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4
+// CHECK-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4
+// CHECK-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4
+// CHECK-32-NEXT: ret void
+// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
+// CHECK-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
+// CHECK-32-NEXT: entry:
+// CHECK-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
+// CHECK-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4
+// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
+// CHECK-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4
+// CHECK-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
+// CHECK-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
+// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
+// CHECK-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
+// CHECK-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
+// CHECK-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
+// CHECK-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
+// CHECK-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// CHECK-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
+// CHECK-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
+// CHECK-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// CHECK-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
+// CHECK-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
+// CHECK-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
+// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
+// CHECK-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
+// CHECK-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
+// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
+// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
+// CHECK-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
+// CHECK-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
+// CHECK-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
+// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// CHECK-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
+// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
+// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
+// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
+// CHECK-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
+// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
+// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
+// CHECK-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// CHECK-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
+// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
+// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
+// CHECK-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// CHECK-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
+// CHECK-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
+// CHECK-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
+// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
+// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
+// CHECK-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
+// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
+// CHECK-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
+// CHECK-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
+// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
+// CHECK-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
+// CHECK-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
+// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
+// CHECK-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
+// CHECK-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
+// CHECK-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
+// CHECK-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
+// CHECK-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
+// CHECK-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
+// CHECK-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
+// CHECK-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
+// CHECK-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
+// CHECK-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
+// CHECK-32-NEXT: store i64 1, i64* [[X]], align 4
+// CHECK-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
+// CHECK-32-NEXT: store i8 1, i8* [[Y]], align 4
+// CHECK-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
+// CHECK-32-NEXT: ret void
+// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
+// CHECK-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
+// CHECK-32-NEXT: entry:
+// CHECK-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// CHECK-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
+// CHECK-32-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// CHECK-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// CHECK-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
+// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
+// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
+// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false) +// CHECK-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK-32-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0 +// CHECK-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0 +// CHECK-32-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4 +// CHECK-32-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK-32-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4 +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK-32-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// CHECK-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK-32-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// CHECK-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK-32-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: ret i32 [[TMP9]] +// CHECK-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK-32-NEXT: 
[[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4 +// CHECK-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK-32-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK-32-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// CHECK-32-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 +// CHECK-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK-32-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false) +// CHECK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK-32-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK-32-NEXT: store double* [[A]], double** [[TMP13]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* null, i8** [[TMP14]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32* +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[TMP16]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32* +// CHECK-32-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK-32-NEXT: store i8* null, i8** [[TMP19]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32* +// CHECK-32-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32* +// CHECK-32-NEXT: store i32 2, i32* [[TMP23]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8* null, i8** [[TMP24]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK-32-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK-32-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK-32-NEXT: store i8* null, i8** [[TMP29]], align 4 +// CHECK-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK-32-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK-32-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4 +// CHECK-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK-32-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK-32-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4 +// CHECK-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK-32-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4 +// CHECK-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK-32-NEXT: store i8* null, i8** [[TMP35]], align 4 +// CHECK-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4 +// CHECK-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-32-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4 +// CHECK-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// 
CHECK-32-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4 +// CHECK-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4 +// CHECK-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP45]], align 4 +// CHECK-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP46]], align 4 +// CHECK-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-32-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-32-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-32-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK-32-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK-32-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-32: omp_offload.failed: +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]] +// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-32: omp_offload.cont: +// CHECK-32-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]] +// CHECK-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK-32-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK-32-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]] +// CHECK-32-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK-32-NEXT: ret i32 [[ADD3]] +// CHECK-32-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AAA:%.*]] = alloca 
i8, align 1 +// CHECK-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-32-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* +// CHECK-32-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* null, i8** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32* +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK-32-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK-32-NEXT: store i8* null, i8** [[TMP13]], align 4 +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8* null, i8** [[TMP18]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// 
CHECK-32-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4 +// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-32-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP27]], align 4 +// CHECK-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP28]], align 4 +// CHECK-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-32-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-32-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-32-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK-32-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-32-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK-32-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-32: omp_offload.failed: +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-32: omp_offload.cont: +// CHECK-32-NEXT: [[TMP36:%.*]] = load i32, i32* 
[[A]], align 4 +// CHECK-32-NEXT: ret i32 [[TMP36]] +// CHECK-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 +// CHECK-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK-32-NEXT: store i8* null, i8** [[TMP6]], align 4 +// CHECK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4 +// CHECK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4 +// CHECK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK-32-NEXT: store i8* null, i8** [[TMP11]], align 4 +// CHECK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK-32-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK-32-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK-32-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4 +// CHECK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK-32-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 4 +// CHECK-32-NEXT: [[TMP18:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4 +// CHECK-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4 +// CHECK-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP20]], align 4 +// CHECK-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK-32-NEXT: store i8** null, i8*** [[TMP21]], align 4 +// CHECK-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK-32-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK-32-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK-32-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK-32-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK-32-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK-32: omp_offload.failed: +// CHECK-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK-32: omp_offload.cont: +// CHECK-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK-32-NEXT: ret i32 [[TMP29]] +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK-32-NEXT: 
[[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// CHECK-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// CHECK-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// CHECK-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK-32-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]] +// CHECK-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// CHECK-32-NEXT: 
[[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// CHECK-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// CHECK-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK-32-NEXT: ret void +// CHECK-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK-32-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK-32-NEXT: entry: +// CHECK-32-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK-32-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK0-64-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[D:%.*]] = alloca 
[[STRUCT_TT:%.*]], align 8 +// CHECK0-64-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK0-64-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK0-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-64-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK0-64-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK0-64-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK0-64-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK0-64-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK0-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// CHECK0-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// CHECK0-64-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK0-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK0-64-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK0-64-NEXT: [[TMP11:%.*]] = load i32, i32* @ga, align 4 +// CHECK0-64-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32* +// CHECK0-64-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK0-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8 +// CHECK0-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 
0, i32 0 +// CHECK0-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8 +// CHECK0-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP17]], align 8 +// CHECK0-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK0-64-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8 +// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32** +// CHECK0-64-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8 +// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP22]], align 8 +// CHECK0-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8 +// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8 +// CHECK0-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP27]], align 8 +// CHECK0-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP30]], align 4 +// CHECK0-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 3, i32* [[TMP31]], align 4 +// CHECK0-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8 +// CHECK0-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8 +// CHECK0-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8 +// CHECK0-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8 +// CHECK0-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP36]], align 8 +// CHECK0-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP37]], align 8 +// CHECK0-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP38]], align 8 +// CHECK0-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP39]], align 8 +// CHECK0-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4 +// CHECK0-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4 +// CHECK0-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP42]], align 4 +// CHECK0-64-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-64-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK0-64-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0-64: omp_offload.failed: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0-64: omp_offload.cont: +// CHECK0-64-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK0-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* +// CHECK0-64-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2 +// CHECK0-64-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK0-64-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK0-64-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8 +// CHECK0-64-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false) +// CHECK0-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8 +// CHECK0-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8 +// CHECK0-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP55]], align 8 +// CHECK0-64-NEXT: 
[[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK0-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8 +// CHECK0-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK0-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8 +// CHECK0-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP60]], align 8 +// CHECK0-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8 +// CHECK0-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8 +// CHECK0-64-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP65]], align 8 +// CHECK0-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3 +// CHECK0-64-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK0-64-NEXT: store float* [[VLA]], float** [[TMP67]], align 8 +// CHECK0-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3 +// CHECK0-64-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK0-64-NEXT: store float* [[VLA]], float** [[TMP69]], align 8 +// CHECK0-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK0-64-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8 +// CHECK0-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP71]], align 8 +// CHECK0-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4 +// CHECK0-64-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK0-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8 +// CHECK0-64-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4 +// CHECK0-64-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK0-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8 +// CHECK0-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP76]], align 8 +// CHECK0-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5 +// CHECK0-64-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64* +// CHECK0-64-NEXT: store i64 5, i64* [[TMP78]], align 8 +// CHECK0-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5 +// CHECK0-64-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64* +// CHECK0-64-NEXT: store i64 5, i64* [[TMP80]], align 8 +// CHECK0-64-NEXT: [[TMP81:%.*]] = 
getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP81]], align 8 +// CHECK0-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6 +// CHECK0-64-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8 +// CHECK0-64-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6 +// CHECK0-64-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8 +// CHECK0-64-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP86]], align 8 +// CHECK0-64-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7 +// CHECK0-64-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK0-64-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8 +// CHECK0-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7 +// CHECK0-64-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK0-64-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8 +// CHECK0-64-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK0-64-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8 +// CHECK0-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP92]], align 8 +// CHECK0-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8 +// CHECK0-64-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK0-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8 +// CHECK0-64-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8 +// CHECK0-64-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK0-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8 +// CHECK0-64-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP97]], align 8 +// CHECK0-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK0-64-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK0-64-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK0-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8 +// CHECK0-64-NEXT: 
[[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8 +// CHECK0-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8 +// CHECK0-64-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8 +// CHECK0-64-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP107]], align 8 +// CHECK0-64-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP108]], align 8 +// CHECK0-64-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK0-64-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK0-64-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK0-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK0-64-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK0-64-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]]) +// CHECK0-64-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK0-64-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] +// CHECK0-64: omp_offload.failed8: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT9]] +// CHECK0-64: omp_offload.cont9: +// CHECK0-64-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK0-64-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8 +// CHECK0-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 
0, i32 0 +// CHECK0-64-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK0-64-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8 +// CHECK0-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP121]], align 8 +// CHECK0-64-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK0-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8 +// CHECK0-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK0-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8 +// CHECK0-64-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP126]], align 8 +// CHECK0-64-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK0-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK0-64-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK0-64-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8 +// CHECK0-64-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8 +// CHECK0-64-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8 +// CHECK0-64-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8 +// CHECK0-64-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP135]], align 8 +// CHECK0-64-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP136]], align 8 +// CHECK0-64-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP137]], 
align 8 +// CHECK0-64-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK0-64-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK0-64-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK0-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK0-64-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]]) +// CHECK0-64-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK0-64-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]] +// CHECK0-64: omp_offload.failed14: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT15]] +// CHECK0-64: omp_offload.cont15: +// CHECK0-64-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK0-64-NEXT: ret i32 [[TMP144]] +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK0-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// CHECK0-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK0-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK0-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK0-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// CHECK0-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// CHECK0-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// CHECK0-64-NEXT: 
[[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK0-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK0-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// CHECK0-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK0-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// CHECK0-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK0-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// CHECK0-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// CHECK0-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK0-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK0-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// CHECK0-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK0-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// CHECK0-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK0-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK0-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK0-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) +// CHECK0-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK0-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// CHECK0-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK0-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// CHECK0-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK0-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// CHECK0-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// 
CHECK0-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// CHECK0-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK0-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// CHECK0-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK0-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// CHECK0-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK0-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK0-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK0-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// CHECK0-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK0-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// CHECK0-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK0-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// CHECK0-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// CHECK0-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK0-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// CHECK0-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// CHECK0-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// CHECK0-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK0-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK0-64-NEXT: store i64 1, i64* [[X]], align 8 +// CHECK0-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK0-64-NEXT: store i8 1, i8* [[Y]], align 8 +// CHECK0-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK0-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// CHECK0-64-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK0-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK0-64-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false) +// CHECK0-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// 
CHECK0-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK0-64-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0 +// CHECK0-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// CHECK0-64-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0 +// CHECK0-64-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8 +// CHECK0-64-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK0-64-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8 +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK0-64-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// CHECK0-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// CHECK0-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK0-64-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// CHECK0-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK0-64-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// CHECK0-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK0-64-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: ret i32 [[TMP9]] +// CHECK0-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK0-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-64-NEXT: 
[[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 +// CHECK0-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK0-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK0-64-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// CHECK0-64-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK0-64-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32* +// CHECK0-64-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 +// CHECK0-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK0-64-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 +// CHECK0-64-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false) +// CHECK0-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK0-64-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8 +// CHECK0-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK0-64-NEXT: store double* [[A]], double** [[TMP13]], align 8 +// CHECK0-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP14]], align 8 +// CHECK0-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP6]], i64* [[TMP16]], align 8 +// CHECK0-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8 +// CHECK0-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP19]], align 8 +// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP21:%.*]] = 
bitcast i8** [[TMP20]] to i64* +// CHECK0-64-NEXT: store i64 2, i64* [[TMP21]], align 8 +// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64* +// CHECK0-64-NEXT: store i64 2, i64* [[TMP23]], align 8 +// CHECK0-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP24]], align 8 +// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK0-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8 +// CHECK0-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK0-64-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8 +// CHECK0-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP29]], align 8 +// CHECK0-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK0-64-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK0-64-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8 +// CHECK0-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK0-64-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK0-64-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8 +// CHECK0-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8 +// CHECK0-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP35]], align 8 +// CHECK0-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK0-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK0-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8 +// CHECK0-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8 +// CHECK0-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* [[TMP38]], 
i64** [[TMP43]], align 8 +// CHECK0-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8 +// CHECK0-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP45]], align 8 +// CHECK0-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP46]], align 8 +// CHECK0-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK0-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK0-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK0-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK0-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK0-64-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-64-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK0-64-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0-64: omp_offload.failed: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0-64: omp_offload.cont: +// CHECK0-64-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]] +// CHECK0-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK0-64-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK0-64-NEXT: [[CONV3:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK0-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK0-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]] +// CHECK0-64-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK0-64-NEXT: ret i32 [[ADD4]] +// CHECK0-64-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: 
[[AAA:%.*]] = alloca i8, align 1 +// CHECK0-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK0-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-64-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK0-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK0-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* +// CHECK0-64-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK0-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK0-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP8]], align 8 +// CHECK0-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8 +// CHECK0-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8 +// CHECK0-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP13]], align 8 +// CHECK0-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8 +// CHECK0-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK0-64-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8 +// CHECK0-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP18]], align 8 +// CHECK0-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-64-NEXT: 
[[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK0-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8 +// CHECK0-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8 +// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8 +// CHECK0-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8 +// CHECK0-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP27]], align 8 +// CHECK0-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP28]], align 8 +// CHECK0-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK0-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK0-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK0-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK0-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK0-64-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-64-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK0-64-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0-64: omp_offload.failed: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 
[[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0-64: omp_offload.cont: +// CHECK0-64-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: ret i32 [[TMP36]] +// CHECK0-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK0-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK0-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK0-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK0-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8 +// CHECK0-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK0-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP6]], align 8 +// CHECK0-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8 +// CHECK0-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8 +// CHECK0-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-64-NEXT: store i8* null, i8** [[TMP11]], align 8 +// CHECK0-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK0-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-64-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK0-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-64-NEXT: store i8** [[TMP12]], i8*** 
[[TMP16]], align 8 +// CHECK0-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-64-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8 +// CHECK0-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8 +// CHECK0-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8 +// CHECK0-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP20]], align 8 +// CHECK0-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-64-NEXT: store i8** null, i8*** [[TMP21]], align 8 +// CHECK0-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK0-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-64-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK0-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK0-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK0-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-64-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK0-64-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK0-64-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0-64: omp_offload.failed: +// CHECK0-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK0-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0-64: omp_offload.cont: +// CHECK0-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-64-NEXT: ret i32 [[TMP29]] +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK0-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, 
align 8 +// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK0-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK0-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// CHECK0-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK0-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK0-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// CHECK0-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// CHECK0-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK0-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// CHECK0-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK0-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// CHECK0-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK0-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// CHECK0-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK0-64-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK0-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK0-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// CHECK0-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK0-64-NEXT: store double [[INC]], double* [[A5]], align 8 +// CHECK0-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// CHECK0-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// CHECK0-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK0-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK0-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK0-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[AAA_ADDR:%.*]] = alloca 
i64, align 8 +// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK0-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// CHECK0-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK0-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK0-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// CHECK0-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// CHECK0-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK0-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK0-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK0-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK0-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// CHECK0-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK0-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK0-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK0-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK0-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK0-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK0-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK0-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK0-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK0-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK0-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK0-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK0-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// CHECK0-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK0-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK0-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK0-64-NEXT: ret void +// CHECK0-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK0-64-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK0-64-NEXT: entry: +// CHECK0-64-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK0-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// 
CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-64-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK1-64-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK1-64-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK1-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-64-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK1-64-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK1-64-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK1-64-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK1-64-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// CHECK1-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// CHECK1-64-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK1-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK1-64-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK1-64-NEXT: 
[[TMP11:%.*]] = load i32, i32* @ga, align 4 +// CHECK1-64-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32* +// CHECK1-64-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK1-64-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8 +// CHECK1-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8 +// CHECK1-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP17]], align 8 +// CHECK1-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK1-64-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8 +// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32** +// CHECK1-64-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8 +// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP22]], align 8 +// CHECK1-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8 +// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8 +// CHECK1-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP27]], align 8 +// CHECK1-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP30]], align 4 +// CHECK1-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 3, i32* [[TMP31]], align 4 +// CHECK1-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8 +// CHECK1-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8 +// CHECK1-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8 +// CHECK1-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8 +// CHECK1-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP36]], align 8 +// CHECK1-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP37]], align 8 +// CHECK1-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP38]], align 8 +// CHECK1-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP39]], align 8 +// CHECK1-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4 +// CHECK1-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4 +// CHECK1-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP42]], align 4 +// CHECK1-64-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-64-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK1-64-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-64: omp_offload.failed: +// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1-64: omp_offload.cont: +// CHECK1-64-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-64-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* +// CHECK1-64-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2 +// CHECK1-64-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK1-64-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK1-64-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8 +// CHECK1-64-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false) +// CHECK1-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* 
[[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8 +// CHECK1-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8 +// CHECK1-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP55]], align 8 +// CHECK1-64-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8 +// CHECK1-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8 +// CHECK1-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP60]], align 8 +// CHECK1-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8 +// CHECK1-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8 +// CHECK1-64-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP65]], align 8 +// CHECK1-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3 +// CHECK1-64-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK1-64-NEXT: store float* [[VLA]], float** [[TMP67]], align 8 +// CHECK1-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3 +// CHECK1-64-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK1-64-NEXT: store float* [[VLA]], float** [[TMP69]], align 8 +// CHECK1-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK1-64-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8 +// CHECK1-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP71]], align 8 +// CHECK1-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4 +// CHECK1-64-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8 +// CHECK1-64-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4 +// CHECK1-64-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8 +// CHECK1-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* 
[[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP76]], align 8 +// CHECK1-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5 +// CHECK1-64-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64* +// CHECK1-64-NEXT: store i64 5, i64* [[TMP78]], align 8 +// CHECK1-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5 +// CHECK1-64-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64* +// CHECK1-64-NEXT: store i64 5, i64* [[TMP80]], align 8 +// CHECK1-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP81]], align 8 +// CHECK1-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6 +// CHECK1-64-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8 +// CHECK1-64-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6 +// CHECK1-64-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8 +// CHECK1-64-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP86]], align 8 +// CHECK1-64-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7 +// CHECK1-64-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK1-64-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8 +// CHECK1-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7 +// CHECK1-64-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK1-64-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8 +// CHECK1-64-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK1-64-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8 +// CHECK1-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP92]], align 8 +// CHECK1-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8 +// CHECK1-64-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8 +// CHECK1-64-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8 +// CHECK1-64-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8 +// CHECK1-64-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP97]], align 8 +// CHECK1-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-64-NEXT: [[TMP101:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK1-64-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK1-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8 +// CHECK1-64-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8 +// CHECK1-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8 +// CHECK1-64-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8 +// CHECK1-64-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP107]], align 8 +// CHECK1-64-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP108]], align 8 +// CHECK1-64-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK1-64-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK1-64-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK1-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK1-64-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK1-64-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]]) +// CHECK1-64-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK1-64-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] +// CHECK1-64: omp_offload.failed8: +// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* 
[[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT9]] +// CHECK1-64: omp_offload.cont9: +// CHECK1-64-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK1-64-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8 +// CHECK1-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK1-64-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8 +// CHECK1-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP121]], align 8 +// CHECK1-64-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8 +// CHECK1-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8 +// CHECK1-64-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP126]], align 8 +// CHECK1-64-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK1-64-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK1-64-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8 +// CHECK1-64-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8 +// CHECK1-64-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8 +// CHECK1-64-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8 +// CHECK1-64-NEXT: 
[[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP135]], align 8 +// CHECK1-64-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP136]], align 8 +// CHECK1-64-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK1-64-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK1-64-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK1-64-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK1-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK1-64-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]]) +// CHECK1-64-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK1-64-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]] +// CHECK1-64: omp_offload.failed14: +// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT15]] +// CHECK1-64: omp_offload.cont15: +// CHECK1-64-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK1-64-NEXT: ret i32 [[TMP144]] +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK1-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK1-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x 
[10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// CHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// CHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// CHECK1-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK1-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// CHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK1-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// CHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK1-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// CHECK1-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// CHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK1-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// CHECK1-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK1-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// CHECK1-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK1-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK1-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK1-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) 
+// CHECK1-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK1-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// CHECK1-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK1-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// CHECK1-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK1-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// CHECK1-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// CHECK1-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// CHECK1-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK1-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// CHECK1-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK1-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// CHECK1-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK1-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK1-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK1-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// CHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK1-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// CHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK1-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// CHECK1-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// CHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK1-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// CHECK1-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// CHECK1-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// CHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK1-64-NEXT: store i64 1, i64* [[X]], align 8 +// CHECK1-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK1-64-NEXT: store i8 1, i8* [[Y]], align 8 +// CHECK1-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK1-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// CHECK1-64-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK1-64-NEXT: store double* [[PTR]], double** 
[[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK1-64-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false) +// CHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK1-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK1-64-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0 +// CHECK1-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// CHECK1-64-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0 +// CHECK1-64-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8 +// CHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK1-64-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8 +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-64-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// CHECK1-64-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// CHECK1-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK1-64-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// CHECK1-64-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK1-64-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// CHECK1-64-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK1-64-NEXT: 
store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: ret i32 [[TMP9]] +// CHECK1-64-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK1-64-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8 +// CHECK1-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK1-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// CHECK1-64-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// CHECK1-64-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK1-64-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32* +// CHECK1-64-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8 +// CHECK1-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]] +// CHECK1-64-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2 +// CHECK1-64-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false) +// CHECK1-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK1-64-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8 +// CHECK1-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK1-64-NEXT: store double* [[A]], double** [[TMP13]], align 8 +// CHECK1-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP14]], align 8 +// CHECK1-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK1-64-NEXT: 
store i64 [[TMP6]], i64* [[TMP16]], align 8 +// CHECK1-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8 +// CHECK1-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP19]], align 8 +// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64* +// CHECK1-64-NEXT: store i64 2, i64* [[TMP21]], align 8 +// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64* +// CHECK1-64-NEXT: store i64 2, i64* [[TMP23]], align 8 +// CHECK1-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP24]], align 8 +// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK1-64-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8 +// CHECK1-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK1-64-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8 +// CHECK1-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP29]], align 8 +// CHECK1-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK1-64-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK1-64-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8 +// CHECK1-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK1-64-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK1-64-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8 +// CHECK1-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8 +// CHECK1-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP35]], align 8 +// CHECK1-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK1-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK1-64-NEXT: 
[[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8 +// CHECK1-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8 +// CHECK1-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 8 +// CHECK1-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8 +// CHECK1-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP45]], align 8 +// CHECK1-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP46]], align 8 +// CHECK1-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK1-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK1-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK1-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK1-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK1-64-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-64-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK1-64-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-64: omp_offload.failed: +// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1-64: omp_offload.cont: +// CHECK1-64-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]] +// CHECK1-64-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-64-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK1-64-NEXT: 
[[CONV3:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK1-64-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]] +// CHECK1-64-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK1-64-NEXT: ret i32 [[ADD4]] +// CHECK1-64-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK1-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK1-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-64-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK1-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* +// CHECK1-64-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK1-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK1-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP8]], align 8 +// CHECK1-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8 +// CHECK1-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8 +// CHECK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP13]], align 8 +// CHECK1-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8 +// CHECK1-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-64-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to 
[10 x i32]** +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8 +// CHECK1-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP18]], align 8 +// CHECK1-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK1-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8 +// CHECK1-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8 +// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8 +// CHECK1-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8 +// CHECK1-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP27]], align 8 +// CHECK1-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP28]], align 8 +// CHECK1-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK1-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK1-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK1-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK1-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 
0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK1-64-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-64-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK1-64-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-64: omp_offload.failed: +// CHECK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1-64: omp_offload.cont: +// CHECK1-64-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: ret i32 [[TMP36]] +// CHECK1-64-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK1-64-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-64-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK1-64-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-64-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-64-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK1-64-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8 +// CHECK1-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK1-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP6]], align 8 +// CHECK1-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8 +// CHECK1-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-64-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8 +// CHECK1-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-64-NEXT: store i8* null, i8** [[TMP11]], align 8 +// CHECK1-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// 
CHECK1-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK1-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-64-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK1-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-64-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 8 +// CHECK1-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-64-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8 +// CHECK1-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8 +// CHECK1-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-64-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8 +// CHECK1-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP20]], align 8 +// CHECK1-64-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-64-NEXT: store i8** null, i8*** [[TMP21]], align 8 +// CHECK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK1-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-64-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK1-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-64-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK1-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-64-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK1-64-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-64-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-64-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1-64: omp_offload.failed: +// CHECK1-64-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1-64: omp_offload.cont: +// CHECK1-64-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-64-NEXT: ret i32 [[TMP29]] +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK1-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK1-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// CHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK1-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK1-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// CHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// CHECK1-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK1-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// CHECK1-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK1-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// CHECK1-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK1-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// CHECK1-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-64-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK1-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK1-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// CHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK1-64-NEXT: store double [[INC]], double* [[A5]], align 8 +// CHECK1-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// CHECK1-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// CHECK1-64-NEXT: 
[[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK1-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK1-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK1-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// CHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// CHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// CHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK1-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK1-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK1-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// CHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// CHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK1-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK1-64-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// CHECK1-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK1-64-NEXT: ret void +// CHECK1-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK1-64-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK1-64-NEXT: entry: +// CHECK1-64-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK1-64-NEXT: ret void +// CHECK2-32-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK2-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK2-32-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK2-32-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK2-32-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK2-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-32-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// CHECK2-32-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// CHECK2-32-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// CHECK2-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 
0, i32 1 +// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// CHECK2-32-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK2-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK2-32-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4 +// CHECK2-32-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4 +// CHECK2-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4 +// CHECK2-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP15]], align 4 +// CHECK2-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32** +// CHECK2-32-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4 +// CHECK2-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK2-32-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4 +// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP20]], align 4 +// CHECK2-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4 +// CHECK2-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4 +// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP25]], align 4 +// CHECK2-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP28]], align 4 +// CHECK2-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i32 3, i32* [[TMP29]], align 4 +// CHECK2-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4 +// CHECK2-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4 +// CHECK2-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4 +// CHECK2-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4 +// CHECK2-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP34]], align 4 +// CHECK2-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP35]], align 4 +// CHECK2-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP36]], align 8 +// CHECK2-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP37]], align 8 +// CHECK2-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4 +// CHECK2-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4 +// CHECK2-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-32-NEXT: store i32 0, i32* [[TMP40]], align 4 +// CHECK2-32-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-32-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0 +// CHECK2-32-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2-32: omp_offload.failed: +// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]] +// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2-32: omp_offload.cont: +// CHECK2-32-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16* +// CHECK2-32-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2 +// CHECK2-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4 +// 
CHECK2-32-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64 +// CHECK2-32-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]] +// CHECK2-32-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8 +// CHECK2-32-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64 +// CHECK2-32-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false) +// CHECK2-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4 +// CHECK2-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4 +// CHECK2-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP55]], align 4 +// CHECK2-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4 +// CHECK2-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4 +// CHECK2-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP60]], align 4 +// CHECK2-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4 +// CHECK2-32-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4 +// CHECK2-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP65]], align 4 +// CHECK2-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3 +// CHECK2-32-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK2-32-NEXT: store float* [[VLA]], float** [[TMP67]], align 4 +// CHECK2-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3 +// CHECK2-32-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK2-32-NEXT: store float* [[VLA]], float** [[TMP69]], align 4 +// CHECK2-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK2-32-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4 +// CHECK2-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP71]], align 4 +// CHECK2-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 
0, i32 4 +// CHECK2-32-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4 +// CHECK2-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4 +// CHECK2-32-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4 +// CHECK2-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP76]], align 4 +// CHECK2-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5 +// CHECK2-32-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32* +// CHECK2-32-NEXT: store i32 5, i32* [[TMP78]], align 4 +// CHECK2-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5 +// CHECK2-32-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32* +// CHECK2-32-NEXT: store i32 5, i32* [[TMP80]], align 4 +// CHECK2-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP81]], align 4 +// CHECK2-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6 +// CHECK2-32-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4 +// CHECK2-32-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6 +// CHECK2-32-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4 +// CHECK2-32-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP86]], align 4 +// CHECK2-32-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7 +// CHECK2-32-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK2-32-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4 +// CHECK2-32-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7 +// CHECK2-32-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK2-32-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4 +// CHECK2-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK2-32-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4 +// CHECK2-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP92]], align 4 +// CHECK2-32-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8 +// CHECK2-32-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4 +// CHECK2-32-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8 +// CHECK2-32-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4 +// CHECK2-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8 +// 
CHECK2-32-NEXT: store i8* null, i8** [[TMP97]], align 4 +// CHECK2-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK2-32-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK2-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK2-32-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK2-32-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK2-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4 +// CHECK2-32-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4 +// CHECK2-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4 +// CHECK2-32-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4 +// CHECK2-32-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP107]], align 4 +// CHECK2-32-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP108]], align 4 +// CHECK2-32-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK2-32-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK2-32-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK2-32-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK2-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK2-32-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK2-32-NEXT: [[TMP114:%.*]] = call i32 
@__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]]) +// CHECK2-32-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK2-32-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK2-32: omp_offload.failed6: +// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK2-32: omp_offload.cont7: +// CHECK2-32-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK2-32-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4 +// CHECK2-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK2-32-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4 +// CHECK2-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP121]], align 4 +// CHECK2-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4 +// CHECK2-32-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4 +// CHECK2-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP126]], align 4 +// CHECK2-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK2-32-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK2-32-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK2-32-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK2-32-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4 +// CHECK2-32-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4 +// CHECK2-32-NEXT: [[TMP133:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4 +// CHECK2-32-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4 +// CHECK2-32-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP135]], align 4 +// CHECK2-32-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP136]], align 4 +// CHECK2-32-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK2-32-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK2-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10 +// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK2-32-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11 +// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK2-32-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12 +// CHECK2-32-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK2-32-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]]) +// CHECK2-32-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK2-32-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]] +// CHECK2-32: omp_offload.failed12: +// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT13]] +// CHECK2-32: omp_offload.cont13: +// CHECK2-32-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK2-32-NEXT: ret i32 [[TMP144]] +// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK2-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-32-NEXT: store i32* [[P]], 
i32** [[P_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// CHECK2-32-NEXT: ret void +// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK2-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// CHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// CHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// CHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK2-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK2-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// CHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK2-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 +// CHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK2-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 +// CHECK2-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 +// CHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 +// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 +// CHECK2-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK2-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// CHECK2-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK2-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: [[VLA6:%.*]] = alloca 
float, i32 [[TMP1]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 +// CHECK2-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK2-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false) +// CHECK2-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK2-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false) +// CHECK2-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK2-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8 +// CHECK2-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4 +// CHECK2-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK2-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// CHECK2-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK2-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false) +// CHECK2-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK2-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false) +// CHECK2-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK2-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK2-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK2-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2 +// CHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK2-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3 +// CHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK2-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1 +// CHECK2-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 +// CHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK2-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]] +// CHECK2-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]] +// CHECK2-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3 +// CHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK2-32-NEXT: store i64 1, i64* [[X]], align 4 +// CHECK2-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8 1, i8* [[Y]], align 4 +// CHECK2-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK2-32-NEXT: ret void +// CHECK2-32-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK2-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4 +// CHECK2-32-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK2-32-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false) +// CHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK2-32-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0 +// CHECK2-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// CHECK2-32-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0 +// CHECK2-32-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4 +// CHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK2-32-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4 +// CHECK2-32-NEXT: ret void +// CHECK2-32-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK2-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-32-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// CHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK2-32-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// CHECK2-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[ADD4:%.*]] = add 
nsw i32 [[TMP6]], [[CALL3]] +// CHECK2-32-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// CHECK2-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK2-32-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK2-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: ret i32 [[TMP9]] +// CHECK2-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK2-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4 +// CHECK2-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK2-32-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK2-32-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 +// CHECK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK2-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK2-32-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK2-32-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false) +// CHECK2-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK2-32-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4 +// CHECK2-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK2-32-NEXT: store double* [[A]], double** [[TMP13]], align 4 +// CHECK2-32-NEXT: [[TMP14:%.*]] = getelementptr 
inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP14]], align 4 +// CHECK2-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[TMP16]], align 4 +// CHECK2-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4 +// CHECK2-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP19]], align 4 +// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32* +// CHECK2-32-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK2-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32* +// CHECK2-32-NEXT: store i32 2, i32* [[TMP23]], align 4 +// CHECK2-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP24]], align 4 +// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK2-32-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4 +// CHECK2-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK2-32-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4 +// CHECK2-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP29]], align 4 +// CHECK2-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK2-32-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK2-32-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4 +// CHECK2-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK2-32-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK2-32-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4 +// CHECK2-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4 +// CHECK2-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP35]], align 4 +// CHECK2-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
%struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK2-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK2-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-32-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4 +// CHECK2-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-32-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4 +// CHECK2-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-32-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4 +// CHECK2-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-32-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4 +// CHECK2-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP45]], align 4 +// CHECK2-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-32-NEXT: store i8** null, i8*** [[TMP46]], align 4 +// CHECK2-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK2-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-32-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK2-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK2-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK2-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-32-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK2-32-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK2-32-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2-32: omp_offload.failed: +// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]] +// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2-32: omp_offload.cont: 
+// CHECK2-32-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]] +// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]] +// CHECK2-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK2-32-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK2-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK2-32-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK2-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]] +// CHECK2-32-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK2-32-NEXT: ret i32 [[ADD3]] +// CHECK2-32-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK2-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK2-32-NEXT: entry: +// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK2-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK2-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-32-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* +// CHECK2-32-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1 +// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 +// CHECK2-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK2-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK2-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP8]], align 4 +// CHECK2-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4 +// CHECK2-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK2-32-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4 +// CHECK2-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-32-NEXT: store i8* null, i8** [[TMP13]], align 4 +// CHECK2-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-32-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 
x i32]**
+// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
+// CHECK2-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK2-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
+// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
+// CHECK2-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK2-32-NEXT: store i8* null, i8** [[TMP18]], align 4
+// CHECK2-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK2-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK2-32-NEXT: store i32 2, i32* [[TMP21]], align 4
+// CHECK2-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK2-32-NEXT: store i32 3, i32* [[TMP22]], align 4
+// CHECK2-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK2-32-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4
+// CHECK2-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK2-32-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4
+// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
+// CHECK2-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK2-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
+// CHECK2-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK2-32-NEXT: store i8** null, i8*** [[TMP27]], align 4
+// CHECK2-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK2-32-NEXT: store i8** null, i8*** [[TMP28]], align 4
+// CHECK2-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK2-32-NEXT: store i64 0, i64* [[TMP29]], align 8
+// CHECK2-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK2-32-NEXT: store i64 0, i64* [[TMP30]], align 8
+// CHECK2-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
+// CHECK2-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
+// CHECK2-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK2-32-NEXT: store i32 0, i32* [[TMP33]], align 4
+// CHECK2-32-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK2-32-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
+// CHECK2-32-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK2-32: omp_offload.failed:
+// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
+// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK2-32: omp_offload.cont:
+// CHECK2-32-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-32-NEXT: ret i32 [[TMP36]]
+// CHECK2-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
+// CHECK2-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
+// CHECK2-32-NEXT: entry:
+// CHECK2-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-32-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK2-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
+// CHECK2-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK2-32-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK2-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4
+// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
+// CHECK2-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-32-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
+// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4
+// CHECK2-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
+// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4
+// CHECK2-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK2-32-NEXT: store i8* null, i8** [[TMP6]], align 4
+// CHECK2-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK2-32-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
+// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
+// CHECK2-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK2-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
+// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
+// CHECK2-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK2-32-NEXT: store i8* null, i8** [[TMP11]], align 4
+// CHECK2-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK2-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK2-32-NEXT: store i32 2, i32* [[TMP14]], align 4
+// CHECK2-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK2-32-NEXT: store i32 2, i32* [[TMP15]], align 4
+// CHECK2-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK2-32-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4
+// CHECK2-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK2-32-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 4
+// CHECK2-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
+// CHECK2-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK2-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
+// CHECK2-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK2-32-NEXT: store i8** null, i8*** [[TMP20]], align 4
+// CHECK2-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK2-32-NEXT: store i8** null, i8*** [[TMP21]], align 4
+// CHECK2-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK2-32-NEXT: store i64 0, i64* [[TMP22]], align 8
+// CHECK2-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK2-32-NEXT: store i64 0, i64* [[TMP23]], align 8
+// CHECK2-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
+// CHECK2-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK2-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
+// CHECK2-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK2-32-NEXT: store i32 0, i32* [[TMP26]], align 4
+// CHECK2-32-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK2-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK2-32-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK2-32: omp_offload.failed:
+// CHECK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
+// CHECK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK2-32: omp_offload.cont:
+// CHECK2-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-32-NEXT: ret i32 [[TMP29]]
+// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
+// CHECK2-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
+// CHECK2-32-NEXT: entry:
+// CHECK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
+// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// CHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
+// CHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// CHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// CHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// CHECK2-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
+// CHECK2-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
+// CHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// CHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// CHECK2-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
+// CHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
+// CHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// CHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// CHECK2-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
+// CHECK2-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
+// CHECK2-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
+// CHECK2-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// CHECK2-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
+// CHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// CHECK2-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
+// CHECK2-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// CHECK2-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
+// CHECK2-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
+// CHECK2-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
+// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
+// CHECK2-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
+// CHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
+// CHECK2-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
+// CHECK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
+// CHECK2-32-NEXT: store double [[ADD]], double* [[A]], align 4
+// CHECK2-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
+// CHECK2-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4
+// CHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
+// CHECK2-32-NEXT: store double [[INC]], double* [[A4]], align 4
+// CHECK2-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
+// CHECK2-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
+// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
+// CHECK2-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
+// CHECK2-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
+// CHECK2-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
+// CHECK2-32-NEXT: ret void
+// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
+// CHECK2-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
+// CHECK2-32-NEXT: entry:
+// CHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// CHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// CHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK2-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
+// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// CHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
+// CHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// CHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// CHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// CHECK2-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
+// CHECK2-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
+// CHECK2-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
+// CHECK2-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
+// CHECK2-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1
+// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// CHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK2-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK2-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
+// CHECK2-32-NEXT: ret void
+// CHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
+// CHECK2-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
+// CHECK2-32-NEXT: entry:
+// CHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// CHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// CHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// CHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// CHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// CHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// CHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// CHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// CHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// CHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK2-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK2-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
+// CHECK2-32-NEXT: ret void
+// CHECK2-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CHECK2-32-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK2-32-NEXT: entry:
+// CHECK2-32-NEXT: call void @__tgt_register_requires(i64 1)
+// CHECK2-32-NEXT: ret void
+// CHECK3-32-LABEL: define {{[^@]+}}@_Z3fooiPd
+// CHECK3-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK3-32-NEXT: entry:
+// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[AA:%.*]] = alloca i16, align 2
+// CHECK3-32-NEXT: [[B:%.*]] = alloca [10 x float], align 4
+// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
+// CHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
+// CHECK3-32-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// CHECK3-32-NEXT: [[P:%.*]] = alloca i32*, align 64
+// CHECK3-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-32-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
+// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK3-32-NEXT: store i16 0, i16* [[AA]], align 2
+// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave()
+// CHECK3-32-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
+// CHECK3-32-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
+// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
+// CHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
+// CHECK3-32-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
+// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
+// CHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: store i32 [[TMP4]], i32* [[X]], align 4
+// CHECK3-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
+// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4
+// CHECK3-32-NEXT: store i32* [[A]], i32** [[P]], align 64
+// CHECK3-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-32-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4
+// CHECK3-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
+// CHECK3-32-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
+// CHECK3-32-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4
+// CHECK3-32-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
+// CHECK3-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
+// CHECK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4
+// CHECK3-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4
+// CHECK3-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP15]], align 4
+// CHECK3-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
+// CHECK3-32-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4
+// CHECK3-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-32-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
+// CHECK3-32-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4
+// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP20]], align 4
+// CHECK3-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-32-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4
+// CHECK3-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-32-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4
+// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP25]], align 4
+// CHECK3-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-32-NEXT: store i32 2, i32* [[TMP28]], align 4
+// CHECK3-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-32-NEXT: store i32 3, i32* [[TMP29]], align 4
+// CHECK3-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-32-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4
+// CHECK3-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-32-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4
+// CHECK3-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
+// CHECK3-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
+// CHECK3-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-32-NEXT: store i8** null, i8*** [[TMP34]], align 4
+// CHECK3-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-32-NEXT: store i8** null, i8*** [[TMP35]], align 4
+// CHECK3-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-32-NEXT: store i64 0, i64* [[TMP36]], align 8
+// CHECK3-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-32-NEXT: store i64 0, i64* [[TMP37]], align 8
+// CHECK3-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
+// CHECK3-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
+// CHECK3-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK3-32-NEXT: store i32 0, i32* [[TMP40]], align 4
+// CHECK3-32-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK3-32-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
+// CHECK3-32-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-32: omp_offload.failed:
+// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
+// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK3-32: omp_offload.cont:
+// CHECK3-32-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
+// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
+// CHECK3-32-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2
+// CHECK3-32-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
+// CHECK3-32-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK3-32-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
+// CHECK3-32-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
+// CHECK3-32-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
+// CHECK3-32-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
+// CHECK3-32-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
+// CHECK3-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4
+// CHECK3-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4
+// CHECK3-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP55]], align 4
+// CHECK3-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
+// CHECK3-32-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
+// CHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
+// CHECK3-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
+// CHECK3-32-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
+// CHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
+// CHECK3-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP60]], align 4
+// CHECK3-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
+// CHECK3-32-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4
+// CHECK3-32-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
+// CHECK3-32-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4
+// CHECK3-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP65]], align 4
+// CHECK3-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
+// CHECK3-32-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
+// CHECK3-32-NEXT: store float* [[VLA]], float** [[TMP67]], align 4
+// CHECK3-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
+// CHECK3-32-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
+// CHECK3-32-NEXT: store float* [[VLA]], float** [[TMP69]], align 4
+// CHECK3-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK3-32-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4
+// CHECK3-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP71]], align 4
+// CHECK3-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
+// CHECK3-32-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
+// CHECK3-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
+// CHECK3-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
+// CHECK3-32-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
+// CHECK3-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
+// CHECK3-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP76]], align 4
+// CHECK3-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
+// CHECK3-32-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
+// CHECK3-32-NEXT: store i32 5, i32* [[TMP78]], align 4
+// CHECK3-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
+// CHECK3-32-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK3-32-NEXT: store i32 5, i32* [[TMP80]], align 4
+// CHECK3-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP81]], align 4
+// CHECK3-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
+// CHECK3-32-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4
+// CHECK3-32-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
+// CHECK3-32-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4
+// CHECK3-32-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP86]], align 4
+// CHECK3-32-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
+// CHECK3-32-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
+// CHECK3-32-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4
+// CHECK3-32-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
+// CHECK3-32-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
+// CHECK3-32-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4
+// CHECK3-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK3-32-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4
+// CHECK3-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP92]], align 4
+// CHECK3-32-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
+// CHECK3-32-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
+// CHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
+// CHECK3-32-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
+// CHECK3-32-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
+// CHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
+// CHECK3-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP97]], align 4
+// CHECK3-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-32-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK3-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK3-32-NEXT: store i32 2, i32* [[TMP101]], align 4
+// CHECK3-32-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK3-32-NEXT: store i32 9, i32* [[TMP102]], align 4
+// CHECK3-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK3-32-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4
+// CHECK3-32-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK3-32-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4
+// CHECK3-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK3-32-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4
+// CHECK3-32-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK3-32-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
+// CHECK3-32-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK3-32-NEXT: store i8** null, i8*** [[TMP107]], align 4
+// CHECK3-32-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK3-32-NEXT: store i8** null, i8*** [[TMP108]], align 4
+// CHECK3-32-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK3-32-NEXT: store i64 0, i64* [[TMP109]], align 8
+// CHECK3-32-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK3-32-NEXT: store i64 0, i64* [[TMP110]], align 8
+// CHECK3-32-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
+// CHECK3-32-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
+// CHECK3-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK3-32-NEXT: store i32 0, i32* [[TMP113]], align 4
+// CHECK3-32-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
+// CHECK3-32-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
+// CHECK3-32-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK3-32: omp_offload.failed6:
+// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
+// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT7]]
+// CHECK3-32: omp_offload.cont7:
+// CHECK3-32-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
+// CHECK3-32-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4
+// CHECK3-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
+// CHECK3-32-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4
+// CHECK3-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP121]], align 4
+// CHECK3-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
+// CHECK3-32-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
+// CHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
+// CHECK3-32-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
+// CHECK3-32-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
+// CHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
+// CHECK3-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP126]], align 4
+// CHECK3-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
+// CHECK3-32-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK3-32-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
+// CHECK3-32-NEXT: store i32 2, i32* [[TMP129]], align 4
+// CHECK3-32-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
+// CHECK3-32-NEXT: store i32 2, i32* [[TMP130]], align 4
+// CHECK3-32-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
+// CHECK3-32-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4
+// CHECK3-32-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
+// CHECK3-32-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4
+// CHECK3-32-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
+// CHECK3-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
+// CHECK3-32-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
+// CHECK3-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
+// CHECK3-32-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
+// CHECK3-32-NEXT: store i8** null, i8*** [[TMP135]], align 4
+// CHECK3-32-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
+// CHECK3-32-NEXT: store i8** null, i8*** [[TMP136]], align 4
+// CHECK3-32-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
+// CHECK3-32-NEXT: store i64 0, i64* [[TMP137]], align 8
+// CHECK3-32-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
+// CHECK3-32-NEXT: store i64 0, i64* [[TMP138]], align 8
+// CHECK3-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
+// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
+// CHECK3-32-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
+// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
+// CHECK3-32-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
+// CHECK3-32-NEXT: store i32 0, i32* [[TMP141]], align 4
+// CHECK3-32-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
+// CHECK3-32-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
+// CHECK3-32-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
+// CHECK3-32: omp_offload.failed12:
+// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
+// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT13]]
+// CHECK3-32: omp_offload.cont13:
+// CHECK3-32-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-32-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP145]])
+// CHECK3-32-NEXT: ret i32 [[TMP144]]
+// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
+// CHECK3-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK3-32-NEXT: entry:
+// CHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4
+// CHECK3-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK3-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4
+// CHECK3-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4
+// CHECK3-32-NEXT: ret void
+// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
+// CHECK3-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
+// CHECK3-32-NEXT: entry:
+// CHECK3-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
+// CHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4
+// CHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
+// CHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4
+// CHECK3-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
+// CHECK3-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
+// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
+// CHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
+// CHECK3-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
+// CHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
+// CHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// CHECK3-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
+// CHECK3-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
+// CHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// CHECK3-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
+// CHECK3-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
+// CHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
+// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
+// CHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
+// CHECK3-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
+// CHECK3-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
+// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
+// CHECK3-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
+// CHECK3-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
+// CHECK3-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
+// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// CHECK3-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
+// CHECK3-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
+// CHECK3-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
+// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
+// CHECK3-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
+// CHECK3-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
+// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
+// CHECK3-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// CHECK3-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
+// CHECK3-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
+// CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
+// CHECK3-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// CHECK3-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
+// CHECK3-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
+// CHECK3-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
+// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
+// CHECK3-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
+// CHECK3-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
+// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
+// CHECK3-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
+// CHECK3-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
+// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
+// CHECK3-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
+// CHECK3-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
+// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
+// CHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK3-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
+// CHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
+// CHECK3-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
+// CHECK3-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
+// CHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
+// CHECK3-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
+// CHECK3-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
+// CHECK3-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
+// CHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
+// CHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
+// CHECK3-32-NEXT: store i64 1, i64* [[X]], align 4
+// CHECK3-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
+// CHECK3-32-NEXT: store i8 1, i8* [[Y]], align 4
+// CHECK3-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
+// CHECK3-32-NEXT: ret void
+// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
+// CHECK3-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
+// CHECK3-32-NEXT: entry:
+// CHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// CHECK3-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
+// CHECK3-32-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// CHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// CHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
+// CHECK3-32-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
+// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
+// CHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4
+// CHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
+// CHECK3-32-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
+// CHECK3-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4
+// CHECK3-32-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// CHECK3-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
+// CHECK3-32-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
+// CHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
+// CHECK3-32-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4
+// CHECK3-32-NEXT: ret void
+// CHECK3-32-LABEL: define {{[^@]+}}@_Z3bariPd
+// CHECK3-32-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK3-32-NEXT: entry:
+// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
+// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// CHECK3-32-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
+// CHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
+// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[A]], align 4
+// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
+// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
+// CHECK3-32-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
+// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
+// CHECK3-32-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-32-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
+// CHECK3-32-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
+// CHECK3-32-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
+// CHECK3-32-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-32-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
+// CHECK3-32-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
+// CHECK3-32-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-32-NEXT: ret i32 [[TMP9]]
+// CHECK3-32-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
+// CHECK3-32-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
+// CHECK3-32-NEXT: entry:
+// CHECK3-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
+// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[B:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
+// CHECK3-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
+// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[B]], align 4
+// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
+// CHECK3-32-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
+// CHECK3-32-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
+// CHECK3-32-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
+// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4
+// CHECK3-32-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4
+// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
+// CHECK3-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
+// CHECK3-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
+// CHECK3-32-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
+// CHECK3-32-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
+// CHECK3-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
+// CHECK3-32-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
+// CHECK3-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
+// CHECK3-32-NEXT: store double* [[A]], double** [[TMP13]], align 4
+// CHECK3-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP14]], align 4
+// CHECK3-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-32-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[TMP16]], align 4
+// CHECK3-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-32-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4
+// CHECK3-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP19]], align 4
+// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-32-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
+// CHECK3-32-NEXT: store i32 2, i32* [[TMP21]], align 4
+// CHECK3-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-32-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK3-32-NEXT: store i32 2, i32* [[TMP23]], align 4
+// CHECK3-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP24]], align 4
+// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-32-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4
+// CHECK3-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK3-32-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK3-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP29]], align 4
+// CHECK3-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK3-32-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
+// CHECK3-32-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4
+// CHECK3-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK3-32-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
+// CHECK3-32-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4
+// CHECK3-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK3-32-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4
+// CHECK3-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP35]], align 4
+// CHECK3-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-32-NEXT: store i32 2, i32* [[TMP39]], align 4
+// CHECK3-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-32-NEXT: store i32 5, i32* [[TMP40]], align 4
+// CHECK3-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-32-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4
+// CHECK3-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-32-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4
+// CHECK3-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-32-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4
+// CHECK3-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-32-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
+// CHECK3-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-32-NEXT: store i8** null, i8*** [[TMP45]], align 4
+// CHECK3-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-32-NEXT: store i8** null, i8*** [[TMP46]], align 4
+// CHECK3-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-32-NEXT: store i64 0, i64* [[TMP47]], align 8
+// CHECK3-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-32-NEXT: store i64 0, i64* [[TMP48]], align 8
+// CHECK3-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
+// CHECK3-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
+// CHECK3-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK3-32-NEXT: store i32 0, i32* [[TMP51]], align 4
+// CHECK3-32-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK3-32-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
+// CHECK3-32-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3-32: omp_offload.failed:
+// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
+// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK3-32: omp_offload.cont:
+// CHECK3-32-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
+// CHECK3-32-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
+// CHECK3-32-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK3-32-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32
+// CHECK3-32-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4
+// CHECK3-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
+// CHECK3-32-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP57]])
+// CHECK3-32-NEXT: ret i32 [[ADD3]]
+// CHECK3-32-LABEL: define {{[^@]+}}@_ZL7fstatici
+// CHECK3-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK3-32-NEXT: entry:
+// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[AAA:%.*]] = alloca i8, align 1
+// CHECK3-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
+// CHECK3-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK3-32-NEXT: store i8 0, i8* [[AAA]], align 1
+// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4
+// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
+// CHECK3-32-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
+// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
+// CHECK3-32-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1
+// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
+// CHECK3-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4
+// CHECK3-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4
+// CHECK3-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP8]], align 4
+// CHECK3-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4
+// CHECK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-32-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK3-32-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4
+// CHECK3-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP13]], align 4
+// CHECK3-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-32-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
+// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
+// CHECK3-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-32-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
+// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
+// CHECK3-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-32-NEXT: store i8* null, i8** [[TMP18]], align 4
+// CHECK3-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-32-NEXT: store i32 2, i32* [[TMP21]], align 4
+// CHECK3-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-32-NEXT: store i32 3, i32* [[TMP22]], align 4
+// CHECK3-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-32-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4
+// CHECK3-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-32-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4
+// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
+// CHECK3-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-32-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
+// CHECK3-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-32-NEXT: store i8** null, i8*** [[TMP27]], align 4
+// CHECK3-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-32-NEXT: store i8** null, i8*** [[TMP28]], align 4
+// CHECK3-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-32-NEXT: store i64 0, i64* [[TMP29]], align 8
+// CHECK3-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-32-NEXT: store i64 0, i64* [[TMP30]], align 8
+// CHECK3-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
+// CHECK3-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
+// CHECK3-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds
[[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-32-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK3-32-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK3-32-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK3-32-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-32: omp_offload.failed: +// CHECK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3-32: omp_offload.cont: +// CHECK3-32-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: ret i32 [[TMP36]] +// CHECK3-32-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK3-32-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK3-32-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-32-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK3-32-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK3-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4 +// CHECK3-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK3-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP6]], align 4 +// CHECK3-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4 +// CHECK3-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK3-32-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4 +// CHECK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK3-32-NEXT: store i8* null, i8** [[TMP11]], align 4 +// CHECK3-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 
+// CHECK3-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK3-32-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK3-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK3-32-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK3-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK3-32-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4 +// CHECK3-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK3-32-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 4 +// CHECK3-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK3-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4 +// CHECK3-32-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK3-32-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4 +// CHECK3-32-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP20]], align 4 +// CHECK3-32-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK3-32-NEXT: store i8** null, i8*** [[TMP21]], align 4 +// CHECK3-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK3-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK3-32-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK3-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK3-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK3-32-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK3-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK3-32-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK3-32-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK3-32-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK3-32-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK3-32: omp_offload.failed: +// CHECK3-32-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK3-32: omp_offload.cont: +// CHECK3-32-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK3-32-NEXT: ret i32 [[TMP29]] +// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK3-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK3-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK3-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK3-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// CHECK3-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK3-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// CHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK3-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK3-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK3-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK3-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK3-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// CHECK3-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK3-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK3-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-32-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK3-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK3-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// CHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK3-32-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK3-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK3-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]] +// CHECK3-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 
+// CHECK3-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK3-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK3-32-NEXT: ret void +// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK3-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// CHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK3-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK3-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// CHECK3-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// CHECK3-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK3-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK3-32-NEXT: ret void +// CHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK3-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 
1 +// CHECK3-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK3-32-NEXT: ret void +// CHECK3-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK3-32-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK3-32-NEXT: entry: +// CHECK3-32-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK3-32-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// TCHECK-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// TCHECK-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// TCHECK-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// TCHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// TCHECK-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// TCHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// TCHECK-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// TCHECK-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// TCHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// TCHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// TCHECK-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// TCHECK-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// TCHECK-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// TCHECK-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// TCHECK-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// TCHECK-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP1:%.*]] = load 
i64, i64* [[VLA_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// TCHECK-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// TCHECK-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// TCHECK-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// TCHECK-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// TCHECK-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// TCHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// TCHECK-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// TCHECK-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false) +// TCHECK-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// TCHECK-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// TCHECK-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// TCHECK-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// TCHECK-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// TCHECK-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// TCHECK-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// TCHECK-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// TCHECK-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// TCHECK-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// TCHECK-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// TCHECK-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// TCHECK-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// TCHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// TCHECK-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// TCHECK-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// TCHECK-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// TCHECK-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// TCHECK-64-NEXT: store 
double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// TCHECK-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// TCHECK-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// TCHECK-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// TCHECK-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// TCHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// TCHECK-64-NEXT: store i64 1, i64* [[X]], align 8 +// TCHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// TCHECK-64-NEXT: store i8 1, i8* [[Y]], align 8 +// TCHECK-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// TCHECK-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// TCHECK-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// TCHECK-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// TCHECK-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0 +// TCHECK-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4 +// TCHECK-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// TCHECK-64-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0 +// TCHECK-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// TCHECK-64-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK-64-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0 +// TCHECK-64-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8 +// TCHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 +// TCHECK-64-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 8 +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// TCHECK-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// TCHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// TCHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// TCHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// TCHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK-64-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// TCHECK-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// TCHECK-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// TCHECK-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// TCHECK-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// TCHECK-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// TCHECK-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// TCHECK-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// TCHECK-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// TCHECK-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// TCHECK-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// TCHECK-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// TCHECK-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// TCHECK-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// TCHECK-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// TCHECK-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// TCHECK-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// 
TCHECK-64-NEXT: store double [[ADD]], double* [[A]], align 8 +// TCHECK-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// TCHECK-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// TCHECK-64-NEXT: store double [[INC]], double* [[A5]], align 8 +// TCHECK-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// TCHECK-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// TCHECK-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// TCHECK-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// TCHECK-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// TCHECK-64-NEXT: ret void +// TCHECK-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// TCHECK-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK-64-NEXT: entry: +// TCHECK-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// TCHECK-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// TCHECK-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8 +// TCHECK1-64-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK1-64-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8 +// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32* +// TCHECK1-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// TCHECK1-64-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 
dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8 +// TCHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8 +// TCHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8 +// TCHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8 +// TCHECK1-64-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8 +// TCHECK1-64-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// TCHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// TCHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// TCHECK1-64-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// TCHECK1-64-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK1-64-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8 +// TCHECK1-64-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK1-64-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8 +// TCHECK1-64-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8 +// TCHECK1-64-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8 +// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// TCHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK1-64-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8 +// TCHECK1-64-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// TCHECK1-64-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// TCHECK1-64-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// TCHECK1-64-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8 +// TCHECK1-64-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4 +// TCHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK1-64-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4 +// TCHECK1-64-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// TCHECK1-64-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 
false) +// TCHECK1-64-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// TCHECK1-64-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false) +// TCHECK1-64-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK1-64-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8 +// TCHECK1-64-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK1-64-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8 +// TCHECK1-64-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]] +// TCHECK1-64-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 +// TCHECK1-64-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// TCHECK1-64-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false) +// TCHECK1-64-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// TCHECK1-64-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false) +// TCHECK1-64-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// TCHECK1-64-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// TCHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// TCHECK1-64-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// TCHECK1-64-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2 +// TCHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// TCHECK1-64-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3 +// TCHECK1-64-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// TCHECK1-64-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1 +// TCHECK1-64-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2 +// TCHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// TCHECK1-64-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]] +// TCHECK1-64-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]] +// TCHECK1-64-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3 +// TCHECK1-64-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// TCHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// TCHECK1-64-NEXT: store i64 1, i64* [[X]], align 8 +// TCHECK1-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// TCHECK1-64-NEXT: store i8 1, i8* [[Y]], align 8 +// TCHECK1-64-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// TCHECK1-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// TCHECK1-64-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// TCHECK1-64-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8 +// TCHECK1-64-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 
+// TCHECK1-64-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8 +// TCHECK1-64-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0 +// TCHECK1-64-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4 +// TCHECK1-64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double +// TCHECK1-64-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0 +// TCHECK1-64-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8 +// TCHECK1-64-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// TCHECK1-64-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0 +// TCHECK1-64-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8 +// TCHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00 +// TCHECK1-64-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 8 +// TCHECK1-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK1-64-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// TCHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// TCHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK1-64-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// TCHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// TCHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK1-64-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// TCHECK1-64-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// TCHECK1-64-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// TCHECK1-64-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// TCHECK1-64-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// TCHECK1-64-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK1-64-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK1-64-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// TCHECK1-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// TCHECK1-64-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, 
align 8 +// TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// TCHECK1-64-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// TCHECK1-64-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// TCHECK1-64-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// TCHECK1-64-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// TCHECK1-64-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// TCHECK1-64-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// TCHECK1-64-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// TCHECK1-64-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// TCHECK1-64-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK1-64-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// TCHECK1-64-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// TCHECK1-64-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// TCHECK1-64-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// TCHECK1-64-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// TCHECK1-64-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// TCHECK1-64-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// TCHECK1-64-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK1-64-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// TCHECK1-64-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// TCHECK1-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK1-64-NEXT: store double [[ADD]], double* [[A]], align 8 +// TCHECK1-64-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// TCHECK1-64-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// TCHECK1-64-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// TCHECK1-64-NEXT: store double [[INC]], double* [[A5]], align 8 +// TCHECK1-64-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// TCHECK1-64-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// TCHECK1-64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// TCHECK1-64-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// TCHECK1-64-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// TCHECK1-64-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// TCHECK1-64-NEXT: ret void +// TCHECK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// TCHECK1-64-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// TCHECK1-64-NEXT: entry: +// TCHECK1-64-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// 
TCHECK1-64-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// TCHECK1-64-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// TCHECK1-64-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// TCHECK1-64-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// TCHECK1-64-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// TCHECK1-64-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// TCHECK1-64-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// TCHECK1-64-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// TCHECK1-64-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// TCHECK1-64-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// TCHECK1-64-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// TCHECK1-64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// TCHECK1-64-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// TCHECK1-64-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK1-64-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// TCHECK1-64-NEXT: ret void +// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] { +// TCHECK2-32-NEXT: entry: +// TCHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4 +// TCHECK2-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// TCHECK2-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4 +// TCHECK2-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// TCHECK2-32-NEXT: ret void +// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// TCHECK2-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] { +// TCHECK2-32-NEXT: entry: +// TCHECK2-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// TCHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// TCHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// TCHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// TCHECK2-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// TCHECK2-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// TCHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// TCHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// TCHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// TCHECK2-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// TCHECK2-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// 
TCHECK2-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
+// TCHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// TCHECK2-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
+// TCHECK2-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
+// TCHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// TCHECK2-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
+// TCHECK2-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
+// TCHECK2-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
+// TCHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
+// TCHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// TCHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
+// TCHECK2-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
+// TCHECK2-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
+// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
+// TCHECK2-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
+// TCHECK2-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
+// TCHECK2-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
+// TCHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// TCHECK2-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
+// TCHECK2-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
+// TCHECK2-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
+// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
+// TCHECK2-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
+// TCHECK2-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
+// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
+// TCHECK2-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// TCHECK2-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
+// TCHECK2-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
+// TCHECK2-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
+// TCHECK2-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// TCHECK2-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
+// TCHECK2-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
+// TCHECK2-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
+// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
+// TCHECK2-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
+// TCHECK2-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
+// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
+// TCHECK2-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
+// TCHECK2-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
+// TCHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
+// TCHECK2-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
+// TCHECK2-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
+// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
+// TCHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// TCHECK2-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
+// TCHECK2-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
+// TCHECK2-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
+// TCHECK2-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
+// TCHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
+// TCHECK2-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
+// TCHECK2-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
+// TCHECK2-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
+// TCHECK2-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
+// TCHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
+// TCHECK2-32-NEXT: store i64 1, i64* [[X]], align 4
+// TCHECK2-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
+// TCHECK2-32-NEXT: store i8 1, i8* [[Y]], align 4
+// TCHECK2-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// TCHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
+// TCHECK2-32-NEXT: ret void
+// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
+// TCHECK2-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
+// TCHECK2-32-NEXT: entry:
+// TCHECK2-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// TCHECK2-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
+// TCHECK2-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// TCHECK2-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
+// TCHECK2-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
+// TCHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4
+// TCHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
+// TCHECK2-32-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
+// TCHECK2-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4
+// TCHECK2-32-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// TCHECK2-32-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
+// TCHECK2-32-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
+// TCHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
+// TCHECK2-32-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 4
+// TCHECK2-32-NEXT: ret void
+// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
+// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK2-32-NEXT: entry:
+// TCHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// TCHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// TCHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// TCHECK2-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
+// TCHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// TCHECK2-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
+// TCHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// TCHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// TCHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// TCHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
+// TCHECK2-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
+// TCHECK2-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
+// TCHECK2-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
+// TCHECK2-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1
+// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// TCHECK2-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK2-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
+// TCHECK2-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
+// TCHECK2-32-NEXT: ret void
+// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
+// TCHECK2-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
+// TCHECK2-32-NEXT: entry:
+// TCHECK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
+// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// TCHECK2-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
+// TCHECK2-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// TCHECK2-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// TCHECK2-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// TCHECK2-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
+// TCHECK2-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
+// TCHECK2-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// TCHECK2-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// TCHECK2-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// TCHECK2-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
+// TCHECK2-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
+// TCHECK2-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// TCHECK2-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
+// TCHECK2-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// TCHECK2-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
+// TCHECK2-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// TCHECK2-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
+// TCHECK2-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
+// TCHECK2-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
+// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
+// TCHECK2-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
+// TCHECK2-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
+// TCHECK2-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
+// TCHECK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK2-32-NEXT: store double [[ADD]], double* [[A]], align 4
+// TCHECK2-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK2-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4
+// TCHECK2-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
+// TCHECK2-32-NEXT: store double [[INC]], double* [[A4]], align 4
+// TCHECK2-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
+// TCHECK2-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
+// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
+// TCHECK2-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
+// TCHECK2-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
+// TCHECK2-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// TCHECK2-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
+// TCHECK2-32-NEXT: ret void
+// TCHECK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
+// TCHECK2-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK2-32-NEXT: entry:
+// TCHECK2-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// TCHECK2-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// TCHECK2-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// TCHECK2-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// TCHECK2-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// TCHECK2-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK2-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// TCHECK2-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// TCHECK2-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK2-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// TCHECK2-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// TCHECK2-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK2-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
+// TCHECK2-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
+// TCHECK2-32-NEXT: ret void
+// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
+// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
+// TCHECK3-32-NEXT: entry:
+// TCHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4
+// TCHECK3-32-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// TCHECK3-32-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4
+// TCHECK3-32-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4
+// TCHECK3-32-NEXT: ret void
+// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
+// TCHECK3-32-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
+// TCHECK3-32-NEXT: entry:
+// TCHECK3-32-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
+// TCHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4
+// TCHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
+// TCHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4
+// TCHECK3-32-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
+// TCHECK3-32-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
+// TCHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// TCHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
+// TCHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
+// TCHECK3-32-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
+// TCHECK3-32-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
+// TCHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// TCHECK3-32-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
+// TCHECK3-32-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
+// TCHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// TCHECK3-32-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
+// TCHECK3-32-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
+// TCHECK3-32-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
+// TCHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
+// TCHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// TCHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
+// TCHECK3-32-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
+// TCHECK3-32-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
+// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
+// TCHECK3-32-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
+// TCHECK3-32-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
+// TCHECK3-32-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
+// TCHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// TCHECK3-32-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
+// TCHECK3-32-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
+// TCHECK3-32-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
+// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
+// TCHECK3-32-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
+// TCHECK3-32-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
+// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
+// TCHECK3-32-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// TCHECK3-32-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
+// TCHECK3-32-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
+// TCHECK3-32-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
+// TCHECK3-32-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// TCHECK3-32-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
+// TCHECK3-32-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
+// TCHECK3-32-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
+// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
+// TCHECK3-32-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
+// TCHECK3-32-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
+// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
+// TCHECK3-32-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
+// TCHECK3-32-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
+// TCHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
+// TCHECK3-32-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
+// TCHECK3-32-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
+// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
+// TCHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// TCHECK3-32-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
+// TCHECK3-32-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
+// TCHECK3-32-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
+// TCHECK3-32-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
+// TCHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
+// TCHECK3-32-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
+// TCHECK3-32-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
+// TCHECK3-32-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
+// TCHECK3-32-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
+// TCHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
+// TCHECK3-32-NEXT: store i64 1, i64* [[X]], align 4
+// TCHECK3-32-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
+// TCHECK3-32-NEXT: store i8 1, i8* [[Y]], align 4
+// TCHECK3-32-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// TCHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
+// TCHECK3-32-NEXT: ret void
+// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
+// TCHECK3-32-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
+// TCHECK3-32-NEXT: entry:
+// TCHECK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// TCHECK3-32-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
+// TCHECK3-32-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// TCHECK3-32-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
+// TCHECK3-32-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
+// TCHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4
+// TCHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
+// TCHECK3-32-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
+// TCHECK3-32-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4
+// TCHECK3-32-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// TCHECK3-32-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
+// TCHECK3-32-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
+// TCHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
+// TCHECK3-32-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 4
+// TCHECK3-32-NEXT: ret void
+// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
+// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK3-32-NEXT: entry:
+// TCHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// TCHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// TCHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// TCHECK3-32-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
+// TCHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// TCHECK3-32-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
+// TCHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// TCHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// TCHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// TCHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
+// TCHECK3-32-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
+// TCHECK3-32-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
+// TCHECK3-32-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
+// TCHECK3-32-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1
+// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// TCHECK3-32-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK3-32-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
+// TCHECK3-32-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
+// TCHECK3-32-NEXT: ret void
+// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
+// TCHECK3-32-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
+// TCHECK3-32-NEXT: entry:
+// TCHECK3-32-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
+// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
+// TCHECK3-32-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// TCHECK3-32-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
+// TCHECK3-32-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
+// TCHECK3-32-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// TCHECK3-32-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// TCHECK3-32-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// TCHECK3-32-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
+// TCHECK3-32-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
+// TCHECK3-32-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// TCHECK3-32-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
+// TCHECK3-32-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// TCHECK3-32-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
+// TCHECK3-32-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// TCHECK3-32-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
+// TCHECK3-32-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
+// TCHECK3-32-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
+// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
+// TCHECK3-32-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
+// TCHECK3-32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
+// TCHECK3-32-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
+// TCHECK3-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK3-32-NEXT: store double [[ADD]], double* [[A]], align 4
+// TCHECK3-32-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK3-32-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4
+// TCHECK3-32-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
+// TCHECK3-32-NEXT: store double [[INC]], double* [[A4]], align 4
+// TCHECK3-32-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
+// TCHECK3-32-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
+// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
+// TCHECK3-32-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
+// TCHECK3-32-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
+// TCHECK3-32-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// TCHECK3-32-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
+// TCHECK3-32-NEXT: ret void
+// TCHECK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
+// TCHECK3-32-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK3-32-NEXT: entry:
+// TCHECK3-32-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-32-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// TCHECK3-32-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// TCHECK3-32-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// TCHECK3-32-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// TCHECK3-32-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// TCHECK3-32-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK3-32-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// TCHECK3-32-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// TCHECK3-32-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK3-32-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// TCHECK3-32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// TCHECK3-32-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK3-32-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
+// TCHECK3-32-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
+// TCHECK3-32-NEXT: ret void
+// CHECK0-LABEL: define {{[^@]+}}@_Z3fooiPd
+// CHECK0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK0-NEXT: entry:
+// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK0-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK0-NEXT: [[AA:%.*]] = alloca i16, align 2
+// CHECK0-NEXT: [[B:%.*]] = alloca [10 x float], align 4
+// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
+// CHECK0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
+// CHECK0-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// CHECK0-NEXT: [[P:%.*]] = alloca i32*, align 64
+// CHECK0-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
+// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK0-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8
+// CHECK0-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8
+// CHECK0-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// CHECK0-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK0-NEXT: store i16 0, i16* [[AA]], align 2
+// CHECK0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// CHECK0-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
+// CHECK0-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
+// CHECK0-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
+// CHECK0-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
+// CHECK0-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+// CHECK0-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
+// CHECK0-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
+// CHECK0-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
+// CHECK0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: store i32 [[TMP6]], i32* [[X]], align 4
+// CHECK0-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
+// CHECK0-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4
+// CHECK0-NEXT: store i32* [[A]], i32** [[P]], align 64
+// CHECK0-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
+// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
+// CHECK0-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4
+// CHECK0-NEXT: [[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8
+// CHECK0-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64
+// CHECK0-NEXT: [[TMP11:%.*]] = load i32, i32* @ga, align 4
+// CHECK0-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32*
+// CHECK0-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4
+// CHECK0-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8
+// CHECK0-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64*
+// CHECK0-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8
+// CHECK0-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK0-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8
+// CHECK0-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK0-NEXT: store i8* null, i8** [[TMP17]], align 8
+// CHECK0-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK0-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
+// CHECK0-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8
+// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK0-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32**
+// CHECK0-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8
+// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK0-NEXT: store i8* null, i8** [[TMP22]], align 8
+// CHECK0-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK0-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64*
+// CHECK0-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8
+// CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK0-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK0-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8
+// CHECK0-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK0-NEXT: store i8* null, i8** [[TMP27]], align 8
+// CHECK0-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK0-NEXT: store i32 2, i32* [[TMP30]], align 4
+// CHECK0-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK0-NEXT: store i32 3, i32* [[TMP31]], align 4
+// CHECK0-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK0-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8
+// CHECK0-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK0-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8
+// CHECK0-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK0-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8
+// CHECK0-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK0-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8
+// CHECK0-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK0-NEXT: store i8** null, i8*** [[TMP36]], align 8
+// CHECK0-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK0-NEXT: store i8** null, i8*** [[TMP37]], align 8
+// CHECK0-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK0-NEXT: store i64 0, i64* [[TMP38]], align 8
+// CHECK0-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK0-NEXT: store i64 0, i64* [[TMP39]], align 8
+// CHECK0-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4
+// CHECK0-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4
+// CHECK0-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK0-NEXT: store i32 0, i32* [[TMP42]], align 4
+// CHECK0-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK0-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0
+// CHECK0-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK0: omp_offload.failed:
+// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]]
+// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK0: omp_offload.cont:
+// CHECK0-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2
+// CHECK0-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16*
+// CHECK0-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2
+// CHECK0-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8
+// CHECK0-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK0-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]]
+// CHECK0-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8
+// CHECK0-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false)
+// CHECK0-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64*
+// CHECK0-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8
+// CHECK0-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64*
+// CHECK0-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8
+// CHECK0-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0
+// CHECK0-NEXT: store i8* null, i8** [[TMP55]], align 8
+// CHECK0-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1
+// CHECK0-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
+// CHECK0-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8
+// CHECK0-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1
+// CHECK0-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
+// CHECK0-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8
+// CHECK0-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1
+// CHECK0-NEXT: store i8* null, i8** [[TMP60]], align 8
+// CHECK0-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2
+// CHECK0-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64*
+// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8
+// CHECK0-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2
+// CHECK0-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64*
+// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8
+// CHECK0-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2
+// CHECK0-NEXT: store i8* null, i8** [[TMP65]], align 8
+// CHECK0-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3
+// CHECK0-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
+// CHECK0-NEXT: store float* [[VLA]], float** [[TMP67]], align 8
+// CHECK0-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3
+// CHECK0-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
+// CHECK0-NEXT: store float* [[VLA]], float** [[TMP69]], align 8
+// CHECK0-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK0-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8
+// CHECK0-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3
+// CHECK0-NEXT: store i8* null, i8** [[TMP71]], align 8
+// CHECK0-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4
+// CHECK0-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
+// CHECK0-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8
+// CHECK0-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4
+// CHECK0-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
+// CHECK0-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8
+// CHECK0-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4
+// CHECK0-NEXT: store i8* null, i8** [[TMP76]], align 8
+// CHECK0-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5
+// CHECK0-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64*
+// CHECK0-NEXT: store i64 5, i64* [[TMP78]], align 8
+// CHECK0-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5
+// CHECK0-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64*
+// CHECK0-NEXT: store i64 5, i64* [[TMP80]], align 8
+// CHECK0-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5
+// CHECK0-NEXT: store i8* null, i8** [[TMP81]], align 8
+// CHECK0-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6
+// CHECK0-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64*
+// CHECK0-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8
+// CHECK0-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6
+// CHECK0-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64*
+// CHECK0-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8
+// CHECK0-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6
+// CHECK0-NEXT: store i8* null, i8** [[TMP86]], align 8
+// CHECK0-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7
+// CHECK0-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
+// CHECK0-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8
+// CHECK0-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7
+// CHECK0-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
+// CHECK0-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8
+// CHECK0-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK0-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8
+// CHECK0-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7
+// CHECK0-NEXT: store i8* null, i8** [[TMP92]], align 8
+// CHECK0-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8
+// CHECK0-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
+// CHECK0-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8
+// CHECK0-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8
+// CHECK0-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
+// CHECK0-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8
+// CHECK0-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8
+// CHECK0-NEXT: store i8* null, i8** [[TMP97]], align 8
+// CHECK0-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0
+// CHECK0-NEXT: store i32 2, i32* [[TMP101]], align 4
+// CHECK0-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1
+// CHECK0-NEXT: store i32 9, i32* [[TMP102]], align 4
+// CHECK0-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2
+// CHECK0-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8
+// CHECK0-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3
+// CHECK0-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8
+// CHECK0-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4
+// CHECK0-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8
+// CHECK0-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5
+// CHECK0-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8
+// CHECK0-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6
+// CHECK0-NEXT: store i8** null, i8*** [[TMP107]], align 8
+// CHECK0-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7
+// CHECK0-NEXT: store i8** null, i8*** [[TMP108]], align 8
+// CHECK0-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8
+// CHECK0-NEXT: store i64 0, i64* [[TMP109]], align 8
+// CHECK0-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9
+// CHECK0-NEXT: store i64 0, i64* [[TMP110]], align 8
+// CHECK0-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10
+// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
+// CHECK0-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11
+// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
+// CHECK0-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12
+// CHECK0-NEXT: store i32 0, i32* [[TMP113]], align 4
+// CHECK0-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]])
+// CHECK0-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
+// CHECK0-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]]
+// CHECK0: omp_offload.failed8:
+// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
+// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT9]]
+// CHECK0: omp_offload.cont9:
+// CHECK0-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// CHECK0-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
+// CHECK0-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8
+// CHECK0-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
+// CHECK0-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8
+// CHECK0-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
+// CHECK0-NEXT: store i8* null, i8** [[TMP121]], align 8
+// CHECK0-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
+// CHECK0-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
+// CHECK0-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8
+// CHECK0-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
+// CHECK0-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
+// CHECK0-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8
+// CHECK0-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
+// CHECK0-NEXT: store i8* null, i8** [[TMP126]], align 8
+// CHECK0-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0
+// CHECK0-NEXT: store i32 2, i32* [[TMP129]], align 4
+// CHECK0-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1
+// CHECK0-NEXT: store i32 2, i32* [[TMP130]], align 4
+// CHECK0-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2
+// CHECK0-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8
+// CHECK0-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3
+// CHECK0-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8
+// CHECK0-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4
+// CHECK0-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8
+// CHECK0-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5
+// CHECK0-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8
+// CHECK0-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6
+// CHECK0-NEXT: store i8** null, i8*** [[TMP135]], align 8
+// CHECK0-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7
+// CHECK0-NEXT: store i8** null, i8*** [[TMP136]], align 8
+// CHECK0-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8
+// CHECK0-NEXT: store i64 0, i64* [[TMP137]], align 8
+// CHECK0-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9
+// CHECK0-NEXT: store i64 0, i64* [[TMP138]], align 8
+// CHECK0-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
+// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
+// CHECK0-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
+// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
+// CHECK0-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
+// CHECK0-NEXT: store i32 0, i32* [[TMP141]], align 4
+// CHECK0-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
+// CHECK0-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
+// CHECK0-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
+// CHECK0: omp_offload.failed14:
+// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
+// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT15]]
+// CHECK0: omp_offload.cont15:
+// CHECK0-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4
+// CHECK0-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK0-NEXT: call void @llvm.stackrestore(i8* [[TMP145]])
+// CHECK0-NEXT: ret i32 [[TMP144]]
+//
+//
+// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
+// CHECK0-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK0-NEXT: entry:
+// CHECK0-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8
+// CHECK0-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// CHECK0-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8
+// CHECK0-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8
+// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// CHECK0-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
+// CHECK0-NEXT: ret void
+//
+//
+// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
+// CHECK0-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
+// CHECK0-NEXT: entry:
+// CHECK0-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
+// CHECK0-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8
+// CHECK0-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
+// CHECK0-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8
+// CHECK0-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
+// CHECK0-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
+// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
+// CHECK0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
+// CHECK0-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
+// CHECK0-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
+// CHECK0-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
+// CHECK0-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8
+// CHECK0-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
+// CHECK0-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
+// CHECK0-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
+// CHECK0-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8
+// CHECK0-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
+// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
+// CHECK0-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
+// CHECK0-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
+// CHECK0-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
+// CHECK0-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
+// CHECK0-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
+// CHECK0-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
+// CHECK0-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
+// CHECK0-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
+// CHECK0-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
+// CHECK0-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
+// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
+// CHECK0-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
+// CHECK0-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
+// CHECK0-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
+// CHECK0-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
+// CHECK0-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK0-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
+// CHECK0-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
+// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
+// CHECK0-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
+// CHECK0-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
+// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
+// CHECK0-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
+// CHECK0-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
+// CHECK0-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
+// CHECK0-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
+// CHECK0-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
+// CHECK0-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
+// CHECK0-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
+// CHECK0-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
+// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
+// CHECK0-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
+// CHECK0-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
+// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
+// CHECK0-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
+// CHECK0-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
+// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
+// CHECK0-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
+// CHECK0-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
+// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
+// CHECK0-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK0-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
+// CHECK0-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
+// CHECK0-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
+// CHECK0-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
+// CHECK0-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
+// CHECK0-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
+// CHECK0-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
+// CHECK0-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
+// CHECK0-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
+// CHECK0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
+// CHECK0-NEXT: store i64 1, i64* [[X]], align 8
+// CHECK0-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
+// CHECK0-NEXT: store i8 1, i8* [[Y]], align 8
+// CHECK0-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK0-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
+// CHECK0-NEXT: ret void
+//
+//
+// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
+// CHECK0-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
+// CHECK0-NEXT: entry:
+// CHECK0-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// CHECK0-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
+// CHECK0-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// CHECK0-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// CHECK0-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
+// CHECK0-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
+// CHECK0-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
+// CHECK0-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
+// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
+// CHECK0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4
+// CHECK0-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
+// CHECK0-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
+// CHECK0-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8
+// CHECK0-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// CHECK0-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
+// CHECK0-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
+// CHECK0-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
+// CHECK0-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8
+// CHECK0-NEXT: ret void
+//
+//
+// CHECK0-LABEL: define {{[^@]+}}@_Z3bariPd
+// CHECK0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK0-NEXT: entry:
+// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK0-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK0-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
+// CHECK0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// CHECK0-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// CHECK0-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
+// CHECK0-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
+// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
+// CHECK0-NEXT: store i32 [[ADD]], i32* [[A]], align 4
+// CHECK0-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
+// CHECK0-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
+// CHECK0-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
+// CHECK0-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
+// CHECK0-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
+// CHECK0-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK0-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
+// CHECK0-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
+// CHECK0-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
+// CHECK0-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
+// CHECK0-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
+// CHECK0-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
+// CHECK0-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
+// CHECK0-NEXT: ret i32 [[TMP9]]
+//
+//
+// CHECK0-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
+// CHECK0-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
+// CHECK0-NEXT: entry:
+// CHECK0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK0-NEXT: [[B:%.*]] = alloca i32, align 4
+// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
+// CHECK0-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
+// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK0-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// CHECK0-NEXT: store i32 [[ADD]], i32* [[B]], align 4
+// CHECK0-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK0-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+// CHECK0-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave()
+// CHECK0-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
+// CHECK0-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
+// CHECK0-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
+// CHECK0-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
+// CHECK0-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4
+// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
+// CHECK0-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4
+// CHECK0-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
+// CHECK0-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
+// CHECK0-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
+// CHECK0-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
+// CHECK0-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
+// CHECK0-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
+// CHECK0-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK0-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
+//
CHECK0-NEXT: store double* [[A]], double** [[TMP13]], align 8 +// CHECK0-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-NEXT: store i8* null, i8** [[TMP14]], align 8 +// CHECK0-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK0-NEXT: store i64 [[TMP6]], i64* [[TMP16]], align 8 +// CHECK0-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64* +// CHECK0-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8 +// CHECK0-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-NEXT: store i8* null, i8** [[TMP19]], align 8 +// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK0-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64* +// CHECK0-NEXT: store i64 2, i64* [[TMP21]], align 8 +// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK0-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64* +// CHECK0-NEXT: store i64 2, i64* [[TMP23]], align 8 +// CHECK0-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK0-NEXT: store i8* null, i8** [[TMP24]], align 8 +// CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK0-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK0-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8 +// CHECK0-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK0-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64* +// CHECK0-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8 +// CHECK0-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK0-NEXT: store i8* null, i8** [[TMP29]], align 8 +// CHECK0-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK0-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK0-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8 +// CHECK0-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK0-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK0-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8 +// CHECK0-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK0-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8 +// CHECK0-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4 +// CHECK0-NEXT: store i8* null, i8** [[TMP35]], align 8 +// CHECK0-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-NEXT: store i32 2, i32* [[TMP39]], align 4 +// 
CHECK0-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK0-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8 +// CHECK0-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8 +// CHECK0-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 8 +// CHECK0-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8 +// CHECK0-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-NEXT: store i8** null, i8*** [[TMP45]], align 8 +// CHECK0-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-NEXT: store i8** null, i8*** [[TMP46]], align 8 +// CHECK0-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK0-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK0-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK0-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK0-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK0-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK0-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0: omp_offload.failed: +// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]] +// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0: omp_offload.cont: +// CHECK0-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]] +// CHECK0-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, 
i16* [[ARRAYIDX]], i64 1 +// CHECK0-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK0-NEXT: [[CONV3:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK0-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK0-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]] +// CHECK0-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK0-NEXT: ret i32 [[ADD4]] +// +// +// CHECK0-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK0-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK0-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK0-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK0-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK0-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8* +// CHECK0-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1 +// CHECK0-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8 +// CHECK0-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK0-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64* +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8 +// CHECK0-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-NEXT: store i8* null, i8** [[TMP8]], align 8 +// CHECK0-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64* +// CHECK0-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8 +// CHECK0-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64* +// CHECK0-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8 +// CHECK0-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-NEXT: store i8* null, i8** [[TMP13]], align 8 +// CHECK0-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK0-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8 +// CHECK0-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, 
i32 2 +// CHECK0-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8 +// CHECK0-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK0-NEXT: store i8* null, i8** [[TMP18]], align 8 +// CHECK0-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK0-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8 +// CHECK0-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8 +// CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8 +// CHECK0-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8 +// CHECK0-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-NEXT: store i8** null, i8*** [[TMP27]], align 8 +// CHECK0-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-NEXT: store i8** null, i8*** [[TMP28]], align 8 +// CHECK0-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK0-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK0-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK0-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK0-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK0-NEXT: [[TMP34:%.*]] = call i32 
@__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK0-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0: omp_offload.failed: +// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0: omp_offload.cont: +// CHECK0-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: ret i32 [[TMP36]] +// +// +// CHECK0-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK0-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8 +// CHECK0-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK0-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK0-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4 +// CHECK0-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK0-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64* +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8 +// CHECK0-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64* +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8 +// CHECK0-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK0-NEXT: store i8* null, i8** [[TMP6]], align 8 +// CHECK0-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8 +// CHECK0-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK0-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8 +// CHECK0-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK0-NEXT: store i8* null, i8** [[TMP11]], align 8 +// CHECK0-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK0-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK0-NEXT: [[TMP15:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK0-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK0-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK0-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 8 +// CHECK0-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK0-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8 +// CHECK0-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK0-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8 +// CHECK0-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK0-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8 +// CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK0-NEXT: store i8** null, i8*** [[TMP20]], align 8 +// CHECK0-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK0-NEXT: store i8** null, i8*** [[TMP21]], align 8 +// CHECK0-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK0-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK0-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK0-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK0-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK0-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK0-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK0-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK0-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK0-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK0-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK0-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK0: omp_offload.failed: +// CHECK0-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK0-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK0: omp_offload.cont: +// CHECK0-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK0-NEXT: ret i32 [[TMP29]] +// +// +// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 
+// CHECK0-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8 +// CHECK0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK0-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 +// CHECK0-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8 +// CHECK0-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8 +// CHECK0-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// CHECK0-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8 +// CHECK0-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8 +// CHECK0-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8 +// CHECK0-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK0-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8 +// CHECK0-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK0-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2 +// CHECK0-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK0-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8 +// CHECK0-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]] +// CHECK0-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2 +// CHECK0-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK0-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false) +// CHECK0-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK0-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 +// CHECK0-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK0-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK0-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK0-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8 +// CHECK0-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK0-NEXT: store double [[INC]], double* [[A5]], align 8 +// CHECK0-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 +// CHECK0-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]] +// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]] +// CHECK0-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// CHECK0-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK0-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// CHECK0-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK0-NEXT: ret void +// +// +// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK0-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) 
[[B:%.*]]) #[[ATTR2]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK0-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 +// CHECK0-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK0-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8 +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK0-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8* +// CHECK0-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8* +// CHECK0-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK0-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK0-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK0-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1 +// CHECK0-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK0-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 +// CHECK0-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8 +// CHECK0-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1 +// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2 +// CHECK0-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK0-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK0-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK0-NEXT: ret void +// +// +// CHECK0-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK0-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK0-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK0-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK0-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK0-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK0-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK0-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK0-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK0-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false) +// CHECK0-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4 +// CHECK0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK0-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4 +// CHECK0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2 +// CHECK0-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK0-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK0-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK0-NEXT: ret void +// +// +// CHECK0-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK0-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK0-NEXT: entry: +// CHECK0-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK0-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) 
#[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// CHECK1-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK1-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[GA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK1-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS6:%.*]] = alloca [9 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS7:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[KERNEL_ARGS13:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK1-NEXT: store i16 0, i16* [[AA]], align 2 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK1-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// CHECK1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK1-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// CHECK1-NEXT: store i32* [[A]], i32** [[P]], align 64 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP8]], i32* [[CONV]], align 4 +// CHECK1-NEXT: 
[[TMP9:%.*]] = load i64, i64* [[A_CASTED]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[P]], align 64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* @ga, align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[GA_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[GA_CASTED]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i64* +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP14]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64* +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32** +// CHECK1-NEXT: store i32* [[TMP10]], i32** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32** +// CHECK1-NEXT: store i32* [[TMP10]], i32** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK1-NEXT: store i8* null, i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i64* +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64* +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK1-NEXT: store i8* null, i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, i32* [[TMP30]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK1-NEXT: store i32 3, i32* [[TMP31]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK1-NEXT: store i8** [[TMP28]], i8*** [[TMP32]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP29]], i8*** [[TMP33]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* 
[[KERNEL_ARGS]], i32 0, i32 4 +// CHECK1-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP34]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK1-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP35]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK1-NEXT: store i8** null, i8*** [[TMP36]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK1-NEXT: store i8** null, i8*** [[TMP37]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, i64* [[TMP38]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, i64* [[TMP39]], align 8 +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP40]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP41]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, i32* [[TMP42]], align 4 +// CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK1-NEXT: [[TMP44:%.*]] = icmp ne i32 [[TMP43]], 0 +// CHECK1-NEXT: br i1 [[TMP44]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK1: omp_offload.failed: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i64 [[TMP9]], i32* [[TMP10]], i64 [[TMP12]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK1: omp_offload.cont: +// CHECK1-NEXT: [[TMP45:%.*]] = load i16, i16* [[AA]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[AA_CASTED]] to i16* +// CHECK1-NEXT: store i16 [[TMP45]], i16* [[CONV3]], align 2 +// CHECK1-NEXT: [[TMP46:%.*]] = load i64, i64* [[AA_CASTED]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = mul nuw i64 [[TMP1]], 4 +// CHECK1-NEXT: [[TMP48:%.*]] = mul nuw i64 5, [[TMP4]] +// CHECK1-NEXT: [[TMP49:%.*]] = mul nuw i64 [[TMP48]], 8 +// CHECK1-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP50]], i8* align 8 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i64 72, i1 false) +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i64* +// CHECK1-NEXT: store i64 [[TMP46]], i64* [[TMP52]], align 8 +// CHECK1-NEXT: 
[[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i64* +// CHECK1-NEXT: store i64 [[TMP46]], i64* [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP55]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]** +// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]** +// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 1 +// CHECK1-NEXT: store i8* null, i8** [[TMP60]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i64* +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP62]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 2 +// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i64* +// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP64]], align 8 +// CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 2 +// CHECK1-NEXT: store i8* null, i8** [[TMP65]], align 8 +// CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float** +// CHECK1-NEXT: store float* [[VLA]], float** [[TMP67]], align 8 +// CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 3 +// CHECK1-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float** +// CHECK1-NEXT: store float* [[VLA]], float** [[TMP69]], align 8 +// CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3 +// CHECK1-NEXT: store i64 [[TMP47]], i64* [[TMP70]], align 8 +// CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 3 +// CHECK1-NEXT: store i8* null, i8** [[TMP71]], align 8 +// CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]** +// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 8 +// CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 4 +// CHECK1-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]** +// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 8 +// CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 4 +// CHECK1-NEXT: store i8* null, i8** [[TMP76]], align 8 +// CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i64* +// CHECK1-NEXT: store 
i64 5, i64* [[TMP78]], align 8 +// CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 5 +// CHECK1-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i64* +// CHECK1-NEXT: store i64 5, i64* [[TMP80]], align 8 +// CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 5 +// CHECK1-NEXT: store i8* null, i8** [[TMP81]], align 8 +// CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i64* +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP83]], align 8 +// CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 6 +// CHECK1-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i64* +// CHECK1-NEXT: store i64 [[TMP4]], i64* [[TMP85]], align 8 +// CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 6 +// CHECK1-NEXT: store i8* null, i8** [[TMP86]], align 8 +// CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double** +// CHECK1-NEXT: store double* [[VLA1]], double** [[TMP88]], align 8 +// CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 7 +// CHECK1-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double** +// CHECK1-NEXT: store double* [[VLA1]], double** [[TMP90]], align 8 +// CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7 +// CHECK1-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 8 +// CHECK1-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 7 +// CHECK1-NEXT: store i8* null, i8** [[TMP92]], align 8 +// CHECK1-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 8 +// CHECK1-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 8 +// CHECK1-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 8 +// CHECK1-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS6]], i64 0, i64 8 +// CHECK1-NEXT: store i8* null, i8** [[TMP97]], align 8 +// CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 1 +// CHECK1-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK1-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 2 
+// CHECK1-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 8 +// CHECK1-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 8 +// CHECK1-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 4 +// CHECK1-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 8 +// CHECK1-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 5 +// CHECK1-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 8 +// CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 6 +// CHECK1-NEXT: store i8** null, i8*** [[TMP107]], align 8 +// CHECK1-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 7 +// CHECK1-NEXT: store i8** null, i8*** [[TMP108]], align 8 +// CHECK1-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK1-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 10 +// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK1-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 11 +// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK1-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]], i32 0, i32 12 +// CHECK1-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK1-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS7]]) +// CHECK1-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK1-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED8:%.*]], label [[OMP_OFFLOAD_CONT9:%.*]] +// CHECK1: omp_offload.failed8: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i64 [[TMP46]], [10 x float]* [[B]], i64 [[TMP1]], float* [[VLA]], [5 x [10 x double]]* [[C]], i64 5, i64 [[TMP4]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT9]] +// CHECK1: omp_offload.cont9: +// CHECK1-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// CHECK1-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK1-NEXT: store double* [[TMP116]], double** [[TMP118]], align 8 +// CHECK1-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 
+// CHECK1-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK1-NEXT: store double* [[TMP116]], double** [[TMP120]], align 8 +// CHECK1-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CHECK1-NEXT: store i8* null, i8** [[TMP121]], align 8 +// CHECK1-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK1-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 8 +// CHECK1-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK1-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 8 +// CHECK1-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CHECK1-NEXT: store i8* null, i8** [[TMP126]], align 8 +// CHECK1-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 0 +// CHECK1-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK1-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 1 +// CHECK1-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK1-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 2 +// CHECK1-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 8 +// CHECK1-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 3 +// CHECK1-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 8 +// CHECK1-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 4 +// CHECK1-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 8 +// CHECK1-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 5 +// CHECK1-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 8 +// CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 6 +// CHECK1-NEXT: store i8** null, i8*** [[TMP135]], align 8 +// CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 7 +// CHECK1-NEXT: store i8** null, i8*** [[TMP136]], align 8 +// CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 8 +// CHECK1-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 9 +// CHECK1-NEXT: store i64 0, i64* 
+// CHECK1-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
+// CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
+// CHECK1-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, i32* [[TMP141]], align 4
+// CHECK1-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS13]])
+// CHECK1-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
+// CHECK1-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED14:%.*]], label [[OMP_OFFLOAD_CONT15:%.*]]
+// CHECK1: omp_offload.failed14:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT15]]
+// CHECK1: omp_offload.cont15:
+// CHECK1-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP145]])
+// CHECK1-NEXT: ret i32 [[TMP144]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
+// CHECK1-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8
+// CHECK1-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// CHECK1-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8
+// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
+// CHECK1-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR2]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
+// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
+// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8
+// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
+// CHECK1-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
+// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
+// CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
+// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
+// CHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
+// CHECK1-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8
+// CHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
+// CHECK1-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
+// CHECK1-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8
+// CHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
+// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
+// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
+// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
+// CHECK1-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
+// CHECK1-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
+// CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
+// CHECK1-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
+// CHECK1-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
+// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
+// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
+// CHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
+// CHECK1-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
+// CHECK1-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
+// CHECK1-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
+// CHECK1-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
+// CHECK1-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
+// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
+// CHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
+// CHECK1-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
+// CHECK1-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
+// CHECK1-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
+// CHECK1-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
+// CHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
+// CHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
+// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
+// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
+// CHECK1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
+// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
+// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
+// CHECK1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
+// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
+// CHECK1-NEXT: store i64 1, i64* [[X]], align 8
+// CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
+// CHECK1-NEXT: store i8 1, i8* [[Y]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
+// CHECK1-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
+// CHECK1-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// CHECK1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// CHECK1-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 8, i1 false)
+// CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4
+// CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
+// CHECK1-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 0
+// CHECK1-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i64 0
+// CHECK1-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 8
+// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
+// CHECK1-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 8
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_Z3bariPd
+// CHECK1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
+// CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// CHECK1-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
+// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[A]], align 4
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK1-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
+// CHECK1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK1-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
+// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
+// CHECK1-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
+// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK1-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
+// CHECK1-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
+// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT: ret i32 [[TMP9]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
+// CHECK1-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[B:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK1-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[B]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+// CHECK1-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave()
+// CHECK1-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
+// CHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
+// CHECK1-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4
+// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_CASTED]] to i32*
+// CHECK1-NEXT: store i32 [[TMP5]], i32* [[CONV]], align 4
+// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[B_CASTED]], align 8
+// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 2, [[TMP2]]
+// CHECK1-NEXT: [[TMP8:%.*]] = mul nuw i64 [[TMP7]], 2
+// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP9]], i8* align 8 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i64 40, i1 false)
+// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
+// CHECK1-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
+// CHECK1-NEXT: store double* [[A]], double** [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store i8* null, i8** [[TMP14]], align 8
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i64*
+// CHECK1-NEXT: store i64 [[TMP6]], i64* [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i64*
+// CHECK1-NEXT: store i64 [[TMP6]], i64* [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store i8* null, i8** [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i64*
+// CHECK1-NEXT: store i64 2, i64* [[TMP21]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i64*
+// CHECK1-NEXT: store i64 2, i64* [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT: store i8* null, i8** [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i64*
+// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i64*
+// CHECK1-NEXT: store i64 [[TMP2]], i64* [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
+// CHECK1-NEXT: store i8* null, i8** [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
+// CHECK1-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 8
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
+// CHECK1-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 8
+// CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK1-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 8
+// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
+// CHECK1-NEXT: store i8* null, i8** [[TMP35]], align 8
+// CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 2, i32* [[TMP39]], align 4
+// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 5, i32* [[TMP40]], align 4
+// CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 8
+// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 8
+// CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 8
+// CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 8
+// CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store i8** null, i8*** [[TMP45]], align 8
+// CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store i8** null, i8*** [[TMP46]], align 8
+// CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, i64* [[TMP47]], align 8
+// CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, i64* [[TMP48]], align 8
+// CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
+// CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
+// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, i32* [[TMP51]], align 4
+// CHECK1-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
+// CHECK1-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1: omp_offload.failed:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i64 [[TMP6]], i64 2, i64 [[TMP2]], i16* [[VLA]]) #[[ATTR3]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK1: omp_offload.cont:
+// CHECK1-NEXT: [[TMP54:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP54]]
+// CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
+// CHECK1-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP55]] to i32
+// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], [[TMP56]]
+// CHECK1-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP57]])
+// CHECK1-NEXT: ret i32 [[ADD4]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_ZL7fstatici
+// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[AAA:%.*]] = alloca i8, align 1
+// CHECK1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
+// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK1-NEXT: store i8 0, i8* [[AAA]], align 1
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
+// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
+// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_CASTED]] to i8*
+// CHECK1-NEXT: store i8 [[TMP2]], i8* [[CONV1]], align 1
+// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[AAA_CASTED]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
+// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i64*
+// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP7]], align 8
+// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store i8* null, i8** [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i64*
+// CHECK1-NEXT: store i64 [[TMP3]], i64* [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i64*
+// CHECK1-NEXT: store i64 [[TMP3]], i64* [[TMP12]], align 8
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store i8* null, i8** [[TMP13]], align 8
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
+// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 8
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
+// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
+// CHECK1-NEXT: store i8* null, i8** [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 2, i32* [[TMP21]], align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 3, i32* [[TMP22]], align 4
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 8
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 8
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 8
+// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store i8** null, i8*** [[TMP27]], align 8
+// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store i8** null, i8*** [[TMP28]], align 8
+// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, i64* [[TMP29]], align 8
+// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, i64* [[TMP30]], align 8
+// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
+// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
+// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, i32* [[TMP33]], align 4
+// CHECK1-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
+// CHECK1-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1: omp_offload.failed:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i64 [[TMP1]], i64 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK1: omp_offload.cont:
+// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT: ret i32 [[TMP36]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
+// CHECK1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
+// CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
+// CHECK1-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
+// CHECK1-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK1-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_CASTED]] to i32*
+// CHECK1-NEXT: store i32 [[TMP0]], i32* [[CONV]], align 4
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[A_CASTED]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i64*
+// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP3]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i64*
+// CHECK1-NEXT: store i64 [[TMP1]], i64* [[TMP5]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CHECK1-NEXT: store i8* null, i8** [[TMP6]], align 8
+// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
+// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 8
+// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
+// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 8
+// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
+// CHECK1-NEXT: store i8* null, i8** [[TMP11]], align 8
+// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK1-NEXT: store i32 2, i32* [[TMP14]], align 4
+// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK1-NEXT: store i32 2, i32* [[TMP15]], align 4
+// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK1-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 8
+// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK1-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 8
+// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK1-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 8
+// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK1-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 8
+// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK1-NEXT: store i8** null, i8*** [[TMP20]], align 8
+// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK1-NEXT: store i8** null, i8*** [[TMP21]], align 8
+// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK1-NEXT: store i64 0, i64* [[TMP22]], align 8
+// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK1-NEXT: store i64 0, i64* [[TMP23]], align 8
+// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK1-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK1-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
+// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK1-NEXT: store i32 0, i32* [[TMP26]], align 4
+// CHECK1-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK1-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK1: omp_offload.failed:
+// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i64 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
+// CHECK1-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK1: omp_offload.cont:
+// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4
+// CHECK1-NEXT: ret i32 [[TMP29]]
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
+// CHECK1-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8
+// CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
+// CHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
+// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
+// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
+// CHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
+// CHECK1-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
+// CHECK1-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
+// CHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
+// CHECK1-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
+// CHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
+// CHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
+// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
+// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
+// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
+// CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
+// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
+// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8
+// CHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8
+// CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
+// CHECK1-NEXT: store double [[INC]], double* [[A5]], align 8
+// CHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16
+// CHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
+// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
+// CHECK1-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
+// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// CHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
+// CHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
+// CHECK1-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4
+// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// CHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
+// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
+// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
+// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
+// CHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
+// CHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
+// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
+// CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
+// CHECK1-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK1-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
+// CHECK1-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
+// CHECK1-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
+// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
+// CHECK1-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// CHECK1-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
+// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
+// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
+// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK1-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CHECK1-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: call void @__tgt_register_requires(i64 1)
+// CHECK1-NEXT: ret void
+//
+//
+// CHECK2-LABEL: define {{[^@]+}}@_Z3fooiPd
+// CHECK2-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK2-NEXT: entry:
+// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[AA:%.*]] = alloca i16, align 2
+// CHECK2-NEXT: [[B:%.*]] = alloca [10 x float], align 4
+// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
+// CHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
+// CHECK2-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// CHECK2-NEXT: [[P:%.*]] = alloca i32*, align 64
+// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
+// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK2-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
+// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
+// CHECK2-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
+// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
+// CHECK2-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
+// CHECK2-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
+// CHECK2-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK2-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// CHECK2-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK2-NEXT: store i16 0, i16* [[AA]], align 2
+// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK2-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave()
+// CHECK2-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
+// CHECK2-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
+// CHECK2-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
+// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK2-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
+// CHECK2-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
+// CHECK2-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
+// CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP4]], i32* [[X]], align 4
+// CHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK2-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4
+// CHECK2-NEXT: store i32* [[A]], i32** [[P]], align 64
+// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK2-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4
+// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
+// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4
+// CHECK2-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
+// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
+// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK2-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4
+// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
+// CHECK2-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4
+// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK2-NEXT: store i8* null, i8** [[TMP15]], align 4
+// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
+// CHECK2-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4
+// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
+// CHECK2-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4
+// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK2-NEXT: store i8* null, i8** [[TMP20]], align 4
+// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK2-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK2-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4
+// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK2-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4
+// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK2-NEXT: store i8* null, i8** [[TMP25]], align 4
+// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK2-NEXT: store i32 2, i32* [[TMP28]], align 4
+// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK2-NEXT: store i32 3, i32* [[TMP29]], align 4
+// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK2-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4
+// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK2-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4
+// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK2-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
+// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK2-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
+// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK2-NEXT: store i8** null, i8*** [[TMP34]], align 4
+// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK2-NEXT: store i8** null, i8*** [[TMP35]], align 4
+// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK2-NEXT: store i64 0, i64* [[TMP36]], align 8
+// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK2-NEXT: store i64 0, i64* [[TMP37]], align 8
+// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
+// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
+// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK2-NEXT: store i32 0, i32* [[TMP40]], align 4
+// CHECK2-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK2-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
+// CHECK2-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK2: omp_offload.failed:
+// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
+// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK2: omp_offload.cont:
+// CHECK2-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
+// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
+// CHECK2-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2
+// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
+// CHECK2-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK2-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
+// CHECK2-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
+// CHECK2-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
+// CHECK2-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
+// CHECK2-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
+// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
+// CHECK2-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4
+// CHECK2-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK2-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
+// CHECK2-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4
+// CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
+// CHECK2-NEXT: store i8* null, i8** [[TMP55]], align 4
+// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
+// CHECK2-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
+// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
+// CHECK2-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
+// CHECK2-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
+// CHECK2-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
+// CHECK2-NEXT: store i8* null, i8** [[TMP60]], align 4
+// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
+// CHECK2-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
+// CHECK2-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4
+// CHECK2-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
+// CHECK2-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
+// CHECK2-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4
+// CHECK2-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
+// CHECK2-NEXT: store i8* null, i8** [[TMP65]], align 4
+// CHECK2-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
+// CHECK2-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
+// CHECK2-NEXT: store float* [[VLA]], float** [[TMP67]], align 4
+// CHECK2-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
+// CHECK2-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
+// CHECK2-NEXT: store float* [[VLA]], float** [[TMP69]], align 4
+// CHECK2-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK2-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4
+// CHECK2-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
+// CHECK2-NEXT: store i8* null, i8** [[TMP71]], align 4
+// CHECK2-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
+// CHECK2-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
+// CHECK2-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
+// CHECK2-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
+// CHECK2-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
+// CHECK2-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
+// CHECK2-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
+// CHECK2-NEXT: store i8* null, i8** [[TMP76]], align 4
+// CHECK2-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
+// CHECK2-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
+// CHECK2-NEXT: store i32 5, i32* [[TMP78]], align 4
+// CHECK2-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
+// CHECK2-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK2-NEXT: store i32 5, i32* [[TMP80]], align 4
+// CHECK2-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
+// CHECK2-NEXT: store i8* null, i8** [[TMP81]], align 4
+// CHECK2-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
+// CHECK2-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
+// CHECK2-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4
+// CHECK2-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
+// CHECK2-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
+// CHECK2-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4
+// CHECK2-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
+// CHECK2-NEXT: store i8* null, i8** [[TMP86]], align 4
+// CHECK2-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
+// CHECK2-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
+// CHECK2-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4
+// CHECK2-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
+// CHECK2-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
+// CHECK2-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4
+// CHECK2-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK2-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4
+// CHECK2-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
+// CHECK2-NEXT: store i8* null, i8** [[TMP92]], align 4
align 4 +// CHECK2-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8 +// CHECK2-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT** +// CHECK2-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4 +// CHECK2-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8 +// CHECK2-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT** +// CHECK2-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4 +// CHECK2-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8 +// CHECK2-NEXT: store i8* null, i8** [[TMP97]], align 4 +// CHECK2-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, i32* [[TMP101]], align 4 +// CHECK2-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK2-NEXT: store i32 9, i32* [[TMP102]], align 4 +// CHECK2-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK2-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4 +// CHECK2-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK2-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4 +// CHECK2-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK2-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4 +// CHECK2-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK2-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4 +// CHECK2-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK2-NEXT: store i8** null, i8*** [[TMP107]], align 4 +// CHECK2-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK2-NEXT: store i8** null, i8*** [[TMP108]], align 4 +// CHECK2-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, i64* [[TMP109]], align 8 +// CHECK2-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, i64* [[TMP110]], align 8 +// CHECK2-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4 +// CHECK2-NEXT: [[TMP112:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4 +// CHECK2-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, i32* [[TMP113]], align 4 +// CHECK2-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]]) +// CHECK2-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0 +// CHECK2-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK2: omp_offload.failed6: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK2: omp_offload.cont7: +// CHECK2-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double** +// CHECK2-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4 +// CHECK2-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double** +// CHECK2-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4 +// CHECK2-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0 +// CHECK2-NEXT: store i8* null, i8** [[TMP121]], align 4 +// CHECK2-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0** +// CHECK2-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4 +// CHECK2-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0** +// CHECK2-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4 +// CHECK2-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1 +// CHECK2-NEXT: store i8* null, i8** [[TMP126]], align 4 +// CHECK2-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, i32* [[TMP129]], align 4 +// CHECK2-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1 +// CHECK2-NEXT: store i32 2, i32* [[TMP130]], align 4 +// CHECK2-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2 +// CHECK2-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4 +// CHECK2-NEXT: 
[[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3 +// CHECK2-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4 +// CHECK2-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4 +// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4 +// CHECK2-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5 +// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4 +// CHECK2-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6 +// CHECK2-NEXT: store i8** null, i8*** [[TMP135]], align 4 +// CHECK2-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7 +// CHECK2-NEXT: store i8** null, i8*** [[TMP136]], align 4 +// CHECK2-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, i64* [[TMP137]], align 8 +// CHECK2-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, i64* [[TMP138]], align 8 +// CHECK2-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4 +// CHECK2-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4 +// CHECK2-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, i32* [[TMP141]], align 4 +// CHECK2-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]]) +// CHECK2-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0 +// CHECK2-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]] +// CHECK2: omp_offload.failed12: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT13]] +// CHECK2: omp_offload.cont13: +// CHECK2-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP145]]) +// CHECK2-NEXT: ret i32 [[TMP144]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63 +// CHECK2-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[P_ADDR:%.*]] = alloca 
i32*, align 4 +// CHECK2-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70 +// CHECK2-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4 +// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4 +// CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4 +// CHECK2-NEXT: [[B5:%.*]] = alloca [10 x float], align 4 +// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// CHECK2-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK2-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4 +// CHECK2-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK2-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4 +// CHECK2-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4 +// CHECK2-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8* +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// CHECK2-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave() +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4 +// 
CHECK2-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8* +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false) +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8* +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false) +// CHECK2-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]] +// CHECK2-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 +// CHECK2-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8* +// CHECK2-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false) +// CHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8* +// CHECK2-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false) +// CHECK2-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2 +// CHECK2-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1 +// CHECK2-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16 +// CHECK2-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2 +// CHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3 +// CHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4 +// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1 +// CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 +// CHECK2-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]] +// CHECK2-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]] +// CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3 +// CHECK2-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8 +// CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0 +// CHECK2-NEXT: store i64 1, i64* [[X]], align 4 +// CHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1 +// CHECK2-NEXT: store i8 1, i8* [[Y]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP25]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111 +// CHECK2-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 
4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4 +// CHECK2-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK2-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8* +// CHECK2-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false) +// CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double +// CHECK2-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0 +// CHECK2-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// CHECK2-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_Z3bariPd +// CHECK2-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// CHECK2-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, 
i32* [[A]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: ret i32 [[TMP9]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// CHECK2-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4 +// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK2-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]] +// CHECK2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK2-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64 +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false) +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1** +// CHECK2-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double** +// CHECK2-NEXT: store double* [[A]], double** [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-NEXT: store i8* null, i8** [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32* +// CHECK2-NEXT: store i32 
[[TMP5]], i32* [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32* +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-NEXT: store i8* null, i8** [[TMP19]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32* +// CHECK2-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32* +// CHECK2-NEXT: store i32 2, i32* [[TMP23]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK2-NEXT: store i8* null, i8** [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK2-NEXT: store i8* null, i8** [[TMP29]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16** +// CHECK2-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16** +// CHECK2-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4 +// CHECK2-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4 +// CHECK2-NEXT: store i8* null, i8** [[TMP35]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, i32* [[TMP39]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-NEXT: store i32 5, i32* [[TMP40]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4 +// CHECK2-NEXT: 
[[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-NEXT: store i8** null, i8*** [[TMP45]], align 4 +// CHECK2-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-NEXT: store i8** null, i8*** [[TMP46]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, i64* [[TMP47]], align 8 +// CHECK2-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, i64* [[TMP48]], align 8 +// CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4 +// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, i32* [[TMP51]], align 4 +// CHECK2-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0 +// CHECK2-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2: omp_offload.failed: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2: omp_offload.cont: +// CHECK2-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]] +// CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK2-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32 +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]] +// CHECK2-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP57]]) +// CHECK2-NEXT: ret i32 
[[ADD3]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@_ZL7fstatici +// CHECK2-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-NEXT: store i8 0, i8* [[AAA]], align 1 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8* +// CHECK2-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-NEXT: store i8* null, i8** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32* +// CHECK2-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32* +// CHECK2-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-NEXT: store i8* null, i8** [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]** +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]** +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK2-NEXT: store i8* null, i8** [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x 
i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, i32* [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-NEXT: store i32 3, i32* [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-NEXT: store i8** null, i8*** [[TMP27]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-NEXT: store i8** null, i8*** [[TMP28]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, i64* [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, i64* [[TMP30]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, i32* [[TMP33]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 +// CHECK2-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2: omp_offload.failed: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) 
#[[ATTR3]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2: omp_offload.cont: +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: ret i32 [[TMP36]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// CHECK2-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// CHECK2-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32* +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK2-NEXT: store i8* null, i8** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]** +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]** +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK2-NEXT: store i8* null, i8** [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK2-NEXT: store i32 2, i32* [[TMP14]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK2-NEXT: store i32 2, i32* [[TMP15]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK2-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK2-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], 
align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK2-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK2-NEXT: store i8** null, i8*** [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK2-NEXT: store i8** null, i8*** [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK2-NEXT: store i64 0, i64* [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK2-NEXT: store i64 0, i64* [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK2-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK2-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK2-NEXT: store i32 0, i32* [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]]) +// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK2-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK2: omp_offload.failed: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CHECK2: omp_offload.cont: +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4 +// CHECK2-NEXT: ret i32 [[TMP29]] +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167 +// CHECK2-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4 +// CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: 
[[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4 +// CHECK2-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave() +// CHECK2-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]] +// CHECK2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8* +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false) +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double +// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK2-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// CHECK2-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK2-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 +// CHECK2-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]] +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]] +// CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// CHECK2-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// CHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142 +// CHECK2-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4 +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8* +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = 
bitcast [10 x i32]* [[B1]] to i8* +// CHECK2-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 +// CHECK2-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128 +// CHECK2-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4 +// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8* +// CHECK2-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CHECK2-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: call void @__tgt_register_requires(i64 1) +// CHECK2-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_Z3fooiPd +// CHECK3-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// CHECK3-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// CHECK3-NEXT: [[P:%.*]] = alloca i32*, align 64 +// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[GA_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [9 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [9 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [9 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [9 x i64], align 4
+// CHECK3-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS8:%.*]] = alloca [2 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_PTRS9:%.*]] = alloca [2 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS10:%.*]] = alloca [2 x i8*], align 4
+// CHECK3-NEXT: [[KERNEL_ARGS11:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK3-NEXT: store i16 0, i16* [[AA]], align 2
+// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave()
+// CHECK3-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4
+// CHECK3-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]]
+// CHECK3-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8
+// CHECK3-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
+// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], i32* [[X]], align 4
+// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4
+// CHECK3-NEXT: store i32* [[A]], i32** [[P]], align 64
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT: store i32 [[TMP6]], i32* [[A_CASTED]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_CASTED]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[P]], align 64
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* @ga, align 4
+// CHECK3-NEXT: store i32 [[TMP9]], i32* [[GA_CASTED]], align 4
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[GA_CASTED]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8** [[TMP13]] to i32*
+// CHECK3-NEXT: store i32 [[TMP7]], i32* [[TMP14]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store i8* null, i8** [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to i32**
+// CHECK3-NEXT: store i32* [[TMP8]], i32** [[TMP17]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to i32**
+// CHECK3-NEXT: store i32* [[TMP8]], i32** [[TMP19]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT: store i8* null, i8** [[TMP20]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP22:%.*]] = bitcast i8** [[TMP21]] to i32*
+// CHECK3-NEXT: store i32 [[TMP10]], i32* [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i8** [[TMP23]] to i32*
+// CHECK3-NEXT: store i32 [[TMP10]], i32* [[TMP24]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT: store i8* null, i8** [[TMP25]], align 4
+// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 2, i32* [[TMP28]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 3, i32* [[TMP29]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store i8** [[TMP26]], i8*** [[TMP30]], align 4
+// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store i8** [[TMP27]], i8*** [[TMP31]], align 4
+// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes, i32 0, i32 0), i64** [[TMP32]], align 4
+// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes, i32 0, i32 0), i64** [[TMP33]], align 4
+// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store i8** null, i8*** [[TMP34]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store i8** null, i8*** [[TMP35]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, i64* [[TMP36]], align 8
+// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, i64* [[TMP37]], align 8
+// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP38]], align 4
+// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP39]], align 4
+// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, i32* [[TMP40]], align 4
+// CHECK3-NEXT: [[TMP41:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP42:%.*]] = icmp ne i32 [[TMP41]], 0
+// CHECK3-NEXT: br i1 [[TMP42]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3: omp_offload.failed:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63(i32 [[TMP7]], i32* [[TMP8]], i32 [[TMP10]]) #[[ATTR3:[0-9]+]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK3: omp_offload.cont:
+// CHECK3-NEXT: [[TMP43:%.*]] = load i16, i16* [[AA]], align 2
+// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_CASTED]] to i16*
+// CHECK3-NEXT: store i16 [[TMP43]], i16* [[CONV]], align 2
+// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[AA_CASTED]], align 4
+// CHECK3-NEXT: [[TMP45:%.*]] = mul nuw i32 [[TMP0]], 4
+// CHECK3-NEXT: [[TMP46:%.*]] = sext i32 [[TMP45]] to i64
+// CHECK3-NEXT: [[TMP47:%.*]] = mul nuw i32 5, [[TMP2]]
+// CHECK3-NEXT: [[TMP48:%.*]] = mul nuw i32 [[TMP47]], 8
+// CHECK3-NEXT: [[TMP49:%.*]] = sext i32 [[TMP48]] to i64
+// CHECK3-NEXT: [[TMP50:%.*]] = bitcast [9 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP50]], i8* align 4 bitcast ([9 x i64]* @.offload_sizes.1 to i8*), i32 72, i1 false)
+// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32*
+// CHECK3-NEXT: store i32 [[TMP44]], i32* [[TMP52]], align 4
+// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32*
+// CHECK3-NEXT: store i32 [[TMP44]], i32* [[TMP54]], align 4
+// CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
+// CHECK3-NEXT: store i8* null, i8** [[TMP55]], align 4
+// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8** [[TMP56]] to [10 x float]**
+// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP57]], align 4
+// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP59:%.*]] = bitcast i8** [[TMP58]] to [10 x float]**
+// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[TMP59]], align 4
+// CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
+// CHECK3-NEXT: store i8* null, i8** [[TMP60]], align 4
+// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32*
+// CHECK3-NEXT: store i32 [[TMP0]], i32* [[TMP62]], align 4
+// CHECK3-NEXT: [[TMP63:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32*
+// CHECK3-NEXT: store i32 [[TMP0]], i32* [[TMP64]], align 4
+// CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 2
+// CHECK3-NEXT: store i8* null, i8** [[TMP65]], align 4
+// CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP67:%.*]] = bitcast i8** [[TMP66]] to float**
+// CHECK3-NEXT: store float* [[VLA]], float** [[TMP67]], align 4
+// CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP69:%.*]] = bitcast i8** [[TMP68]] to float**
+// CHECK3-NEXT: store float* [[VLA]], float** [[TMP69]], align 4
+// CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 3
+// CHECK3-NEXT: store i64 [[TMP46]], i64* [[TMP70]], align 4
+// CHECK3-NEXT: [[TMP71:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 3
+// CHECK3-NEXT: store i8* null, i8** [[TMP71]], align 4
+// CHECK3-NEXT: [[TMP72:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 4
+// CHECK3-NEXT: [[TMP73:%.*]] = bitcast i8** [[TMP72]] to [5 x [10 x double]]**
+// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP73]], align 4
+// CHECK3-NEXT: [[TMP74:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 4
+// CHECK3-NEXT: [[TMP75:%.*]] = bitcast i8** [[TMP74]] to [5 x [10 x double]]**
+// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[TMP75]], align 4
+// CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 4
+// CHECK3-NEXT: store i8* null, i8** [[TMP76]], align 4
+// CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 5
+// CHECK3-NEXT: [[TMP78:%.*]] = bitcast i8** [[TMP77]] to i32*
+// CHECK3-NEXT: store i32 5, i32* [[TMP78]], align 4
+// CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 5
+// CHECK3-NEXT: [[TMP80:%.*]] = bitcast i8** [[TMP79]] to i32*
+// CHECK3-NEXT: store i32 5, i32* [[TMP80]], align 4
+// CHECK3-NEXT: [[TMP81:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 5
+// CHECK3-NEXT: store i8* null, i8** [[TMP81]], align 4
+// CHECK3-NEXT: [[TMP82:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 6
+// CHECK3-NEXT: [[TMP83:%.*]] = bitcast i8** [[TMP82]] to i32*
+// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP83]], align 4
+// CHECK3-NEXT: [[TMP84:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 6
+// CHECK3-NEXT: [[TMP85:%.*]] = bitcast i8** [[TMP84]] to i32*
+// CHECK3-NEXT: store i32 [[TMP2]], i32* [[TMP85]], align 4
+// CHECK3-NEXT: [[TMP86:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 6
+// CHECK3-NEXT: store i8* null, i8** [[TMP86]], align 4
+// CHECK3-NEXT: [[TMP87:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 7
+// CHECK3-NEXT: [[TMP88:%.*]] = bitcast i8** [[TMP87]] to double**
+// CHECK3-NEXT: store double* [[VLA1]], double** [[TMP88]], align 4
+// CHECK3-NEXT: [[TMP89:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 7
+// CHECK3-NEXT: [[TMP90:%.*]] = bitcast i8** [[TMP89]] to double**
+// CHECK3-NEXT: store double* [[VLA1]], double** [[TMP90]], align 4
+// CHECK3-NEXT: [[TMP91:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 7
+// CHECK3-NEXT: store i64 [[TMP49]], i64* [[TMP91]], align 4
+// CHECK3-NEXT: [[TMP92:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 7
+// CHECK3-NEXT: store i8* null, i8** [[TMP92]], align 4
+// CHECK3-NEXT: [[TMP93:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 8
+// CHECK3-NEXT: [[TMP94:%.*]] = bitcast i8** [[TMP93]] to %struct.TT**
+// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP94]], align 4
+// CHECK3-NEXT: [[TMP95:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 8
+// CHECK3-NEXT: [[TMP96:%.*]] = bitcast i8** [[TMP95]] to %struct.TT**
+// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[TMP96]], align 4
+// CHECK3-NEXT: [[TMP97:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 8
+// CHECK3-NEXT: store i8* null, i8** [[TMP97]], align 4
+// CHECK3-NEXT: [[TMP98:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP99:%.*]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP100:%.*]] = getelementptr inbounds [9 x i64], [9 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 0
+// CHECK3-NEXT: store i32 2, i32* [[TMP101]], align 4
+// CHECK3-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 1
+// CHECK3-NEXT: store i32 9, i32* [[TMP102]], align 4
+// CHECK3-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 2
+// CHECK3-NEXT: store i8** [[TMP98]], i8*** [[TMP103]], align 4
+// CHECK3-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 3
+// CHECK3-NEXT: store i8** [[TMP99]], i8*** [[TMP104]], align 4
+// CHECK3-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 4
+// CHECK3-NEXT: store i64* [[TMP100]], i64** [[TMP105]], align 4
+// CHECK3-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 5
+// CHECK3-NEXT: store i64* getelementptr inbounds ([9 x i64], [9 x i64]* @.offload_maptypes.2, i32 0, i32 0), i64** [[TMP106]], align 4
+// CHECK3-NEXT: [[TMP107:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 6
+// CHECK3-NEXT: store i8** null, i8*** [[TMP107]], align 4
+// CHECK3-NEXT: [[TMP108:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 7
+// CHECK3-NEXT: store i8** null, i8*** [[TMP108]], align 4
+// CHECK3-NEXT: [[TMP109:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, i64* [[TMP109]], align 8
+// CHECK3-NEXT: [[TMP110:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, i64* [[TMP110]], align 8
+// CHECK3-NEXT: [[TMP111:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP111]], align 4
+// CHECK3-NEXT: [[TMP112:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP112]], align 4
+// CHECK3-NEXT: [[TMP113:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, i32* [[TMP113]], align 4
+// CHECK3-NEXT: [[TMP114:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS5]])
+// CHECK3-NEXT: [[TMP115:%.*]] = icmp ne i32 [[TMP114]], 0
+// CHECK3-NEXT: br i1 [[TMP115]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CHECK3: omp_offload.failed6:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70(i32 [[TMP44]], [10 x float]* [[B]], i32 [[TMP0]], float* [[VLA]], [5 x [10 x double]]* [[C]], i32 5, i32 [[TMP2]], double* [[VLA1]], %struct.TT* [[D]]) #[[ATTR3]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT7]]
+// CHECK3: omp_offload.cont7:
+// CHECK3-NEXT: [[TMP116:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// CHECK3-NEXT: [[TMP117:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP118:%.*]] = bitcast i8** [[TMP117]] to double**
+// CHECK3-NEXT: store double* [[TMP116]], double** [[TMP118]], align 4
+// CHECK3-NEXT: [[TMP119:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP120:%.*]] = bitcast i8** [[TMP119]] to double**
+// CHECK3-NEXT: store double* [[TMP116]], double** [[TMP120]], align 4
+// CHECK3-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 0
+// CHECK3-NEXT: store i8* null, i8** [[TMP121]], align 4
+// CHECK3-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP123:%.*]] = bitcast i8** [[TMP122]] to %struct.TT.0**
+// CHECK3-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP123]], align 4
+// CHECK3-NEXT: [[TMP124:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP125:%.*]] = bitcast i8** [[TMP124]] to %struct.TT.0**
+// CHECK3-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[TMP125]], align 4
+// CHECK3-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS10]], i32 0, i32 1
+// CHECK3-NEXT: store i8* null, i8** [[TMP126]], align 4
+// CHECK3-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP128:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS9]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 0
+// CHECK3-NEXT: store i32 2, i32* [[TMP129]], align 4
+// CHECK3-NEXT: [[TMP130:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 1
+// CHECK3-NEXT: store i32 2, i32* [[TMP130]], align 4
+// CHECK3-NEXT: [[TMP131:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 2
+// CHECK3-NEXT: store i8** [[TMP127]], i8*** [[TMP131]], align 4
+// CHECK3-NEXT: [[TMP132:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 3
+// CHECK3-NEXT: store i8** [[TMP128]], i8*** [[TMP132]], align 4
+// CHECK3-NEXT: [[TMP133:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 4
+// CHECK3-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.3, i32 0, i32 0), i64** [[TMP133]], align 4
+// CHECK3-NEXT: [[TMP134:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 5
+// CHECK3-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i32 0, i32 0), i64** [[TMP134]], align 4
+// CHECK3-NEXT: [[TMP135:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 6
+// CHECK3-NEXT: store i8** null, i8*** [[TMP135]], align 4
+// CHECK3-NEXT: [[TMP136:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 7
+// CHECK3-NEXT: store i8** null, i8*** [[TMP136]], align 4
+// CHECK3-NEXT: [[TMP137:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, i64* [[TMP137]], align 8
+// CHECK3-NEXT: [[TMP138:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, i64* [[TMP138]], align 8
+// CHECK3-NEXT: [[TMP139:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP139]], align 4
+// CHECK3-NEXT: [[TMP140:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP140]], align 4
+// CHECK3-NEXT: [[TMP141:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, i32* [[TMP141]], align 4
+// CHECK3-NEXT: [[TMP142:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS11]])
+// CHECK3-NEXT: [[TMP143:%.*]] = icmp ne i32 [[TMP142]], 0
+// CHECK3-NEXT: br i1 [[TMP143]], label [[OMP_OFFLOAD_FAILED12:%.*]], label [[OMP_OFFLOAD_CONT13:%.*]]
+// CHECK3: omp_offload.failed12:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111(double* [[TMP116]], %struct.TT.0* [[E]]) #[[ATTR3]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT13]]
+// CHECK3: omp_offload.cont13:
+// CHECK3-NEXT: [[TMP144:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT: [[TMP145:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP145]])
+// CHECK3-NEXT: ret i32 [[TMP144]]
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
+// CHECK3-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4
+// CHECK3-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK3-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
+// CHECK3-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR2]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
+// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
+// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4
+// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
+// CHECK3-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
+// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
+// CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
+// CHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
+// CHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// CHECK3-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
+// CHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// CHECK3-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
+// CHECK3-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
+// CHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
+// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
+// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
+// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
+// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
+// CHECK3-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
+// CHECK3-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
+// CHECK3-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
+// CHECK3-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
+// CHECK3-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
+// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
+// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
+// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
+// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
+// CHECK3-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// CHECK3-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
+// CHECK3-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
+// CHECK3-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// CHECK3-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
+// CHECK3-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
+// CHECK3-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
+// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
+// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
+// CHECK3-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
+// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
+// CHECK3-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
+// CHECK3-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
+// CHECK3-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
+// CHECK3-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
+// CHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
+// CHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
+// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
+// CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
+// CHECK3-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
+// CHECK3-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
+// CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
+// CHECK3-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
+// CHECK3-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
+// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
+// CHECK3-NEXT: store i64 1, i64* [[X]], align 4
+// CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
+// CHECK3-NEXT: store i8 1, i8* [[Y]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
+// CHECK3-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR2]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
+// CHECK3-NEXT: [[E1:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// CHECK3-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// CHECK3-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = bitcast %struct.TT.0* [[E1]] to i8*
+// CHECK3-NEXT: [[TMP2:%.*]] = bitcast %struct.TT.0* [[TMP0]] to i8*
+// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 8, i1 false)
+// CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E1]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[X]], align 4
+// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP3]] to double
+// CHECK3-NEXT: [[TMP4:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP4]], i32 0
+// CHECK3-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP5]], i32 0
+// CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[ARRAYIDX2]], align 4
+// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
+// CHECK3-NEXT: store double [[INC]], double* [[ARRAYIDX2]], align 4
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_Z3bariPd
+// CHECK3-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4
+// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// CHECK3-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]])
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[A]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]])
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
+// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]])
+// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
+// CHECK3-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]])
+// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
+// CHECK3-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT: ret i32 [[TMP9]]
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
+// CHECK3-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [5 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [5 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [5 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [5 x i64], align 4
+// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[B]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
+// CHECK3-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]]
+// CHECK3-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2
+// CHECK3-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4
+// CHECK3-NEXT: store i32 [[TMP4]], i32* [[B_CASTED]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[B_CASTED]], align 4
+// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP6:%.*]] = mul nuw i32 2, [[TMP1]]
+// CHECK3-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
+// CHECK3-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
+// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [5 x i64]* [[DOTOFFLOAD_SIZES]] to i8*
+// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP9]], i8* align 4 bitcast ([5 x i64]* @.offload_sizes.5 to i8*), i32 40, i1 false)
+// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %struct.S1**
+// CHECK3-NEXT: store %struct.S1* [[THIS1]], %struct.S1** [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP13:%.*]] = bitcast i8** [[TMP12]] to double**
+// CHECK3-NEXT: store double* [[A]], double** [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store i8* null, i8** [[TMP14]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP16:%.*]] = bitcast i8** [[TMP15]] to i32*
+// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP16]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8** [[TMP17]] to i32*
+// CHECK3-NEXT: store i32 [[TMP5]], i32* [[TMP18]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT: store i8* null, i8** [[TMP19]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i8** [[TMP20]] to i32*
+// CHECK3-NEXT: store i32 2, i32* [[TMP21]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8** [[TMP22]] to i32*
+// CHECK3-NEXT: store i32 2, i32* [[TMP23]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT: store i8* null, i8** [[TMP24]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP26:%.*]] = bitcast i8** [[TMP25]] to i32*
+// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP26]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
+// CHECK3-NEXT: [[TMP28:%.*]] = bitcast i8** [[TMP27]] to i32*
+// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP28]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3
+// CHECK3-NEXT: store i8* null, i8** [[TMP29]], align 4
+// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
+// CHECK3-NEXT: [[TMP31:%.*]] = bitcast i8** [[TMP30]] to i16**
+// CHECK3-NEXT: store i16* [[VLA]], i16** [[TMP31]], align 4
+// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
+// CHECK3-NEXT: [[TMP33:%.*]] = bitcast i8** [[TMP32]] to i16**
+// CHECK3-NEXT: store i16* [[VLA]], i16** [[TMP33]], align 4
+// CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 4
+// CHECK3-NEXT: store i64 [[TMP8]], i64* [[TMP34]], align 4
+// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 4
+// CHECK3-NEXT: store i8* null, i8** [[TMP35]], align 4
+// CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [5 x i64], [5 x i64]* [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 2, i32* [[TMP39]], align 4
+// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 5, i32* [[TMP40]], align 4
+// CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store i8** [[TMP36]], i8*** [[TMP41]], align 4
+// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store i8** [[TMP37]], i8*** [[TMP42]], align 4
+// CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store i64* [[TMP38]], i64** [[TMP43]], align 4
+// CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store i64* getelementptr inbounds ([5 x i64], [5 x i64]* @.offload_maptypes.6, i32 0, i32 0), i64** [[TMP44]], align 4
+// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store i8** null, i8*** [[TMP45]], align 4
+// CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store i8** null, i8*** [[TMP46]], align 4
+// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, i64* [[TMP47]], align 8
+// CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, i64* [[TMP48]], align 8
+// CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP49]], align 4
+// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP50]], align 4
+// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, i32* [[TMP51]], align 4
+// CHECK3-NEXT: [[TMP52:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP53:%.*]] = icmp ne i32 [[TMP52]], 0
+// CHECK3-NEXT: br i1 [[TMP53]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3: omp_offload.failed:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167(%struct.S1* [[THIS1]], i32 [[TMP5]], i32 2, i32 [[TMP1]], i16* [[VLA]]) #[[ATTR3]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK3: omp_offload.cont:
+// CHECK3-NEXT: [[TMP54:%.*]] = mul nsw i32 1, [[TMP1]]
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP54]]
+// CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
+// CHECK3-NEXT: [[TMP55:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
+// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP55]] to i32
+// CHECK3-NEXT: [[TMP56:%.*]] = load i32, i32* [[B]], align 4
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], [[TMP56]]
+// CHECK3-NEXT: [[TMP57:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP57]])
+// CHECK3-NEXT: ret i32 [[ADD3]]
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_ZL7fstatici
+// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[AAA:%.*]] = alloca i8, align 1
+// CHECK3-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
+// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 4
+// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK3-NEXT: store i8 0, i8* [[AAA]], align 1
+// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[AAA]], align 1
+// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_CASTED]] to i8*
+// CHECK3-NEXT: store i8 [[TMP2]], i8* [[CONV]], align 1
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[AAA_CASTED]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
+// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32*
+// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP7]], align 4
+// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store i8* null, i8** [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to i32*
+// CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8** [[TMP11]] to i32*
+// CHECK3-NEXT: store i32 [[TMP3]], i32* [[TMP12]], align 4
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT: store i8* null, i8** [[TMP13]], align 4
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8** [[TMP14]] to [10 x i32]**
+// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to [10 x i32]**
+// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP17]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2
+// CHECK3-NEXT: store i8* null, i8** [[TMP18]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 2, i32* [[TMP21]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 3, i32* [[TMP22]], align 4
+// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store i8** [[TMP19]], i8*** [[TMP23]], align 4
+// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store i8** [[TMP20]], i8*** [[TMP24]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.7, i32 0, i32 0), i64** [[TMP25]], align 4
+// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.8, i32 0, i32 0), i64** [[TMP26]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store i8** null, i8*** [[TMP27]], align 4
+// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store i8** null, i8*** [[TMP28]], align 4
+// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, i64* [[TMP29]], align 8
+// CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, i64* [[TMP30]], align 8
+// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP31]], align 4
+// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP32]], align 4
+// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, i32* [[TMP33]], align 4
+// CHECK3-NEXT: [[TMP34:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0
+// CHECK3-NEXT: br i1 [[TMP35]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3: omp_offload.failed:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142(i32 [[TMP1]], i32 [[TMP3]], [10 x i32]* [[B]]) #[[ATTR3]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK3: omp_offload.cont:
+// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT: ret i32 [[TMP36]]
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i
+// CHECK3-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[A:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
+// CHECK3-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 4
+// CHECK3-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 4
+// CHECK3-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// CHECK3-NEXT: store i32 0, i32* [[A]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT: store i32 [[TMP0]], i32* [[A_CASTED]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A_CASTED]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i8** [[TMP2]] to i32*
+// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP3]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8** [[TMP4]] to i32*
+// CHECK3-NEXT: store i32 [[TMP1]], i32* [[TMP5]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CHECK3-NEXT: store i8* null, i8** [[TMP6]], align 4
+// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8** [[TMP7]] to [10 x i32]**
+// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP8]], align 4
+// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
+// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to [10 x i32]**
+// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[TMP10]], align 4
+// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1
+// CHECK3-NEXT: store i8* null, i8** [[TMP11]], align 4
+// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 0
+// CHECK3-NEXT: store i32 2, i32* [[TMP14]], align 4
+// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 1
+// CHECK3-NEXT: store i32 2, i32* [[TMP15]], align 4
+// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 2
+// CHECK3-NEXT: store i8** [[TMP12]], i8*** [[TMP16]], align 4
+// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 3
+// CHECK3-NEXT: store i8** [[TMP13]], i8*** [[TMP17]], align 4
+// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 4
+// CHECK3-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64** [[TMP18]], align 4
+// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 5
+// CHECK3-NEXT: store i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i64** [[TMP19]], align 4
+// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 6
+// CHECK3-NEXT: store i8** null, i8*** [[TMP20]], align 4
+// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 7
+// CHECK3-NEXT: store i8** null, i8*** [[TMP21]], align 4
+// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 8
+// CHECK3-NEXT: store i64 0, i64* [[TMP22]], align 8
+// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 9
+// CHECK3-NEXT: store i64 0, i64* [[TMP23]], align 8
+// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 10
+// CHECK3-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], [3 x i32]* [[TMP24]], align 4
+// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 11
+// CHECK3-NEXT: store [3 x i32] zeroinitializer, [3 x i32]* [[TMP25]], align 4
+// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]], i32 0, i32 12
+// CHECK3-NEXT: store i32 0, i32* [[TMP26]], align 4
+// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(%struct.ident_t* @[[GLOB1]], i64 -1, i32 -1, i32 0, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128.region_id, %struct.__tgt_kernel_arguments* [[KERNEL_ARGS]])
+// CHECK3-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0
+// CHECK3-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CHECK3: omp_offload.failed:
+// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128(i32 [[TMP1]], [10 x i32]* [[B]]) #[[ATTR3]]
+// CHECK3-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CHECK3: omp_offload.cont:
+// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[A]], align 4
+// CHECK3-NEXT: ret i32 [[TMP29]]
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
+// CHECK3-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR2]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
+// CHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// CHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// CHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// CHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
+// CHECK3-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// CHECK3-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
+// CHECK3-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// CHECK3-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
+// CHECK3-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// CHECK3-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
+// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
+// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
+// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
+// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
+// CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
+// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
+// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
+// CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4
+// CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
+// CHECK3-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4
+// CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
+// CHECK3-NEXT: store double [[INC]], double* [[A4]], align 4
+// CHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
+// CHECK3-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
+// CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
+// CHECK3-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
+// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// CHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
+// CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// CHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
+// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
+// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// CHECK3-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// CHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
+// CHECK3-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
+// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
+// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
+// CHECK3-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
+// CHECK3-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
+// CHECK3-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR2]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// CHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// CHECK3-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// CHECK3-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
+// CHECK3-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
+// CHECK3-NEXT: ret void
+//
+//
+// CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CHECK3-SAME: () #[[ATTR5:[0-9]+]] {
+// CHECK3-NEXT: entry:
+// CHECK3-NEXT: call void @__tgt_register_requires(i64 1)
+// CHECK3-NEXT: ret void
+//
+//
+// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z3fooiPd
+// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY0-NEXT: entry:
+// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT: [[AA:%.*]] = alloca i16, align 2
+// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca [10 x float], align 4
+// SIMD-ONLY0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// SIMD-ONLY0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
+// SIMD-ONLY0-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// SIMD-ONLY0-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
+// SIMD-ONLY0-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// SIMD-ONLY0-NEXT: [[P:%.*]] = alloca i32*, align 64
+// SIMD-ONLY0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// SIMD-ONLY0-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// SIMD-ONLY0-NEXT: store i32 0, i32* [[A]], align 4
+// SIMD-ONLY0-NEXT: store i16 0, i16* [[AA]], align 2
+// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
+// SIMD-ONLY0-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
+// SIMD-ONLY0-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
+// SIMD-ONLY0-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
+// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
+// SIMD-ONLY0-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
+// SIMD-ONLY0-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
+// SIMD-ONLY0-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
+// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY0-NEXT: store i32 [[TMP6]], i32* [[X]], align 4
+// SIMD-ONLY0-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
+// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY0-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4
+// SIMD-ONLY0-NEXT: store i32* [[A]], i32** [[P]], align 64
+// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2
+// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32
+// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// SIMD-ONLY0-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
+// SIMD-ONLY0-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2
+// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2
+// SIMD-ONLY0-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// SIMD-ONLY0-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3
+// SIMD-ONLY0-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4
+// SIMD-ONLY0-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1
+// SIMD-ONLY0-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2
+// SIMD-ONLY0-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8
+// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
+// SIMD-ONLY0-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]]
+// SIMD-ONLY0-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3
+// SIMD-ONLY0-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8
+// SIMD-ONLY0-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0
+// SIMD-ONLY0-NEXT: store i64 1, i64* [[X8]], align 8
+// SIMD-ONLY0-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1
+// SIMD-ONLY0-NEXT: store i8 1, i8* [[Y9]], align 8
+// SIMD-ONLY0-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
+// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, i32* [[X10]], align 4
+// SIMD-ONLY0-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
+// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// SIMD-ONLY0-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0
+// SIMD-ONLY0-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 8
+// SIMD-ONLY0-NEXT: [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// SIMD-ONLY0-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0
+// SIMD-ONLY0-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8
+// SIMD-ONLY0-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
+// SIMD-ONLY0-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 8
+// SIMD-ONLY0-NEXT: [[TMP14:%.*]] = load i32, i32* [[A]], align 4
+// SIMD-ONLY0-NEXT: [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// SIMD-ONLY0-NEXT: call void @llvm.stackrestore(i8* [[TMP15]])
+// SIMD-ONLY0-NEXT: ret i32 [[TMP14]]
+//
+//
+// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z3bariPd
+// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
+// SIMD-ONLY0-NEXT: entry:
+// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4
+// SIMD-ONLY0-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
+// SIMD-ONLY0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// SIMD-ONLY0-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// SIMD-ONLY0-NEXT: store i32 0, i32* [[A]], align 4
+// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// SIMD-ONLY0-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
+// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
+// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
+// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[A]], align 4
+// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY0-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
+// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
+// SIMD-ONLY0-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
+// SIMD-ONLY0-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
+// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY0-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext
[[TMP5]]) +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY0-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY0-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY0-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY0-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY0-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY0-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY0-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// SIMD-ONLY0-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// SIMD-ONLY0-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY0-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// SIMD-ONLY0-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY0-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: store double [[ADD2]], double* [[A]], align 8 +// SIMD-ONLY0-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY0-NEXT: [[TMP6:%.*]] = load double, double* [[A3]], align 8 +// SIMD-ONLY0-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// SIMD-ONLY0-NEXT: store double [[INC]], double* [[A3]], align 8 +// SIMD-ONLY0-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY0-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// SIMD-ONLY0-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY0-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]] +// SIMD-ONLY0-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1 +// SIMD-ONLY0-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY0-NEXT: 
[[CONV8:%.*]] = sext i16 [[TMP9]] to i32 +// SIMD-ONLY0-NEXT: [[TMP10:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY0-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// SIMD-ONLY0-NEXT: [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY0-NEXT: call void @llvm.stackrestore(i8* [[TMP11]]) +// SIMD-ONLY0-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY0-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY0-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY0-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY0-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY0-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY0-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY0-NEXT: entry: +// SIMD-ONLY0-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY0-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY0-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY0-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY0-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY0-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY0-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY0-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY01-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY01-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY01-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// 
SIMD-ONLY01-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// SIMD-ONLY01-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// SIMD-ONLY01-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY01-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY01-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY01-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY01-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// SIMD-ONLY01-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// SIMD-ONLY01-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// SIMD-ONLY01-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// SIMD-ONLY01-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// SIMD-ONLY01-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// SIMD-ONLY01-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY01-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// SIMD-ONLY01-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY01-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY01-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY01-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY01-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2 +// SIMD-ONLY01-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY01-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3 +// SIMD-ONLY01-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY01-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1 +// SIMD-ONLY01-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2 +// SIMD-ONLY01-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY01-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3 +// SIMD-ONLY01-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY01-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY01-NEXT: store i64 1, i64* [[X8]], align 8 +// SIMD-ONLY01-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY01-NEXT: store i8 1, i8* [[Y9]], align 8 +// SIMD-ONLY01-NEXT: [[X10:%.*]] = getelementptr inbounds 
[[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY01-NEXT: [[TMP10:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY01-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double +// SIMD-ONLY01-NEXT: [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0 +// SIMD-ONLY01-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 8 +// SIMD-ONLY01-NEXT: [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0 +// SIMD-ONLY01-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY01-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 +// SIMD-ONLY01-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY01-NEXT: [[TMP14:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY01-NEXT: call void @llvm.stackrestore(i8* [[TMP15]]) +// SIMD-ONLY01-NEXT: ret i32 [[TMP14]] +// +// +// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// SIMD-ONLY01-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY01-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// SIMD-ONLY01-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY01-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY01-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// SIMD-ONLY01-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY01-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY01-LABEL: 
define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY01-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY01-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY01-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY01-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY01-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY01-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY01-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// SIMD-ONLY01-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// SIMD-ONLY01-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY01-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY01-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// SIMD-ONLY01-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY01-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY01-NEXT: store double [[ADD2]], double* [[A]], align 8 +// SIMD-ONLY01-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load double, double* [[A3]], align 8 +// SIMD-ONLY01-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// SIMD-ONLY01-NEXT: store double [[INC]], double* [[A3]], align 8 +// SIMD-ONLY01-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY01-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// SIMD-ONLY01-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY01-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]] +// SIMD-ONLY01-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1 +// SIMD-ONLY01-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY01-NEXT: [[CONV8:%.*]] = sext i16 [[TMP9]] to i32 +// SIMD-ONLY01-NEXT: [[TMP10:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY01-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// SIMD-ONLY01-NEXT: [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY01-NEXT: call void @llvm.stackrestore(i8* [[TMP11]]) +// SIMD-ONLY01-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY01-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[AAA:%.*]] = alloca 
i8, align 1 +// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY01-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY01-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY01-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY01-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY01-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY01-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY01-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY01-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY01-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY01-NEXT: entry: +// SIMD-ONLY01-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY01-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY01-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY01-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY01-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY01-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY01-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY01-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY02-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY02-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// SIMD-ONLY02-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// SIMD-ONLY02-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY02-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY02-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = call i8* 
@llvm.stacksave() +// SIMD-ONLY02-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY02-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// SIMD-ONLY02-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// SIMD-ONLY02-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// SIMD-ONLY02-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// SIMD-ONLY02-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// SIMD-ONLY02-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// SIMD-ONLY02-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY02-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY02-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY02-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY02-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2 +// SIMD-ONLY02-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY02-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3 +// SIMD-ONLY02-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY02-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1 +// SIMD-ONLY02-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2 +// SIMD-ONLY02-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY02-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3 +// SIMD-ONLY02-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY02-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY02-NEXT: store i64 1, i64* [[X8]], align 4 +// SIMD-ONLY02-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY02-NEXT: store i8 1, i8* [[Y9]], align 4 +// SIMD-ONLY02-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY02-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double +// SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0 +// SIMD-ONLY02-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 4 +// SIMD-ONLY02-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0 +// SIMD-ONLY02-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY02-NEXT: [[INC:%.*]] = fadd double 
[[TMP11]], 1.000000e+00 +// SIMD-ONLY02-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY02-NEXT: [[TMP12:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY02-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// SIMD-ONLY02-NEXT: ret i32 [[TMP12]] +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// SIMD-ONLY02-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY02-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY02-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// SIMD-ONLY02-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY02-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY02-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY02-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY02-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// 
SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY02-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY02-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// SIMD-ONLY02-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// SIMD-ONLY02-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY02-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// SIMD-ONLY02-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY02-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY02-NEXT: store double [[ADD2]], double* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 +// SIMD-ONLY02-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// SIMD-ONLY02-NEXT: store double [[INC]], double* [[A3]], align 4 +// SIMD-ONLY02-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY02-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// SIMD-ONLY02-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY02-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]] +// SIMD-ONLY02-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1 +// SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY02-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY02-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// SIMD-ONLY02-NEXT: [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY02-NEXT: call void @llvm.stackrestore(i8* [[TMP10]]) +// SIMD-ONLY02-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY02-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY02-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY02-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY02-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY02-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY02-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, i32* 
[[ARRAYIDX]], align 4 +// SIMD-ONLY02-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY02-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY02-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY02-NEXT: entry: +// SIMD-ONLY02-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY02-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY02-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY02-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY02-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY02-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY02-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY02-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY03-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY03-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// SIMD-ONLY03-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// SIMD-ONLY03-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY03-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY03-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY03-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY03-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// SIMD-ONLY03-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// SIMD-ONLY03-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// SIMD-ONLY03-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// SIMD-ONLY03-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// SIMD-ONLY03-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 
4 +// SIMD-ONLY03-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// SIMD-ONLY03-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY03-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY03-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY03-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY03-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2 +// SIMD-ONLY03-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY03-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3 +// SIMD-ONLY03-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY03-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1 +// SIMD-ONLY03-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2 +// SIMD-ONLY03-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY03-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3 +// SIMD-ONLY03-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY03-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY03-NEXT: store i64 1, i64* [[X8]], align 4 +// SIMD-ONLY03-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY03-NEXT: store i8 1, i8* [[Y9]], align 4 +// SIMD-ONLY03-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY03-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double +// SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0 +// SIMD-ONLY03-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 4 +// SIMD-ONLY03-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0 +// SIMD-ONLY03-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY03-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// SIMD-ONLY03-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY03-NEXT: [[TMP12:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY03-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// SIMD-ONLY03-NEXT: ret i32 [[TMP12]] +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// SIMD-ONLY03-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// 
SIMD-ONLY03-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY03-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY03-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// SIMD-ONLY03-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY03-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY03-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY03-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY03-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY03-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY03-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// SIMD-ONLY03-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// SIMD-ONLY03-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY03-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// SIMD-ONLY03-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY03-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY03-NEXT: store double [[ADD2]], double* [[A]], align 4 +// 
SIMD-ONLY03-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 +// SIMD-ONLY03-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// SIMD-ONLY03-NEXT: store double [[INC]], double* [[A3]], align 4 +// SIMD-ONLY03-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY03-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// SIMD-ONLY03-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY03-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]] +// SIMD-ONLY03-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1 +// SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY03-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY03-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// SIMD-ONLY03-NEXT: [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY03-NEXT: call void @llvm.stackrestore(i8* [[TMP10]]) +// SIMD-ONLY03-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY03-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY03-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY03-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY03-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY03-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY03-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY03-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY03-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY03-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY03-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY03-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY03-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY03-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// 
SIMD-ONLY03-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2
+// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// SIMD-ONLY03-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1
+// SIMD-ONLY03-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4
+// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
+// SIMD-ONLY03-NEXT: ret i32 [[TMP2]]
+//
+//
+// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
+// TCHECK-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
+// TCHECK-NEXT: entry:
+// TCHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8
+// TCHECK-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// TCHECK-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8
+// TCHECK-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8
+// TCHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// TCHECK-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
+// TCHECK-NEXT: ret void
+//
+//
+// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
+// TCHECK-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
+// TCHECK-NEXT: entry:
+// TCHECK-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
+// TCHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8
+// TCHECK-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
+// TCHECK-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8
+// TCHECK-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
+// TCHECK-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
+// TCHECK-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// TCHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
+// TCHECK-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
+// TCHECK-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
+// TCHECK-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
+// TCHECK-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
+// TCHECK-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8
+// TCHECK-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
+// TCHECK-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
+// TCHECK-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
+// TCHECK-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8
+// TCHECK-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
+// TCHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
+// TCHECK-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
+// TCHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
+// TCHECK-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
+// TCHECK-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
+// TCHECK-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
+// TCHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
+// TCHECK-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
+// TCHECK-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
+// TCHECK-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
+// TCHECK-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
+// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
+// TCHECK-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
+// TCHECK-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
+// TCHECK-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
+// TCHECK-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
+// TCHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
+// TCHECK-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
+// TCHECK-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
+// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
+// TCHECK-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
+// TCHECK-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
+// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
+// TCHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
+// TCHECK-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
+// TCHECK-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
+// TCHECK-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
+// TCHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
+// TCHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
+// TCHECK-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
+// TCHECK-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
+// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
+// TCHECK-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
+// TCHECK-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
+// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
+// TCHECK-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
+// TCHECK-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
+// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
+// TCHECK-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
+// TCHECK-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
+// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
+// TCHECK-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// TCHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
+// TCHECK-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
+// TCHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
+// TCHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
+// TCHECK-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
+// TCHECK-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
+// TCHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
+// TCHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
+// TCHECK-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
+// TCHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
+// TCHECK-NEXT: store i64 1, i64* [[X]], align 8
+// TCHECK-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
+// TCHECK-NEXT: store i8 1, i8* [[Y]], align 8
+// TCHECK-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// TCHECK-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
+// TCHECK-NEXT: ret void
+//
+//
+// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
+// TCHECK-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
+// TCHECK-NEXT: entry:
+// TCHECK-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// TCHECK-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
+// TCHECK-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// TCHECK-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
+// TCHECK-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
+// TCHECK-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
+// TCHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4
+// TCHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
+// TCHECK-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0
+// TCHECK-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8
+// TCHECK-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// TCHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0
+// TCHECK-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8
+// TCHECK-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
+// TCHECK-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 8
+// TCHECK-NEXT: ret void
+//
+//
+// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
+// TCHECK-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK-NEXT: entry:
+// TCHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
+// TCHECK-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4
+// TCHECK-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// TCHECK-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
+// TCHECK-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
+// TCHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// TCHECK-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
+// TCHECK-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
+// TCHECK-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
+// TCHECK-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
+// TCHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
+// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
+// TCHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
+// TCHECK-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
+// TCHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
+// TCHECK-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
+// TCHECK-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1
+// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
+// TCHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
+// TCHECK-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
+// TCHECK-NEXT: ret void
+//
+//
+// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
+// TCHECK-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
+// TCHECK-NEXT: entry:
+// TCHECK-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8
+// TCHECK-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// TCHECK-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// TCHECK-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
+// TCHECK-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
+// TCHECK-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
+// TCHECK-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8
+// TCHECK-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// TCHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
+// TCHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
+// TCHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
+// TCHECK-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
+// TCHECK-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
+// TCHECK-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
+// TCHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
+// TCHECK-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
+// TCHECK-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
+// TCHECK-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
+// TCHECK-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
+// TCHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
+// TCHECK-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
+// TCHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
+// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
+// TCHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
+// TCHECK-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
+// TCHECK-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
+// TCHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK-NEXT: store double [[ADD]], double* [[A]], align 8
+// TCHECK-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8
+// TCHECK-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
+// TCHECK-NEXT: store double [[INC]], double* [[A5]], align 8
+// TCHECK-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16
+// TCHECK-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
+// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
+// TCHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
+// TCHECK-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
+// TCHECK-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// TCHECK-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
+// TCHECK-NEXT: ret void
+//
+//
+// TCHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
+// TCHECK-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK-NEXT: entry:
+// TCHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// TCHECK-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
+// TCHECK-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// TCHECK-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// TCHECK-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
+// TCHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// TCHECK-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
+// TCHECK-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// TCHECK-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
+// TCHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
+// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
+// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
+// TCHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
+// TCHECK-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
+// TCHECK-NEXT: ret void
+//
+//
+// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
+// TCHECK1-SAME: (i64 noundef [[A:%.*]], i32* noundef [[P:%.*]], i64 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
+// TCHECK1-NEXT: entry:
+// TCHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 8
+// TCHECK1-NEXT: [[GA_ADDR:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// TCHECK1-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 8
+// TCHECK1-NEXT: store i64 [[GA]], i64* [[GA_ADDR]], align 8
+// TCHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// TCHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[GA_ADDR]] to i32*
+// TCHECK1-NEXT: ret void
+//
+//
+// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
+// TCHECK1-SAME: (i64 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i64 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 8 dereferenceable(400) [[C:%.*]], i64 noundef [[VLA1:%.*]], i64 noundef [[VLA3:%.*]], double* noundef nonnull align 8 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 8 dereferenceable(16) [[D:%.*]]) #[[ATTR0]] {
+// TCHECK1-NEXT: entry:
+// TCHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 8
+// TCHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 8
+// TCHECK1-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 8
+// TCHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[VLA_ADDR4:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 8
+// TCHECK1-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 8
+// TCHECK1-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
+// TCHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// TCHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
+// TCHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[__VLA_EXPR2:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
+// TCHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8
+// TCHECK1-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 8
+// TCHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
+// TCHECK1-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 8
+// TCHECK1-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 8
+// TCHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
+// TCHECK1-NEXT: store i64 [[VLA3]], i64* [[VLA_ADDR4]], align 8
+// TCHECK1-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 8
+// TCHECK1-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 8
+// TCHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16*
+// TCHECK1-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP4:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
+// TCHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[VLA_ADDR4]], align 8
+// TCHECK1-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
+// TCHECK1-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
+// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false)
+// TCHECK1-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
+// TCHECK1-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 8
+// TCHECK1-NEXT: [[VLA6:%.*]] = alloca float, i64 [[TMP1]], align 4
+// TCHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
+// TCHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP1]], 4
+// TCHECK1-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
+// TCHECK1-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
+// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i64 [[TMP11]], i1 false)
+// TCHECK1-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
+// TCHECK1-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
+// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i64 400, i1 false)
+// TCHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
+// TCHECK1-NEXT: [[VLA8:%.*]] = alloca double, i64 [[TMP16]], align 8
+// TCHECK1-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
+// TCHECK1-NEXT: store i64 [[TMP5]], i64* [[__VLA_EXPR2]], align 8
+// TCHECK1-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP4]], [[TMP5]]
+// TCHECK1-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8
+// TCHECK1-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
+// TCHECK1-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
+// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i64 [[TMP18]], i1 false)
+// TCHECK1-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
+// TCHECK1-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
+// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP21]], i8* align 8 [[TMP22]], i64 16, i1 false)
+// TCHECK1-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
+// TCHECK1-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
+// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
+// TCHECK1-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
+// TCHECK1-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
+// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i64 0, i64 2
+// TCHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// TCHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i64 3
+// TCHECK1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
+// TCHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i64 0, i64 1
+// TCHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i64 0, i64 2
+// TCHECK1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
+// TCHECK1-NEXT: [[TMP24:%.*]] = mul nsw i64 1, [[TMP5]]
+// TCHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i64 [[TMP24]]
+// TCHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i64 3
+// TCHECK1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
+// TCHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
+// TCHECK1-NEXT: store i64 1, i64* [[X]], align 8
+// TCHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
+// TCHECK1-NEXT: store i8 1, i8* [[Y]], align 8
+// TCHECK1-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// TCHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
+// TCHECK1-NEXT: ret void
+//
+//
+// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
+// TCHECK1-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
+// TCHECK1-NEXT: entry:
+// TCHECK1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// TCHECK1-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 8
+// TCHECK1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// TCHECK1-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 8
+// TCHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
+// TCHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4
+// TCHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
+// TCHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i64 0
+// TCHECK1-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 8
+// TCHECK1-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// TCHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 0
+// TCHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 8
+// TCHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
+// TCHECK1-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 8
+// TCHECK1-NEXT: ret void
+//
+//
+// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
+// TCHECK1-SAME: (i64 noundef [[A:%.*]], i64 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK1-NEXT: entry:
+// TCHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[AAA_ADDR:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
+// TCHECK1-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4
+// TCHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// TCHECK1-NEXT: store i64 [[AAA]], i64* [[AAA_ADDR]], align 8
+// TCHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
+// TCHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// TCHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[AAA_ADDR]] to i8*
+// TCHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B2]] to i8*
+// TCHECK1-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
+// TCHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
+// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
+// TCHECK1-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV1]], align 1
+// TCHECK1-NEXT: [[CONV3:%.*]] = sext i8 [[TMP4]] to i32
+// TCHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1
+// TCHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i8
+// TCHECK1-NEXT: store i8 [[CONV5]], i8* [[CONV1]], align 1
+// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B2]], i64 0, i64 2
+// TCHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP5]], 1
+// TCHECK1-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4
+// TCHECK1-NEXT: ret void
+//
+//
+// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
+// TCHECK1-SAME: (%struct.S1* noundef [[THIS:%.*]], i64 noundef [[B:%.*]], i64 noundef [[VLA:%.*]], i64 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
+// TCHECK1-NEXT: entry:
+// TCHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[VLA_ADDR2:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 8
+// TCHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// TCHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// TCHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8
+// TCHECK1-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
+// TCHECK1-NEXT: store i64 [[VLA1]], i64* [[VLA_ADDR2]], align 8
+// TCHECK1-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// TCHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[B_ADDR]] to i32*
+// TCHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[VLA_ADDR2]], align 8
+// TCHECK1-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
+// TCHECK1-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 8
+// TCHECK1-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
+// TCHECK1-NEXT: [[VLA3:%.*]] = alloca i16, i64 [[TMP5]], align 2
+// TCHECK1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
+// TCHECK1-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR1]], align 8
+// TCHECK1-NEXT: [[TMP6:%.*]] = mul nuw i64 [[TMP1]], [[TMP2]]
+// TCHECK1-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 2
+// TCHECK1-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
+// TCHECK1-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
+// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i64 [[TMP7]], i1 false)
+// TCHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 4
+// TCHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP10]] to double
+// TCHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00
+// TCHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK1-NEXT: store double [[ADD]], double* [[A]], align 8
+// TCHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK1-NEXT: [[TMP11:%.*]] = load double, double* [[A5]], align 8
+// TCHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
+// TCHECK1-NEXT: store double [[INC]], double* [[A5]], align 8
+// TCHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16
+// TCHECK1-NEXT: [[TMP12:%.*]] = mul nsw i64 1, [[TMP2]]
+// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i64 [[TMP12]]
+// TCHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
+// TCHECK1-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2
+// TCHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// TCHECK1-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
+// TCHECK1-NEXT: ret void
+//
+//
+// TCHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
+// TCHECK1-SAME: (i64 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK1-NEXT: entry:
+// TCHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8
+// TCHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8
+// TCHECK1-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// TCHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8
+// TCHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8
+// TCHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32*
+// TCHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8
+// TCHECK1-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// TCHECK1-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i64 40, i1 false)
+// TCHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 4
+// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 4
+// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i64 0, i64 2
+// TCHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
+// TCHECK1-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
+// TCHECK1-NEXT: ret void
+//
+//
+// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
+// TCHECK2-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
+// TCHECK2-NEXT: entry:
+// TCHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4
+// TCHECK2-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// TCHECK2-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4
+// TCHECK2-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4
+// TCHECK2-NEXT: ret void
+//
+//
+// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
+// TCHECK2-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
+// TCHECK2-NEXT: entry:
+// TCHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
+// TCHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4
+// TCHECK2-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
+// TCHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4
+// TCHECK2-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
+// TCHECK2-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
+// TCHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// TCHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
+// TCHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
+// TCHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
+// TCHECK2-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
+// TCHECK2-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// TCHECK2-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
+// TCHECK2-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
+// TCHECK2-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// TCHECK2-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
+// TCHECK2-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
+// TCHECK2-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
+// TCHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
+// TCHECK2-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// TCHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
+// TCHECK2-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
+// TCHECK2-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
+// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
+// TCHECK2-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
+// TCHECK2-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
+// TCHECK2-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
+// TCHECK2-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// TCHECK2-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
+// TCHECK2-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
+// TCHECK2-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
+// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
+// TCHECK2-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
+// TCHECK2-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
+// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
+// TCHECK2-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// TCHECK2-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
+// TCHECK2-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
+// TCHECK2-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
+// TCHECK2-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// TCHECK2-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
+// TCHECK2-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
+// TCHECK2-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
+// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
+// TCHECK2-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
+// TCHECK2-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
+// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
+// TCHECK2-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
+// TCHECK2-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
+// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
+// TCHECK2-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
+// TCHECK2-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
+// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
+// TCHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// TCHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
+// TCHECK2-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
+// TCHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
+// TCHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
+// TCHECK2-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
+// TCHECK2-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
+// TCHECK2-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
+// TCHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
+// TCHECK2-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
+// TCHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
+// TCHECK2-NEXT: store i64 1, i64* [[X]], align 4
+// TCHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
+// TCHECK2-NEXT: store i8 1, i8* [[Y]], align 4
+// TCHECK2-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// TCHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
+// TCHECK2-NEXT: ret void
+//
+//
+// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
+// TCHECK2-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
+// TCHECK2-NEXT: entry:
+// TCHECK2-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// TCHECK2-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
+// TCHECK2-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// TCHECK2-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
+// TCHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
+// TCHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4
+// TCHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
+// TCHECK2-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
+// TCHECK2-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4
+// TCHECK2-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// TCHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
+// TCHECK2-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
+// TCHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
+// TCHECK2-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 4
+// TCHECK2-NEXT: ret void
+//
+//
+// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
+// TCHECK2-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK2-NEXT: entry:
+// TCHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// TCHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// TCHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// TCHECK2-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
+// TCHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// TCHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
+// TCHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// TCHECK2-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// TCHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
+// TCHECK2-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
+// TCHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
+// TCHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
+// TCHECK2-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1
+// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// TCHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
+// TCHECK2-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
+// TCHECK2-NEXT: ret void
+//
+//
+// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
+// TCHECK2-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
+// TCHECK2-NEXT: entry:
+// TCHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
+// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
+// TCHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// TCHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
+// TCHECK2-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
+// TCHECK2-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// TCHECK2-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// TCHECK2-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// TCHECK2-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
+// TCHECK2-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
+// TCHECK2-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// TCHECK2-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
+// TCHECK2-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// TCHECK2-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
+// TCHECK2-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// TCHECK2-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
+// TCHECK2-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
+// TCHECK2-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
+// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
+// TCHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
+// TCHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
+// TCHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
+// TCHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK2-NEXT: store double [[ADD]], double* [[A]], align 4
+// TCHECK2-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK2-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4
+// TCHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
+// TCHECK2-NEXT: store double [[INC]], double* [[A4]], align 4
+// TCHECK2-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
+// TCHECK2-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
+// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
+// TCHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
+// TCHECK2-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
+// TCHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// TCHECK2-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
+// TCHECK2-NEXT: ret void
+//
+//
+// TCHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
+// TCHECK2-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK2-NEXT: entry:
+// TCHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// TCHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// TCHECK2-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// TCHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// TCHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// TCHECK2-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// TCHECK2-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// TCHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// TCHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
+// TCHECK2-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
+// TCHECK2-NEXT: ret void
+//
+//
+// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l63
+// TCHECK3-SAME: (i32 noundef [[A:%.*]], i32* noundef [[P:%.*]], i32 noundef [[GA:%.*]]) #[[ATTR0:[0-9]+]] {
+// TCHECK3-NEXT: entry:
+// TCHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[P_ADDR:%.*]] = alloca i32*, align 4
+// TCHECK3-NEXT: [[GA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// TCHECK3-NEXT: store i32* [[P]], i32** [[P_ADDR]], align 4
+// TCHECK3-NEXT: store i32 [[GA]], i32* [[GA_ADDR]], align 4
+// TCHECK3-NEXT: ret void
+//
+//
+// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l70
+// TCHECK3-SAME: (i32 noundef [[AA:%.*]], [10 x float]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]], i32 noundef [[VLA:%.*]], float* noundef nonnull align 4 dereferenceable(4) [[BN:%.*]], [5 x [10 x double]]* noundef nonnull align 4 dereferenceable(400) [[C:%.*]], i32 noundef [[VLA1:%.*]], i32 noundef [[VLA3:%.*]], double* noundef nonnull align 4 dereferenceable(8) [[CN:%.*]], %struct.TT* noundef nonnull align 4 dereferenceable(12) [[D:%.*]]) #[[ATTR0]] {
+// TCHECK3-NEXT: entry:
+// TCHECK3-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x float]*, align 4
+// TCHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[BN_ADDR:%.*]] = alloca float*, align 4
+// TCHECK3-NEXT: [[C_ADDR:%.*]] = alloca [5 x [10 x double]]*, align 4
+// TCHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[VLA_ADDR4:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[CN_ADDR:%.*]] = alloca double*, align 4
+// TCHECK3-NEXT: [[D_ADDR:%.*]] = alloca %struct.TT*, align 4
+// TCHECK3-NEXT: [[B5:%.*]] = alloca [10 x float], align 4
+// TCHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// TCHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[C7:%.*]] = alloca [5 x [10 x double]], align 8
+// TCHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[__VLA_EXPR2:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[D9:%.*]] = alloca [[STRUCT_TT:%.*]], align 4
+// TCHECK3-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4
+// TCHECK3-NEXT: store [10 x float]* [[B]], [10 x float]** [[B_ADDR]], align 4
+// TCHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// TCHECK3-NEXT: store float* [[BN]], float** [[BN_ADDR]], align 4
+// TCHECK3-NEXT: store [5 x [10 x double]]* [[C]], [5 x [10 x double]]** [[C_ADDR]], align 4
+// TCHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// TCHECK3-NEXT: store i32 [[VLA3]], i32* [[VLA_ADDR4]], align 4
+// TCHECK3-NEXT: store double* [[CN]], double** [[CN_ADDR]], align 4
+// TCHECK3-NEXT: store %struct.TT* [[D]], %struct.TT** [[D_ADDR]], align 4
+// TCHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16*
+// TCHECK3-NEXT: [[TMP0:%.*]] = load [10 x float]*, [10 x float]** [[B_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[BN_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP3:%.*]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// TCHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[VLA_ADDR4]], align 4
+// TCHECK3-NEXT: [[TMP6:%.*]] = load double*, double** [[CN_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP7:%.*]] = load %struct.TT*, %struct.TT** [[D_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP8:%.*]] = bitcast [10 x float]* [[B5]] to i8*
+// TCHECK3-NEXT: [[TMP9:%.*]] = bitcast [10 x float]* [[TMP0]] to i8*
+// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false)
+// TCHECK3-NEXT: [[TMP10:%.*]] = call i8* @llvm.stacksave()
+// TCHECK3-NEXT: store i8* [[TMP10]], i8** [[SAVED_STACK]], align 4
+// TCHECK3-NEXT: [[VLA6:%.*]] = alloca float, i32 [[TMP1]], align 4
+// TCHECK3-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// TCHECK3-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP1]], 4
+// TCHECK3-NEXT: [[TMP12:%.*]] = bitcast float* [[VLA6]] to i8*
+// TCHECK3-NEXT: [[TMP13:%.*]] = bitcast float* [[TMP2]] to i8*
+// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP12]], i8* align 4 [[TMP13]], i32 [[TMP11]], i1 false)
+// TCHECK3-NEXT: [[TMP14:%.*]] = bitcast [5 x [10 x double]]* [[C7]] to i8*
+// TCHECK3-NEXT: [[TMP15:%.*]] = bitcast [5 x [10 x double]]* [[TMP3]] to i8*
+// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP14]], i8* align 8 [[TMP15]], i32 400, i1 false)
+// TCHECK3-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// TCHECK3-NEXT: [[VLA8:%.*]] = alloca double, i32 [[TMP16]], align 8
+// TCHECK3-NEXT: store i32 [[TMP4]], i32* [[__VLA_EXPR1]], align 4
+// TCHECK3-NEXT: store i32 [[TMP5]], i32* [[__VLA_EXPR2]], align 4
+// TCHECK3-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP4]], [[TMP5]]
+// TCHECK3-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8
+// TCHECK3-NEXT: [[TMP19:%.*]] = bitcast double* [[VLA8]] to i8*
+// TCHECK3-NEXT: [[TMP20:%.*]] = bitcast double* [[TMP6]] to i8*
+// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP19]], i8* align 8 [[TMP20]], i32 [[TMP18]], i1 false)
+// TCHECK3-NEXT: [[TMP21:%.*]] = bitcast %struct.TT* [[D9]] to i8*
+// TCHECK3-NEXT: [[TMP22:%.*]] = bitcast %struct.TT* [[TMP7]] to i8*
+// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP21]], i8* align 4 [[TMP22]], i32 12, i1 false)
+// TCHECK3-NEXT: [[TMP23:%.*]] = load i16, i16* [[CONV]], align 2
+// TCHECK3-NEXT: [[CONV10:%.*]] = sext i16 [[TMP23]] to i32
+// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV10]], 1
+// TCHECK3-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD]] to i16
+// TCHECK3-NEXT: store i16 [[CONV11]], i16* [[CONV]], align 2
+// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B5]], i32 0, i32 2
+// TCHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// TCHECK3-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[VLA6]], i32 3
+// TCHECK3-NEXT: store float 1.000000e+00, float* [[ARRAYIDX12]], align 4
+// TCHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C7]], i32 0, i32 1
+// TCHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2
+// TCHECK3-NEXT: store double 1.000000e+00, double* [[ARRAYIDX14]], align 8
+// TCHECK3-NEXT: [[TMP24:%.*]] = mul nsw i32 1, [[TMP5]]
+// TCHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[VLA8]], i32 [[TMP24]]
+// TCHECK3-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX15]], i32 3
+// TCHECK3-NEXT: store double 1.000000e+00, double* [[ARRAYIDX16]], align 8
+// TCHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 0
+// TCHECK3-NEXT: store i64 1, i64* [[X]], align 4
+// TCHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D9]], i32 0, i32 1
+// TCHECK3-NEXT: store i8 1, i8* [[Y]], align 4
+// TCHECK3-NEXT: [[TMP25:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// TCHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP25]])
+// TCHECK3-NEXT: ret void
+//
+//
+// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooiPd_l111
+// TCHECK3-SAME: (double* noundef [[PTR:%.*]], %struct.TT.0* noundef nonnull align 4 dereferenceable(8) [[E:%.*]]) #[[ATTR0]] {
+// TCHECK3-NEXT: entry:
+// TCHECK3-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4
+// TCHECK3-NEXT: [[E_ADDR:%.*]] = alloca %struct.TT.0*, align 4
+// TCHECK3-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4
+// TCHECK3-NEXT: store %struct.TT.0* [[E]], %struct.TT.0** [[E_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP0:%.*]] = load %struct.TT.0*, %struct.TT.0** [[E_ADDR]], align 4
+// TCHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0:%.*]], %struct.TT.0* [[TMP0]], i32 0, i32 0
+// TCHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[X]], align 4
+// TCHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP1]] to double
+// TCHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP2]], i32 0
+// TCHECK3-NEXT: store double [[CONV]], double* [[ARRAYIDX]], align 4
+// TCHECK3-NEXT: [[TMP3:%.*]] = load double*, double** [[PTR_ADDR]], align 4
+// TCHECK3-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[TMP3]], i32 0
+// TCHECK3-NEXT: [[TMP4:%.*]] = load double, double* [[ARRAYIDX1]], align 4
+// TCHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP4]], 1.000000e+00
+// TCHECK3-NEXT: store double [[INC]], double* [[ARRAYIDX1]], align 4
+// TCHECK3-NEXT: ret void
+//
+//
+// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l142
+// TCHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[AAA:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK3-NEXT: entry:
+// TCHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[AAA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// TCHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// TCHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// TCHECK3-NEXT: store i32 [[AAA]], i32* [[AAA_ADDR]], align 4
+// TCHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// TCHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AAA_ADDR]] to i8*
+// TCHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// TCHECK3-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// TCHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 1
+// TCHECK3-NEXT: [[CONV2:%.*]] = sext i8 [[TMP4]] to i32
+// TCHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1
+// TCHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8
+// TCHECK3-NEXT: store i8 [[CONV4]], i8* [[CONV]], align 1
+// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// TCHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP5]], 1
+// TCHECK3-NEXT: store i32 [[ADD5]], i32* [[ARRAYIDX]], align 4
+// TCHECK3-NEXT: ret void
+//
+//
+// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l167
+// TCHECK3-SAME: (%struct.S1* noundef [[THIS:%.*]], i32 noundef [[B:%.*]], i32 noundef [[VLA:%.*]], i32 noundef [[VLA1:%.*]], i16* noundef nonnull align 2 dereferenceable(2) [[C:%.*]]) #[[ATTR0]] {
+// TCHECK3-NEXT: entry:
+// TCHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4
+// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[VLA_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[VLA_ADDR2:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[C_ADDR:%.*]] = alloca i16*, align 4
+// TCHECK3-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4
+// TCHECK3-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4
+// TCHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4
+// TCHECK3-NEXT: store i32 [[VLA]], i32* [[VLA_ADDR]], align 4
+// TCHECK3-NEXT: store i32 [[VLA1]], i32* [[VLA_ADDR2]], align 4
+// TCHECK3-NEXT: store i16* [[C]], i16** [[C_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP0:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[VLA_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[VLA_ADDR2]], align 4
+// TCHECK3-NEXT: [[TMP3:%.*]] = load i16*, i16** [[C_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP4:%.*]] = call i8* @llvm.stacksave()
+// TCHECK3-NEXT: store i8* [[TMP4]], i8** [[SAVED_STACK]], align 4
+// TCHECK3-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// TCHECK3-NEXT: [[VLA3:%.*]] = alloca i16, i32 [[TMP5]], align 2
+// TCHECK3-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4
+// TCHECK3-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4
+// TCHECK3-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP1]], [[TMP2]]
+// TCHECK3-NEXT: [[TMP7:%.*]] = mul nuw i32 [[TMP6]], 2
+// TCHECK3-NEXT: [[TMP8:%.*]] = bitcast i16* [[VLA3]] to i8*
+// TCHECK3-NEXT: [[TMP9:%.*]] = bitcast i16* [[TMP3]] to i8*
+// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[TMP8]], i8* align 2 [[TMP9]], i32 [[TMP7]], i1 false)
+// TCHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[B_ADDR]], align 4
+// TCHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP10]] to double
+// TCHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00
+// TCHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK3-NEXT: store double [[ADD]], double* [[A]], align 4
+// TCHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0
+// TCHECK3-NEXT: [[TMP11:%.*]] = load double, double* [[A4]], align 4
+// TCHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00
+// TCHECK3-NEXT: store double [[INC]], double* [[A4]], align 4
+// TCHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16
+// TCHECK3-NEXT: [[TMP12:%.*]] = mul nsw i32 1, [[TMP2]]
+// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA3]], i32 [[TMP12]]
+// TCHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1
+// TCHECK3-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2
+// TCHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4
+// TCHECK3-NEXT: call void @llvm.stackrestore(i8* [[TMP13]])
+// TCHECK3-NEXT: ret void
+//
+//
+// TCHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l128
+// TCHECK3-SAME: (i32 noundef [[A:%.*]], [10 x i32]* noundef nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] {
+// TCHECK3-NEXT: entry:
+// TCHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+// TCHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4
+// TCHECK3-NEXT: [[B1:%.*]] = alloca [10 x i32], align 4
+// TCHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4
+// TCHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4
+// TCHECK3-NEXT: [[TMP1:%.*]] = bitcast [10 x i32]* [[B1]] to i8*
+// TCHECK3-NEXT: [[TMP2:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8*
+// TCHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP1]], i8* align 4 [[TMP2]], i32 40, i1 false)
+// TCHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
+// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
+// TCHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4
+// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B1]], i32 0, i32 2
+// TCHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// TCHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], 1
+// TCHECK3-NEXT: store i32 [[ADD2]], i32* [[ARRAYIDX]], align 4
+// TCHECK3-NEXT: ret void
+//
+//
+// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z3fooiPd
+// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY1-NEXT: entry:
+// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// SIMD-ONLY1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4
+// SIMD-ONLY1-NEXT: [[AA:%.*]] = alloca i16, align 2
+// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca [10 x float], align 4
+// SIMD-ONLY1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// SIMD-ONLY1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// SIMD-ONLY1-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8
+// SIMD-ONLY1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8
+// SIMD-ONLY1-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8
+// SIMD-ONLY1-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4
+// SIMD-ONLY1-NEXT: [[P:%.*]] = alloca i32*, align 64
+// SIMD-ONLY1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// SIMD-ONLY1-NEXT: store i32 0, i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: store i16 0, i16* [[AA]], align 2
+// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave()
+// SIMD-ONLY1-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8
+// SIMD-ONLY1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4
+// SIMD-ONLY1-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8
+// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
+// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]]
+// SIMD-ONLY1-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8
+// SIMD-ONLY1-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8
+// SIMD-ONLY1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
+// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: store i32 [[TMP6]], i32* [[X]], align 4
+// SIMD-ONLY1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1
+// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4
+// SIMD-ONLY1-NEXT: store i32* [[A]], i32** [[P]], align 64
+// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2
+// SIMD-ONLY1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32
+// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1
+// SIMD-ONLY1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16
+// SIMD-ONLY1-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2
+// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2
+// SIMD-ONLY1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4
+// SIMD-ONLY1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3
+// SIMD-ONLY1-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4
+// SIMD-ONLY1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1
+// SIMD-ONLY1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2
+// SIMD-ONLY1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8
+// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]]
+// SIMD-ONLY1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]]
+// SIMD-ONLY1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3
+// SIMD-ONLY1-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8
+// SIMD-ONLY1-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0
+// SIMD-ONLY1-NEXT: store i64 1, i64* [[X8]], align 8
+// SIMD-ONLY1-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1
+// SIMD-ONLY1-NEXT: store i8 1, i8* [[Y9]], align 8
+// SIMD-ONLY1-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0
+// SIMD-ONLY1-NEXT: [[TMP10:%.*]] = load i32, i32* [[X10]], align 4
+// SIMD-ONLY1-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double
+// SIMD-ONLY1-NEXT: [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// SIMD-ONLY1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0
+// SIMD-ONLY1-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 8
+// SIMD-ONLY1-NEXT: [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// SIMD-ONLY1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0
+// SIMD-ONLY1-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8
+// SIMD-ONLY1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00
+// SIMD-ONLY1-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 8
+// SIMD-ONLY1-NEXT: [[TMP14:%.*]] = load i32, i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// SIMD-ONLY1-NEXT: call void @llvm.stackrestore(i8* [[TMP15]])
+// SIMD-ONLY1-NEXT: ret i32 [[TMP14]]
+//
+//
+// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z3bariPd
+// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] {
+// SIMD-ONLY1-NEXT: entry:
+// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// SIMD-ONLY1-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8
+// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4
+// SIMD-ONLY1-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8
+// SIMD-ONLY1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8
+// SIMD-ONLY1-NEXT: store i32 0, i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8
+// SIMD-ONLY1-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]])
+// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]]
+// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]])
+// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]]
+// SIMD-ONLY1-NEXT: store i32 [[ADD2]], i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]])
+// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]]
+// SIMD-ONLY1-NEXT: store i32 [[ADD4]], i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]])
+// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]]
+// SIMD-ONLY1-NEXT: store i32 [[ADD6]], i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4
+// SIMD-ONLY1-NEXT: ret i32 [[TMP9]]
+//
+//
+// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZN2S12r1Ei
+// SIMD-ONLY1-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 {
+// SIMD-ONLY1-NEXT: entry:
+// SIMD-ONLY1-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8
+// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca i32, align 4
+// SIMD-ONLY1-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8
+// SIMD-ONLY1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8
+// SIMD-ONLY1-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8
+// SIMD-ONLY1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8
+// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[B]], align 4
+// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4
+// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave()
+// SIMD-ONLY1-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8
+// SIMD-ONLY1-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]]
+// SIMD-ONLY1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2
+// SIMD-ONLY1-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8
+// SIMD-ONLY1-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4
+// SIMD-ONLY1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double
+// SIMD-ONLY1-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00
+// SIMD-ONLY1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0
+// SIMD-ONLY1-NEXT: store double [[ADD2]], double* [[A]], align 8
+// SIMD-ONLY1-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0
+// SIMD-ONLY1-NEXT: [[TMP6:%.*]] = load double, double* [[A3]], align 8
+// SIMD-ONLY1-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00
+// SIMD-ONLY1-NEXT: store double [[INC]], double* [[A3]], align 8
+// SIMD-ONLY1-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16
+// SIMD-ONLY1-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]]
+// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]]
+// SIMD-ONLY1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1
+// SIMD-ONLY1-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2
+// SIMD-ONLY1-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]]
+// SIMD-ONLY1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]]
+// SIMD-ONLY1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1
+// SIMD-ONLY1-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
+// SIMD-ONLY1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP9]] to i32
+// SIMD-ONLY1-NEXT: [[TMP10:%.*]] = load i32, i32* [[B]], align 4
+// SIMD-ONLY1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
+// SIMD-ONLY1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8
+// SIMD-ONLY1-NEXT: call void @llvm.stackrestore(i8* [[TMP11]])
+// SIMD-ONLY1-NEXT: ret i32 [[ADD9]]
+//
+//
+// SIMD-ONLY1-LABEL: define {{[^@]+}}@_ZL7fstatici
+// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] {
+// SIMD-ONLY1-NEXT: entry:
+// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4
+// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4
+// SIMD-ONLY1-NEXT: [[AAA:%.*]] = alloca i8, align 1
+// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4
+// SIMD-ONLY1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4
+//
SIMD-ONLY1-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY1-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY1-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY1-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY1-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY1-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY1-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY1-NEXT: entry: +// SIMD-ONLY1-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY1-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY1-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY1-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY1-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY1-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY1-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY11-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY11-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY11-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// SIMD-ONLY11-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 +// SIMD-ONLY11-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 +// SIMD-ONLY11-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY11-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY11-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY11-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY11-NEXT: 
[[VLA:%.*]] = alloca float, i64 [[TMP1]], align 4 +// SIMD-ONLY11-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +// SIMD-ONLY11-NEXT: [[TMP5:%.*]] = mul nuw i64 5, [[TMP4]] +// SIMD-ONLY11-NEXT: [[VLA1:%.*]] = alloca double, i64 [[TMP5]], align 8 +// SIMD-ONLY11-NEXT: store i64 [[TMP4]], i64* [[__VLA_EXPR1]], align 8 +// SIMD-ONLY11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store i32 [[TMP6]], i32* [[X]], align 4 +// SIMD-ONLY11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store i32 [[TMP7]], i32* [[Y]], align 4 +// SIMD-ONLY11-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY11-NEXT: [[TMP8:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY11-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY11-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i64 0, i64 2 +// SIMD-ONLY11-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY11-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i64 3 +// SIMD-ONLY11-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY11-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i64 0, i64 1 +// SIMD-ONLY11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i64 0, i64 2 +// SIMD-ONLY11-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY11-NEXT: [[TMP9:%.*]] = mul nsw i64 1, [[TMP4]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i64 [[TMP9]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i64 3 +// SIMD-ONLY11-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY11-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: store i64 1, i64* [[X8]], align 8 +// SIMD-ONLY11-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY11-NEXT: store i8 1, i8* [[Y9]], align 8 +// SIMD-ONLY11-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: [[TMP10:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY11-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP10]] to double +// SIMD-ONLY11-NEXT: [[TMP11:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP11]], i64 0 +// SIMD-ONLY11-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 8 +// SIMD-ONLY11-NEXT: [[TMP12:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP12]], i64 0 +// SIMD-ONLY11-NEXT: [[TMP13:%.*]] = load double, double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY11-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 +// SIMD-ONLY11-NEXT: 
store double [[INC]], double* [[ARRAYIDX13]], align 8 +// SIMD-ONLY11-NEXT: [[TMP14:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP15:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY11-NEXT: call void @llvm.stackrestore(i8* [[TMP15]]) +// SIMD-ONLY11-NEXT: ret i32 [[TMP14]] +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 8 +// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 +// SIMD-ONLY11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 8 +// SIMD-ONLY11-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z3fooiPd(i32 noundef signext [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[CALL1:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP3]]) +// SIMD-ONLY11-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY11-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[CALL3:%.*]] = call noundef signext i32 @_ZL7fstatici(i32 noundef signext [[TMP5]]) +// SIMD-ONLY11-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY11-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[CALL5:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP7]]) +// SIMD-ONLY11-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY11-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY11-SAME: (%struct.S1* noundef nonnull align 8 dereferenceable(8) [[THIS:%.*]], i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 8 +// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 8 +// SIMD-ONLY11-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 +// SIMD-ONLY11-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 8 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, 
i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY11-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64 +// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY11-NEXT: store i8* [[TMP3]], i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY11-NEXT: [[TMP4:%.*]] = mul nuw i64 2, [[TMP2]] +// SIMD-ONLY11-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP4]], align 2 +// SIMD-ONLY11-NEXT: store i64 [[TMP2]], i64* [[__VLA_EXPR0]], align 8 +// SIMD-ONLY11-NEXT: [[TMP5:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY11-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// SIMD-ONLY11-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: store double [[ADD2]], double* [[A]], align 8 +// SIMD-ONLY11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY11-NEXT: [[TMP6:%.*]] = load double, double* [[A3]], align 8 +// SIMD-ONLY11-NEXT: [[INC:%.*]] = fadd double [[TMP6]], 1.000000e+00 +// SIMD-ONLY11-NEXT: store double [[INC]], double* [[A3]], align 8 +// SIMD-ONLY11-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY11-NEXT: [[TMP7:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP7]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 +// SIMD-ONLY11-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY11-NEXT: [[TMP8:%.*]] = mul nsw i64 1, [[TMP2]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i64 [[TMP8]] +// SIMD-ONLY11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i64 1 +// SIMD-ONLY11-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY11-NEXT: [[CONV8:%.*]] = sext i16 [[TMP9]] to i32 +// SIMD-ONLY11-NEXT: [[TMP10:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY11-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP10]] +// SIMD-ONLY11-NEXT: [[TMP11:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8 +// SIMD-ONLY11-NEXT: call void @llvm.stackrestore(i8* [[TMP11]]) +// SIMD-ONLY11-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY11-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY11-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY11-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY11-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY11-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY11-SAME: (i32 noundef signext [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY11-NEXT: entry: +// SIMD-ONLY11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY11-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY11-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY11-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i64 0, i64 2 +// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY11-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY11-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY11-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY12-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY12-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// SIMD-ONLY12-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// SIMD-ONLY12-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY12-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY12-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY12-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// SIMD-ONLY12-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// SIMD-ONLY12-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// SIMD-ONLY12-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// SIMD-ONLY12-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// SIMD-ONLY12-NEXT: [[Y:%.*]] = getelementptr inbounds 
[[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// SIMD-ONLY12-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY12-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY12-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY12-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY12-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2 +// SIMD-ONLY12-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY12-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3 +// SIMD-ONLY12-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 4 +// SIMD-ONLY12-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1 +// SIMD-ONLY12-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2 +// SIMD-ONLY12-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY12-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3 +// SIMD-ONLY12-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY12-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: store i64 1, i64* [[X8]], align 4 +// SIMD-ONLY12-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY12-NEXT: store i8 1, i8* [[Y9]], align 4 +// SIMD-ONLY12-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: [[TMP8:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY12-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double +// SIMD-ONLY12-NEXT: [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0 +// SIMD-ONLY12-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 4 +// SIMD-ONLY12-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0 +// SIMD-ONLY12-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY12-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// SIMD-ONLY12-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY12-NEXT: [[TMP12:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY12-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// SIMD-ONLY12-NEXT: ret i32 [[TMP12]] +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// SIMD-ONLY12-NEXT: store i32 [[N]], 
i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY12-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY12-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// SIMD-ONLY12-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY12-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// SIMD-ONLY12-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY12-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY12-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY12-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY12-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY12-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// SIMD-ONLY12-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// SIMD-ONLY12-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY12-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// SIMD-ONLY12-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY12-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], 
%struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: store double [[ADD2]], double* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 +// SIMD-ONLY12-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// SIMD-ONLY12-NEXT: store double [[INC]], double* [[A3]], align 4 +// SIMD-ONLY12-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY12-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// SIMD-ONLY12-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY12-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]] +// SIMD-ONLY12-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1 +// SIMD-ONLY12-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY12-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY12-NEXT: [[TMP9:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY12-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// SIMD-ONLY12-NEXT: [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY12-NEXT: call void @llvm.stackrestore(i8* [[TMP10]]) +// SIMD-ONLY12-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY12-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY12-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY12-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY12-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY12-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY12-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY12-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY12-NEXT: entry: +// SIMD-ONLY12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY12-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY12-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: 
[[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY12-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY12-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY12-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY12-NEXT: ret i32 [[TMP2]] +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3fooiPd +// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[AA:%.*]] = alloca i16, align 2 +// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca [10 x float], align 4 +// SIMD-ONLY13-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY13-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[C:%.*]] = alloca [5 x [10 x double]], align 8 +// SIMD-ONLY13-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[D:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 +// SIMD-ONLY13-NEXT: [[E:%.*]] = alloca [[STRUCT_TT_0:%.*]], align 4 +// SIMD-ONLY13-NEXT: [[P:%.*]] = alloca i32*, align 64 +// SIMD-ONLY13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: store i16 0, i16* [[AA]], align 2 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY13-NEXT: store i8* [[TMP1]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY13-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP0]], align 4 +// SIMD-ONLY13-NEXT: store i32 [[TMP0]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = mul nuw i32 5, [[TMP2]] +// SIMD-ONLY13-NEXT: [[VLA1:%.*]] = alloca double, i32 [[TMP3]], align 8 +// SIMD-ONLY13-NEXT: store i32 [[TMP2]], i32* [[__VLA_EXPR1]], align 4 +// SIMD-ONLY13-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 [[TMP4]], i32* [[X]], align 4 +// SIMD-ONLY13-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 1 +// SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 [[TMP5]], i32* [[Y]], align 4 +// SIMD-ONLY13-NEXT: store i32* [[A]], i32** [[P]], align 64 +// SIMD-ONLY13-NEXT: [[TMP6:%.*]] = load i16, i16* [[AA]], align 2 +// SIMD-ONLY13-NEXT: [[CONV:%.*]] = sext i16 [[TMP6]] to i32 +// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY13-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 +// SIMD-ONLY13-NEXT: store i16 [[CONV2]], i16* [[AA]], align 2 +// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[B]], i32 0, i32 2 +// SIMD-ONLY13-NEXT: store float 1.000000e+00, float* [[ARRAYIDX]], align 4 +// SIMD-ONLY13-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[VLA]], i32 3 +// SIMD-ONLY13-NEXT: store float 1.000000e+00, float* [[ARRAYIDX3]], align 
4 +// SIMD-ONLY13-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[C]], i32 0, i32 1 +// SIMD-ONLY13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX4]], i32 0, i32 2 +// SIMD-ONLY13-NEXT: store double 1.000000e+00, double* [[ARRAYIDX5]], align 8 +// SIMD-ONLY13-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP2]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[VLA1]], i32 [[TMP7]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX6]], i32 3 +// SIMD-ONLY13-NEXT: store double 1.000000e+00, double* [[ARRAYIDX7]], align 8 +// SIMD-ONLY13-NEXT: [[X8:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: store i64 1, i64* [[X8]], align 4 +// SIMD-ONLY13-NEXT: [[Y9:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[D]], i32 0, i32 1 +// SIMD-ONLY13-NEXT: store i8 1, i8* [[Y9]], align 4 +// SIMD-ONLY13-NEXT: [[X10:%.*]] = getelementptr inbounds [[STRUCT_TT_0]], %struct.TT.0* [[E]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: [[TMP8:%.*]] = load i32, i32* [[X10]], align 4 +// SIMD-ONLY13-NEXT: [[CONV11:%.*]] = sitofp i32 [[TMP8]] to double +// SIMD-ONLY13-NEXT: [[TMP9:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP9]], i32 0 +// SIMD-ONLY13-NEXT: store double [[CONV11]], double* [[ARRAYIDX12]], align 4 +// SIMD-ONLY13-NEXT: [[TMP10:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds double, double* [[TMP10]], i32 0 +// SIMD-ONLY13-NEXT: [[TMP11:%.*]] = load double, double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY13-NEXT: [[INC:%.*]] = fadd double [[TMP11]], 1.000000e+00 +// SIMD-ONLY13-NEXT: store double [[INC]], double* [[ARRAYIDX13]], align 4 +// SIMD-ONLY13-NEXT: [[TMP12:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP13:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY13-NEXT: call void @llvm.stackrestore(i8* [[TMP13]]) +// SIMD-ONLY13-NEXT: ret i32 [[TMP12]] +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3bariPd +// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]], double* noundef [[PTR:%.*]]) #[[ATTR0]] { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[PTR_ADDR:%.*]] = alloca double*, align 4 +// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 +// SIMD-ONLY13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store double* [[PTR]], double** [[PTR_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load double*, double** [[PTR_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[CALL:%.*]] = call noundef i32 @_Z3fooiPd(i32 noundef [[TMP0]], double* noundef [[TMP1]]) +// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[CALL]] +// SIMD-ONLY13-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[CALL1:%.*]] = call noundef i32 @_ZN2S12r1Ei(%struct.S1* noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP3]]) +// SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load i32, i32* [[A]], align 4 +// 
SIMD-ONLY13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP4]], [[CALL1]] +// SIMD-ONLY13-NEXT: store i32 [[ADD2]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[CALL3:%.*]] = call noundef i32 @_ZL7fstatici(i32 noundef [[TMP5]]) +// SIMD-ONLY13-NEXT: [[TMP6:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP6]], [[CALL3]] +// SIMD-ONLY13-NEXT: store i32 [[ADD4]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[CALL5:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP7]]) +// SIMD-ONLY13-NEXT: [[TMP8:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP8]], [[CALL5]] +// SIMD-ONLY13-NEXT: store i32 [[ADD6]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP9:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: ret i32 [[TMP9]] +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZN2S12r1Ei +// SIMD-ONLY13-SAME: (%struct.S1* noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[N:%.*]]) #[[ATTR0]] comdat align 2 { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[THIS_ADDR:%.*]] = alloca %struct.S1*, align 4 +// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[SAVED_STACK:%.*]] = alloca i8*, align 4 +// SIMD-ONLY13-NEXT: [[__VLA_EXPR0:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: store %struct.S1* [[THIS]], %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[THIS1:%.*]] = load %struct.S1*, %struct.S1** [[THIS_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY13-NEXT: store i32 [[ADD]], i32* [[B]], align 4 +// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave() +// SIMD-ONLY13-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = mul nuw i32 2, [[TMP1]] +// SIMD-ONLY13-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP3]], align 2 +// SIMD-ONLY13-NEXT: store i32 [[TMP1]], i32* [[__VLA_EXPR0]], align 4 +// SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY13-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double +// SIMD-ONLY13-NEXT: [[ADD2:%.*]] = fadd double [[CONV]], 1.500000e+00 +// SIMD-ONLY13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: store double [[ADD2]], double* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[THIS1]], i32 0, i32 0 +// SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load double, double* [[A3]], align 4 +// SIMD-ONLY13-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// SIMD-ONLY13-NEXT: store double [[INC]], double* [[A3]], align 4 +// SIMD-ONLY13-NEXT: [[CONV4:%.*]] = fptosi double [[INC]] to i16 +// SIMD-ONLY13-NEXT: [[TMP6:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP6]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 +// SIMD-ONLY13-NEXT: store i16 [[CONV4]], i16* [[ARRAYIDX5]], align 2 +// SIMD-ONLY13-NEXT: [[TMP7:%.*]] = mul nsw i32 1, [[TMP1]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds i16, i16* [[VLA]], i32 [[TMP7]] +// SIMD-ONLY13-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX6]], i32 1 +// SIMD-ONLY13-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 +// SIMD-ONLY13-NEXT: [[CONV8:%.*]] = sext i16 [[TMP8]] to i32 +// SIMD-ONLY13-NEXT: [[TMP9:%.*]] = load i32, i32* [[B]], align 4 +// SIMD-ONLY13-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], [[TMP9]] +// SIMD-ONLY13-NEXT: [[TMP10:%.*]] = load i8*, i8** [[SAVED_STACK]], align 4 +// SIMD-ONLY13-NEXT: call void @llvm.stackrestore(i8* [[TMP10]]) +// SIMD-ONLY13-NEXT: ret i32 [[ADD9]] +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZL7fstatici +// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[AAA:%.*]] = alloca i8, align 1 +// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: store i8 0, i8* [[AAA]], align 1 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY13-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load i8, i8* [[AAA]], align 1 +// SIMD-ONLY13-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32 +// SIMD-ONLY13-NEXT: [[ADD1:%.*]] = add nsw i32 [[CONV]], 1 +// SIMD-ONLY13-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD1]] to i8 +// SIMD-ONLY13-NEXT: store i8 [[CONV2]], i8* [[AAA]], align 1 +// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1 +// SIMD-ONLY13-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: ret i32 [[TMP3]] +// +// +// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z9ftemplateIiET_i +// SIMD-ONLY13-SAME: (i32 noundef [[N:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY13-NEXT: entry: +// SIMD-ONLY13-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca i32, align 4 +// SIMD-ONLY13-NEXT: [[B:%.*]] = alloca [10 x i32], align 4 +// SIMD-ONLY13-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// SIMD-ONLY13-NEXT: store i32 0, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], 1 +// SIMD-ONLY13-NEXT: store i32 [[ADD]], i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B]], i32 0, i32 2 +// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY13-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP1]], 1 +// SIMD-ONLY13-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load i32, i32* [[A]], align 4 +// SIMD-ONLY13-NEXT: ret i32 [[TMP2]] +// diff --git a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp index b010d0dcea192..09c4d568bb096 100644 --- a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp +++ b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp @@ -1,193 +1,76 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature 
--include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // expected-no-diagnostics #ifndef HEADER #define HEADER ///==========================================================================/// -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK10 // RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-64 -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK11 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK12 // RUN: %clang_cc1 -DCK1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK1 --check-prefix CK1-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK13 -// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY00 %s // RUN: %clang_cc1 -DCK1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY0 %s +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY01 %s +// RUN: %clang_cc1 -DCK1 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY02 %s // RUN: %clang_cc1 -DCK1 -fopenmp-simd 
-fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s -// SIMD-ONLY0-NOT: {{__kmpc|__tgt}} +// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY03 %s #ifdef CK1 double *g; -// CK1: @g ={{.*}} global ptr -// CK1: [[SIZES00:@.+]] = {{.+}}constant [1 x i[[sz:64|32]]] [i{{64|32}} {{8|4}}] -// CK1: [[TYPES00:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES01:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES01:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES02:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES02:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES03:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES03:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES04:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES04:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES05:@.+]] = {{.+}}constant [1 x i[[sz]]] [i[[sz]] {{8|4}}] -// CK1: [[TYPES05:@.+]] = {{.+}}constant [1 x i64] [i64 288] -// CK1: [[SIZES06:@.+]] = {{.+}}constant [2 x i[[sz]]] [i[[sz]] {{8|4}}, i[[sz]] {{8|4}}] -// CK1: [[TYPES06:@.+]] = {{.+}}constant [2 x i64] [i64 288, i64 288] -// CK1-LABEL: @_Z3foo{{.*}}( template void foo(float *&lr, T *&tr) { float *l; T *t; -// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 -// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 -// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]] -// CK1-DAG: store ptr [[VAL]], ptr [[P1]] -// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:@g]], - -// CK1: call void [[KERNEL:@.+]](ptr [[VAL]]) + #pragma omp target is_device_ptr(g) { ++g; } -// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]]) -// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2 -// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]] -// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3 -// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]] -// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 -// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 -// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0 -// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]] -// CK1-DAG: store ptr [[VAL]], ptr [[P1]] -// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]], - -// CK1: call void [[KERNEL:@.+]](ptr [[VAL]]) + 
 #pragma omp target is_device_ptr(l)
   {
     ++l;
   }
-// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
-// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
-// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]]
-// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
-// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]]
-// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
-// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
-// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
-// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
-// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]]
-// CK1-DAG: store ptr [[VAL]], ptr [[P1]]
-// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]],
-
-// CK1: call void [[KERNEL:@.+]](ptr [[VAL]])
+
 #pragma omp target is_device_ptr(t)
   {
     ++t;
   }
-// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
-// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
-// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]]
-// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
-// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]]
-// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
-// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
-// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
-// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
-// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]]
-// CK1-DAG: store ptr [[VAL]], ptr [[P1]]
-// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]],
-// CK1-DAG: [[ADDR]] = load ptr, ptr [[ADDR2:%.+]],
-
-// CK1: call void [[KERNEL:@.+]](ptr [[VAL]])
+
 #pragma omp target is_device_ptr(lr)
   {
     ++lr;
   }
-// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
-// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
-// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]]
-// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
-// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]]
-// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
-// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
-// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
-// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
-// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]]
-// CK1-DAG: store ptr [[VAL]], ptr [[P1]]
-// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]],
-// CK1-DAG: [[ADDR]] = load ptr, ptr [[ADDR2:%.+]],
-
-// CK1: call void [[KERNEL:@.+]](ptr [[VAL]])
+
 #pragma omp target is_device_ptr(tr)
   {
     ++tr;
   }
-// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
-// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
-// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]]
-// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
-// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]]
-// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
-// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
-// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
-// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
-// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]]
-// CK1-DAG: store ptr [[VAL]], ptr [[P1]]
-// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]],
-// CK1-DAG: [[ADDR]] = load ptr, ptr [[ADDR2:%.+]],
-
-// CK1: call void [[KERNEL:@.+]](ptr [[VAL]])
+
 #pragma omp target is_device_ptr(tr, lr)
   {
     ++tr;
   }
-// CK1-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
-// CK1-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
-// CK1-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]]
-// CK1-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
-// CK1-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]]
-// CK1-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
-// CK1-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
-// CK1-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
-// CK1-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
-// CK1-DAG: store ptr [[VAL:%.+]], ptr [[BP1]]
-// CK1-DAG: store ptr [[VAL]], ptr [[P1]]
-// CK1-DAG: [[VAL]] = load ptr, ptr [[ADDR:%.+]],
-// CK1-DAG: [[ADDR]] = load ptr, ptr [[ADDR2:%.+]],
-
-// CK1-DAG: [[_BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 1
-// CK1-DAG: [[_P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 1
-// CK1-DAG: store ptr [[_VAL:%.+]], ptr [[_BP1]]
-// CK1-DAG: store ptr [[_VAL]], ptr [[_P1]]
-// CK1-DAG: [[_VAL]] = load ptr, ptr [[_ADDR:%.+]],
-// CK1-DAG: [[_ADDR]] = load ptr, ptr [[_ADDR2:%.+]],
-
-// CK1: call void [[KERNEL:@.+]](ptr [[VAL]], ptr [[_VAL]])
+
+
 #pragma omp target is_device_ptr(tr, lr)
   {
     ++tr,++lr;
@@ -200,23 +83,21 @@ void bar(float *&a, int *&b) {
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK20
 // RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-64
-// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK21
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK22
 // RUN: %clang_cc1 -DCK2 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK2 --check-prefix CK2-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK23
-// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY10 %s
 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
-// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY11 %s
+// RUN: %clang_cc1 -DCK2 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY12 %s
 // RUN: %clang_cc1 -DCK2 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
-// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY13 %s
 #ifdef CK2
-// CK2: [[ST:%.+]] = type { ptr, ptr }
 template <typename T>
 struct ST {
@@ -224,70 +105,23 @@ struct ST {
   double *&b;
   ST(double *&b) : a(0), b(b) {}
-  // CK2-LABEL: @{{.*}}foo{{.*}}
   void foo(double *&arg) {
     int *la = 0;
-// CK2-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
-// CK2-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
-// CK2-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]]
-// CK2-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
-// CK2-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]]
-// CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
-// CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
-
-// CK2-DAG: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1:%.+]], i32 0, i32 0
-// CK2-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-// CK2-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-// CK2-DAG: store ptr [[THIS1]], ptr [[BP0]]
-// CK2-DAG: store ptr [[A]], ptr [[P0]]
+
 #pragma omp target is_device_ptr(a)
     {
       a++;
    }
-// CK2-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
-// CK2-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
-// CK2-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]]
-// CK2-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
-// CK2-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]]
-// CK2-DAG: [[SARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
-// CK2-DAG: store ptr [[SIZE:%.+]], ptr [[SARG]]
-// CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
-// CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
-
-// CK2-DAG: [[S:%[^,]+]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CK2-DAG: [[SIZE:%[^,]+]] = getelementptr inbounds [2 x i64], ptr %.offload_sizes, i32 0, i32 0
-// CK2-DAG: store i64 [[S]], ptr [[SIZE]]
-// CK2-DAG: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
-// CK2-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-// CK2-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-// CK2-DAG: store ptr [[THIS1]], ptr [[BP0]]
-// CK2-DAG: store ptr [[B]], ptr [[P0]]
+
 #pragma omp target is_device_ptr(b)
     {
      b++;
    }
-// CK2-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
-// CK2-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
-// CK2-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]]
-// CK2-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
-// CK2-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]]
-// CK2-DAG: [[SARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
-// CK2-DAG: store ptr [[SIZE:%.+]], ptr [[SARG]]
-// CK2-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]]
-// CK2-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]]
-
-// CK2-DAG: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
-// CK2-DAG: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
-// CK2-DAG: [[S:%[^,]+]] = sdiv exact i64 [[SZ:%.+]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
-// CK2-DAG: store i64 [[S]], ptr [[SIZE:%.+]]
-
-// CK2-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0
-// CK2-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0
-// CK2-DAG: store ptr [[THIS1]], ptr [[BP0]]
-// CK2-DAG: store ptr [[A8]], ptr [[TMP64:%.+]]
+
+
 #pragma omp target is_device_ptr(a, b)
     {
       a++;
@@ -303,42 +137,5595 @@ void bar(double *arg){
 }
 #endif
 ///==========================================================================///
-// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK30
 // RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-64
-// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK31
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK32
 // RUN: %clang_cc1 -DCK3 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK3 --check-prefix CK3-32
+// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK33
-// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY20 %s
 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
-// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY1 %s
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY21 %s
+// RUN: %clang_cc1 -DCK3 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY22 %s
 // RUN: %clang_cc1 -DCK3 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s
-// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY1 %s
-// SIMD-ONLY1-NOT: {{__kmpc|__tgt}}
+// RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY23 %s
 #ifdef CK3
-// CK3-DAG: [[SIZES:@.+]] = {{.+}}constant [1 x i[[SZ:64|32]]] [i{{64|32}} {{8|4}}]
 // OMP_MAP_TARGET_PARAM = 0x20 | OMP_MAP_TO = 0x1 = 0x21
-// CK3-DAG: [[TYPES:@.+]] = {{.+}}constant [1 x i64] [i64 [[#0x21]]]
 void bar() {
   __attribute__((aligned(64))) double *ptr;
-  // CK3-DAG: [[RET:%.+]] = call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 [[DEVICE:.+]], i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
-  // CK3-DAG: [[BPARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 2
-  // CK3-DAG: store ptr [[BPGEP:%.+]], ptr [[BPARG]]
-  // CK3-DAG: [[PARG:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 3
-  // CK3-DAG: store ptr [[PGEP:%.+]], ptr [[PARG]]
-  // CK3-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0
-  // CK3-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0
-  // CK3-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0
-  // CK3-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[PS]], i32 0, i32 0
-  // CK3-DAG: store ptr [[PTR:%.+]], ptr [[BP1]]
-  // CK3-DAG: store ptr [[PTR]], ptr [[P1]]
-
-  // CK3: call void [[KERNEL:@.+]](ptr [[PTR]])
+
 #pragma omp target is_device_ptr(ptr)
   *ptr = 0;
 }
 #endif
 #endif
+// CK1-64-LABEL: define {{[^@]+}}@_Z3barRPfRPi
+// CK1-64-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CK1-64-NEXT: entry:
+// CK1-64-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// CK1-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// CK1-64-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// CK1-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CK1-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CK1-64-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP1]])
+// CK1-64-NEXT: ret void
+// CK1-64-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_
+// CK1-64-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[LR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[TR:%.*]]) #[[ATTR0]] comdat {
+// CK1-64-NEXT: entry:
+// CK1-64-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8
+// CK1-64-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8
+// CK1-64-NEXT: [[L:%.*]] = alloca ptr, align 8
+// CK1-64-NEXT: [[T:%.*]] = alloca ptr, align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[TMP:%.*]] = alloca ptr, align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[_TMP19:%.*]] = alloca ptr, align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[_TMP26:%.*]] = alloca ptr, align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8
+// CK1-64-NEXT: [[_TMP33:%.*]] = alloca ptr, align 8
+// CK1-64-NEXT: [[_TMP34:%.*]] = alloca ptr, align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 8
+// CK1-64-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 8
+// CK1-64-NEXT:
[[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 8 +// CK1-64-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8 +// CK1-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CK1-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CK1-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CK1-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK1-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK1-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK1-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK1-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CK1-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8 +// CK1-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8 +// CK1-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CK1-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK1-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK1-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK1-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK1-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK1-64-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]]) +// CK1-64-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK1-64-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK1-64: omp_offload.failed: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK1-64: omp_offload.cont: +// CK1-64-NEXT: [[TMP21:%.*]] = load ptr, ptr [[L]], align 8 +// CK1-64-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 8 +// CK1-64-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP21]], ptr [[TMP23]], align 8 +// CK1-64-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CK1-64-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CK1-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CK1-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 +// CK1-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 8 +// CK1-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 8 +// CK1-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 8 +// CK1-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CK1-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP34]], align 8 +// CK1-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP35]], align 8 +// CK1-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CK1-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4 +// CK1-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 +// CK1-64-NEXT: 
store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CK1-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CK1-64-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]]) +// CK1-64-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CK1-64-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CK1-64: omp_offload.failed5: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT6]] +// CK1-64: omp_offload.cont6: +// CK1-64-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 8 +// CK1-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 8 +// CK1-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 8 +// CK1-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CK1-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP48]], align 4 +// CK1-64-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP49]], align 4 +// CK1-64-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 8 +// CK1-64-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 8 +// CK1-64-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 8 +// CK1-64-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 8 +// CK1-64-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CK1-64-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP55]], align 8 +// CK1-64-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP56]], align 8 +// CK1-64-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP57]], align 
8 +// CK1-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 +// CK1-64-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 +// CK1-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP60]], align 4 +// CK1-64-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]]) +// CK1-64-NEXT: [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0 +// CK1-64-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CK1-64: omp_offload.failed11: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT12]] +// CK1-64: omp_offload.cont12: +// CK1-64-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 8 +// CK1-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 8 +// CK1-64-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 8 +// CK1-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP68]], align 8 +// CK1-64-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP71]], align 4 +// CK1-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP72]], align 4 +// CK1-64-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 8 +// CK1-64-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 8 +// CK1-64-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 8 +// CK1-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 8 +// CK1-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, 
ptr [[TMP77]], align 8 +// CK1-64-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP78]], align 8 +// CK1-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK1-64-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP80]], align 8 +// CK1-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4 +// CK1-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4 +// CK1-64-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP83]], align 4 +// CK1-64-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]]) +// CK1-64-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0 +// CK1-64-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CK1-64: omp_offload.failed17: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT18]] +// CK1-64: omp_offload.cont18: +// CK1-64-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 8 +// CK1-64-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 8 +// CK1-64-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 8 +// CK1-64-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 8 +// CK1-64-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 8 +// CK1-64-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP91]], align 8 +// CK1-64-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP94]], align 4 +// CK1-64-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP95]], align 4 +// CK1-64-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 8 +// CK1-64-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP93]], ptr 
[[TMP97]], align 8 +// CK1-64-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 8 +// CK1-64-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 8 +// CK1-64-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP100]], align 8 +// CK1-64-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP101]], align 8 +// CK1-64-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP102]], align 8 +// CK1-64-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP103]], align 8 +// CK1-64-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4 +// CK1-64-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4 +// CK1-64-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP106]], align 4 +// CK1-64-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]]) +// CK1-64-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0 +// CK1-64-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CK1-64: omp_offload.failed24: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT25]] +// CK1-64: omp_offload.cont25: +// CK1-64-NEXT: [[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 8 +// CK1-64-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], align 8 +// CK1-64-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 8 +// CK1-64-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 8 +// CK1-64-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 8 +// CK1-64-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP114]], align 8 +// CK1-64-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], 
i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP117]], align 4 +// CK1-64-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 +// CK1-64-NEXT: store i32 1, ptr [[TMP118]], align 4 +// CK1-64-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 8 +// CK1-64-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 8 +// CK1-64-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 8 +// CK1-64-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 8 +// CK1-64-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP123]], align 8 +// CK1-64-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP124]], align 8 +// CK1-64-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP125]], align 8 +// CK1-64-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP126]], align 8 +// CK1-64-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4 +// CK1-64-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4 +// CK1-64-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP129]], align 4 +// CK1-64-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]]) +// CK1-64-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0 +// CK1-64-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]] +// CK1-64: omp_offload.failed31: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT32]] +// CK1-64: omp_offload.cont32: +// CK1-64-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 8 +// CK1-64-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 8 +// CK1-64-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 8 +// CK1-64-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 8 +// CK1-64-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 8 +// CK1-64-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 8 +// CK1-64-NEXT: [[TMP138:%.*]] = getelementptr 
inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 8 +// CK1-64-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK1-64-NEXT: store ptr [[TMP135]], ptr [[TMP139]], align 8 +// CK1-64-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 0 +// CK1-64-NEXT: store ptr null, ptr [[TMP140]], align 8 +// CK1-64-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1 +// CK1-64-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 8 +// CK1-64-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1 +// CK1-64-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 8 +// CK1-64-NEXT: [[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 1 +// CK1-64-NEXT: store ptr null, ptr [[TMP143]], align 8 +// CK1-64-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK1-64-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK1-64-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-64-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 +// CK1-64-NEXT: store i32 2, ptr [[TMP146]], align 4 +// CK1-64-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 +// CK1-64-NEXT: store i32 2, ptr [[TMP147]], align 4 +// CK1-64-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 +// CK1-64-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 8 +// CK1-64-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 +// CK1-64-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 8 +// CK1-64-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 +// CK1-64-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 8 +// CK1-64-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 +// CK1-64-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 8 +// CK1-64-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 +// CK1-64-NEXT: store ptr null, ptr [[TMP152]], align 8 +// CK1-64-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 +// CK1-64-NEXT: store ptr null, ptr [[TMP153]], align 8 +// CK1-64-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 +// CK1-64-NEXT: store i64 0, ptr [[TMP154]], align 8 +// CK1-64-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 +// CK1-64-NEXT: store i64 0, ptr [[TMP155]], align 8 +// CK1-64-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 +// CK1-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4 +// CK1-64-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 +// CK1-64-NEXT: store [3 x i32] 
zeroinitializer, ptr [[TMP157]], align 4 +// CK1-64-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 +// CK1-64-NEXT: store i32 0, ptr [[TMP158]], align 4 +// CK1-64-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]]) +// CK1-64-NEXT: [[TMP160:%.*]] = icmp ne i32 [[TMP159]], 0 +// CK1-64-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] +// CK1-64: omp_offload.failed39: +// CK1-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]] +// CK1-64-NEXT: br label [[OMP_OFFLOAD_CONT40]] +// CK1-64: omp_offload.cont40: +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37 +// CK1-64-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43 +// CK1-64-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49 +// CK1-64-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55 +// CK1-64-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61 +// CK1-64-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: 
store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67 +// CK1-64-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74 +// CK1-64-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CK1-64-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK1-64-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK1-64-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 8 +// CK1-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK1-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK1-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CK1-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CK1-64-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK1-64-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 8 +// CK1-64-NEXT: ret void +// CK1-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK1-64-SAME: () #[[ATTR3:[0-9]+]] { +// CK1-64-NEXT: entry: +// CK1-64-NEXT: call void @__tgt_register_requires(i64 1) +// CK1-64-NEXT: ret void +// CK2-64-LABEL: define {{[^@]+}}@_Z3barPd +// CK2-64-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// CK2-64-NEXT: entry: +// CK2-64-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 8 +// CK2-64-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK2-64-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK2-64-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CK2-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK2-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 +// CK2-64-NEXT: ret void +// CK2-64-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// CK2-64-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull 
align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CK2-64-NEXT: entry: +// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK2-64-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK2-64-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]]) +// CK2-64-NEXT: ret void +// CK2-64-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd +// CK2-64-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// CK2-64-NEXT: entry: +// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: [[LA:%.*]] = alloca ptr, align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 8 +// CK2-64-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 8 +// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK2-64-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK2-64-NEXT: store ptr null, ptr [[LA]], align 8 +// CK2-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK2-64-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CK2-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK2-64-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CK2-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK2-64-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CK2-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK2-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK2-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK2-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK2-64-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CK2-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK2-64-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CK2-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK2-64-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// 
CK2-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CK2-64-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CK2-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CK2-64-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8
+// CK2-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CK2-64-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8
+// CK2-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CK2-64-NEXT: store ptr null, ptr [[TMP11]], align 8
+// CK2-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CK2-64-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CK2-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CK2-64-NEXT: store i64 0, ptr [[TMP13]], align 8
+// CK2-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CK2-64-NEXT: store i64 0, ptr [[TMP14]], align 8
+// CK2-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CK2-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4
+// CK2-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CK2-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
+// CK2-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CK2-64-NEXT: store i32 0, ptr [[TMP17]], align 4
+// CK2-64-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]])
+// CK2-64-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CK2-64-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CK2-64: omp_offload.failed:
+// CK2-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
+// CK2-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CK2-64: omp_offload.cont:
+// CK2-64-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK2-64-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 8
+// CK2-64-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1
+// CK2-64-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64
+// CK2-64-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64
+// CK2-64-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]]
+// CK2-64-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CK2-64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.1, i64 16, i1 false)
+// CK2-64-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 8
+// CK2-64-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CK2-64-NEXT: store ptr [[B]], ptr [[TMP27]], align 8
+// CK2-64-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CK2-64-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 8
+// CK2-64-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0
+// CK2-64-NEXT: store ptr null, ptr [[TMP29]], align 8
+// CK2-64-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
+// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 8
+// CK2-64-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
+// CK2-64-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 8
+// CK2-64-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 1
+// CK2-64-NEXT: store ptr null, ptr [[TMP32]], align 8
+// CK2-64-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CK2-64-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CK2-64-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CK2-64-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK2-64-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CK2-64-NEXT: store i32 2, ptr [[TMP36]], align 4
+// CK2-64-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CK2-64-NEXT: store i32 2, ptr [[TMP37]], align 4
+// CK2-64-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CK2-64-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 8
+// CK2-64-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CK2-64-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 8
+// CK2-64-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CK2-64-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 8
+// CK2-64-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CK2-64-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 8
+// CK2-64-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CK2-64-NEXT: store ptr null, ptr [[TMP42]], align 8
+// CK2-64-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CK2-64-NEXT: store ptr null, ptr [[TMP43]], align 8
+// CK2-64-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CK2-64-NEXT: store i64 0, ptr [[TMP44]], align 8
+// CK2-64-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CK2-64-NEXT: store i64 0, ptr [[TMP45]], align 8
+// CK2-64-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CK2-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4
+// CK2-64-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CK2-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4
+// CK2-64-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CK2-64-NEXT: store i32 0, ptr [[TMP48]], align 4
+// CK2-64-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]])
+// CK2-64-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
+// CK2-64-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CK2-64: omp_offload.failed6:
+// CK2-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]]
+// CK2-64-NEXT: br label [[OMP_OFFLOAD_CONT7]]
+// CK2-64: omp_offload.cont7:
+// CK2-64-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
+// CK2-64-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK2-64-NEXT: [[TMP51:%.*]] = load ptr, ptr [[B9]], align 8
+// CK2-64-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1
+// CK2-64-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64
+// CK2-64-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64
+// CK2-64-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]]
+// CK2-64-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CK2-64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES13]], ptr align 8 @.offload_sizes.3, i64 24, i1 false)
+// CK2-64-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
+// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 8
+// CK2-64-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
+// CK2-64-NEXT: store ptr [[A8]], ptr [[TMP58]], align 8
+// CK2-64-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0
+// CK2-64-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 8
+// CK2-64-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0
+// CK2-64-NEXT: store ptr null, ptr [[TMP60]], align 8
+// CK2-64-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
+// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 8
+// CK2-64-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
+// CK2-64-NEXT: store ptr [[A8]], ptr [[TMP62]], align 8
+// CK2-64-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1
+// CK2-64-NEXT: store ptr null, ptr [[TMP63]], align 8
+// CK2-64-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2
+// CK2-64-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 8
+// CK2-64-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2
+// CK2-64-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 8
+// CK2-64-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 2
+// CK2-64-NEXT: store ptr null, ptr [[TMP66]], align 8
+// CK2-64-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
+// CK2-64-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
+// CK2-64-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0
+// CK2-64-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK2-64-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
+// CK2-64-NEXT: store i32 2, ptr [[TMP70]], align 4
+// CK2-64-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
+// CK2-64-NEXT: store i32 3, ptr [[TMP71]], align 4
+// CK2-64-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2
+// CK2-64-NEXT: store ptr [[TMP67]], ptr [[TMP72]], align 8
+// CK2-64-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3
+// CK2-64-NEXT: store ptr [[TMP68]], ptr [[TMP73]], align 8
+// CK2-64-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4
+// CK2-64-NEXT: store ptr [[TMP69]], ptr [[TMP74]], align 8
+// CK2-64-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5
+// CK2-64-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP75]], align 8
+// CK2-64-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6
+// CK2-64-NEXT: store ptr null, ptr [[TMP76]], align 8
+// CK2-64-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7
+// CK2-64-NEXT: store ptr null, ptr [[TMP77]], align 8
+// CK2-64-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8
+// CK2-64-NEXT: store i64 0, ptr [[TMP78]], align 8
+// CK2-64-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9
+// CK2-64-NEXT: store i64 0, ptr [[TMP79]], align 8
+// CK2-64-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10
+// CK2-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4
+// CK2-64-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11
+// CK2-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4
+// CK2-64-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12
+// CK2-64-NEXT: store i32 0, ptr [[TMP82]], align 4
+// CK2-64-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]])
+// CK2-64-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0
+// CK2-64-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CK2-64: omp_offload.failed15:
+// CK2-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]]
+// CK2-64-NEXT: br label [[OMP_OFFLOAD_CONT16]]
+// CK2-64: omp_offload.cont16:
+// CK2-64-NEXT: ret void
+// CK2-64-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd
+// CK2-64-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CK2-64-NEXT: entry:
+// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CK2-64-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CK2-64-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// CK2-64-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CK2-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CK2-64-NEXT: store ptr null, ptr [[A]], align 8
+// CK2-64-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CK2-64-NEXT: store ptr [[TMP0]], ptr [[B2]], align 8
+// CK2-64-NEXT: ret void
+// CK2-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112
+// CK2-64-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] {
+// CK2-64-NEXT: entry:
+// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CK2-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CK2-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8
+// CK2-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK2-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8
+// CK2-64-NEXT: ret void
+// CK2-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118
+// CK2-64-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
+// CK2-64-NEXT: entry:
+// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CK2-64-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1
+// CK2-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8
+// CK2-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// CK2-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// CK2-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8
+// CK2-64-NEXT: ret void
+// CK2-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125
+// CK2-64-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
+// CK2-64-NEXT: entry:
+// CK2-64-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CK2-64-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CK2-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CK2-64-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CK2-64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8
+// CK2-64-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK2-64-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8
+// CK2-64-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1
+// CK2-64-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 8
+// CK2-64-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8
+// CK2-64-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// CK2-64-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 8
+// CK2-64-NEXT: ret void
+// CK2-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CK2-64-SAME: () #[[ATTR5:[0-9]+]] {
+// CK2-64-NEXT: entry:
+// CK2-64-NEXT: call void @__tgt_register_requires(i64 1)
+// CK2-64-NEXT: ret void
+// CK2-32-LABEL: define {{[^@]+}}@_Z3barPd
+// CK2-32-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+// CK2-32-NEXT: entry:
+// CK2-32-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4
+// CK2-32-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4
+// CK2-32-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4
+// CK2-32-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
+// CK2-32-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
+// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4
+// CK2-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK2-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4
+// CK2-32-NEXT: ret void
+// CK2-32-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd
+// CK2-32-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// CK2-32-NEXT: entry:
+// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK2-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CK2-32-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CK2-32-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]])
+// CK2-32-NEXT: ret void
+// CK2-32-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd
+// CK2-32-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 {
+// CK2-32-NEXT: entry:
+// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK2-32-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4
+// CK2-32-NEXT: [[LA:%.*]] = alloca ptr, align 4
+// CK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
+// CK2-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
+// CK2-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 4
+// CK2-32-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 4
+// CK2-32-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 4
+// CK2-32-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 4
+// CK2-32-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 4
+// CK2-32-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 4
+// CK2-32-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 4
+// CK2-32-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 4
+// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4
+// CK2-32-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: store ptr null, ptr [[LA]], align 4
+// CK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CK2-32-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4
+// CK2-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK2-32-NEXT: store ptr [[A]], ptr [[TMP1]], align 4
+// CK2-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CK2-32-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CK2-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK2-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK2-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CK2-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CK2-32-NEXT: store i32 2, ptr [[TMP5]], align 4
+// CK2-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CK2-32-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CK2-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CK2-32-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CK2-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CK2-32-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CK2-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CK2-32-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4
+// CK2-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CK2-32-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4
+// CK2-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CK2-32-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CK2-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CK2-32-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CK2-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CK2-32-NEXT: store i64 0, ptr [[TMP13]], align 8
+// CK2-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CK2-32-NEXT: store i64 0, ptr [[TMP14]], align 8
+// CK2-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4
+// CK2-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CK2-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
+// CK2-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CK2-32-NEXT: store i32 0, ptr [[TMP17]], align 4
+// CK2-32-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]])
+// CK2-32-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CK2-32-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CK2-32: omp_offload.failed:
+// CK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
+// CK2-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CK2-32: omp_offload.cont:
+// CK2-32-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK2-32-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 4
+// CK2-32-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1
+// CK2-32-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64
+// CK2-32-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64
+// CK2-32-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]]
+// CK2-32-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CK2-32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.1, i32 16, i1 false)
+// CK2-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 4
+// CK2-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CK2-32-NEXT: store ptr [[B]], ptr [[TMP27]], align 4
+// CK2-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CK2-32-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 4
+// CK2-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
+// CK2-32-NEXT: store ptr null, ptr [[TMP29]], align 4
+// CK2-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
+// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 4
+// CK2-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
+// CK2-32-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 4
+// CK2-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
+// CK2-32-NEXT: store ptr null, ptr [[TMP32]], align 4
+// CK2-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CK2-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CK2-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CK2-32-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK2-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CK2-32-NEXT: store i32 2, ptr [[TMP36]], align 4
+// CK2-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CK2-32-NEXT: store i32 2, ptr [[TMP37]], align 4
+// CK2-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CK2-32-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 4
+// CK2-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CK2-32-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 4
+// CK2-32-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CK2-32-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 4
+// CK2-32-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CK2-32-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 4
+// CK2-32-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CK2-32-NEXT: store ptr null, ptr [[TMP42]], align 4
+// CK2-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CK2-32-NEXT: store ptr null, ptr [[TMP43]], align 4
+// CK2-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CK2-32-NEXT: store i64 0, ptr [[TMP44]], align 8
+// CK2-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CK2-32-NEXT: store i64 0, ptr [[TMP45]], align 8
+// CK2-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4
+// CK2-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CK2-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4
+// CK2-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CK2-32-NEXT: store i32 0, ptr [[TMP48]], align 4
+// CK2-32-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]])
+// CK2-32-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
+// CK2-32-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CK2-32: omp_offload.failed6:
+// CK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]]
+// CK2-32-NEXT: br label [[OMP_OFFLOAD_CONT7]]
+// CK2-32: omp_offload.cont7:
+// CK2-32-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
+// CK2-32-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK2-32-NEXT: [[TMP51:%.*]] = load ptr, ptr [[B9]], align 4
+// CK2-32-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1
+// CK2-32-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64
+// CK2-32-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64
+// CK2-32-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]]
+// CK2-32-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CK2-32-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES13]], ptr align 4 @.offload_sizes.3, i32 24, i1 false)
+// CK2-32-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
+// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 4
+// CK2-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
+// CK2-32-NEXT: store ptr [[A8]], ptr [[TMP58]], align 4
+// CK2-32-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0
+// CK2-32-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 4
+// CK2-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 0
+// CK2-32-NEXT: store ptr null, ptr [[TMP60]], align 4
+// CK2-32-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
+// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 4
+// CK2-32-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
+// CK2-32-NEXT: store ptr [[A8]], ptr [[TMP62]], align 4
+// CK2-32-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 1
+// CK2-32-NEXT: store ptr null, ptr [[TMP63]], align 4
+// CK2-32-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2
+// CK2-32-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 4
+// CK2-32-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2
+// CK2-32-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 4
+// CK2-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 2
+// CK2-32-NEXT: store ptr null, ptr [[TMP66]], align 4
+// CK2-32-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
+// CK2-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
+// CK2-32-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0
+// CK2-32-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK2-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
+// CK2-32-NEXT: store i32 2, ptr [[TMP70]], align 4
+// CK2-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
+// CK2-32-NEXT: store i32 3, ptr [[TMP71]], align 4
+// CK2-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2
+// CK2-32-NEXT: store ptr [[TMP67]], ptr [[TMP72]], align 4
+// CK2-32-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3
+// CK2-32-NEXT: store ptr [[TMP68]], ptr [[TMP73]], align 4
+// CK2-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4
+// CK2-32-NEXT: store ptr [[TMP69]], ptr [[TMP74]], align 4
+// CK2-32-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5
+// CK2-32-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP75]], align 4
+// CK2-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6
+// CK2-32-NEXT: store ptr null, ptr [[TMP76]], align 4
+// CK2-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7
+// CK2-32-NEXT: store ptr null, ptr [[TMP77]], align 4
+// CK2-32-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8
+// CK2-32-NEXT: store i64 0, ptr [[TMP78]], align 8
+// CK2-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9
+// CK2-32-NEXT: store i64 0, ptr [[TMP79]], align 8
+// CK2-32-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10
+// CK2-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4
+// CK2-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11
+// CK2-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4
+// CK2-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12
+// CK2-32-NEXT: store i32 0, ptr [[TMP82]], align 4
+// CK2-32-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]])
+// CK2-32-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0
+// CK2-32-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CK2-32: omp_offload.failed15:
+// CK2-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]]
+// CK2-32-NEXT: br label [[OMP_OFFLOAD_CONT16]]
+// CK2-32: omp_offload.cont16:
+// CK2-32-NEXT: ret void
+// CK2-32-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd
+// CK2-32-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CK2-32-NEXT: entry:
+// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK2-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CK2-32-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CK2-32-NEXT: store ptr null, ptr [[A]], align 4
+// CK2-32-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CK2-32-NEXT: store ptr [[TMP0]], ptr [[B2]], align 4
+// CK2-32-NEXT: ret void
+// CK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112
+// CK2-32-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] {
+// CK2-32-NEXT: entry:
+// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CK2-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
+// CK2-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK2-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4
+// CK2-32-NEXT: ret void
+// CK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118
+// CK2-32-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
+// CK2-32-NEXT: entry:
+// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1
+// CK2-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4
+// CK2-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
+// CK2-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// CK2-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 4
+// CK2-32-NEXT: ret void
+// CK2-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125
+// CK2-32-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
+// CK2-32-NEXT: entry:
+// CK2-32-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK2-32-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK2-32-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CK2-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
+// CK2-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK2-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4
+// CK2-32-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1
+// CK2-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 4
+// CK2-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
+// CK2-32-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// CK2-32-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 4
+// CK2-32-NEXT: ret void
+// CK2-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CK2-32-SAME: () #[[ATTR5:[0-9]+]] {
+// CK2-32-NEXT: entry:
+// CK2-32-NEXT: call void @__tgt_register_requires(i64 1)
+// CK2-32-NEXT: ret void
+// CK3-64-LABEL: define {{[^@]+}}@_Z3barv
+// CK3-64-SAME: () #[[ATTR0:[0-9]+]] {
+// CK3-64-NEXT: entry:
+// CK3-64-NEXT: [[PTR:%.*]] = alloca ptr, align 64
+// CK3-64-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
+// CK3-64-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
+// CK3-64-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CK3-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64
+// CK3-64-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK3-64-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8
+// CK3-64-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK3-64-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8
+// CK3-64-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CK3-64-NEXT: store ptr null, ptr [[TMP3]], align 8
+// CK3-64-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK3-64-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK3-64-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CK3-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CK3-64-NEXT: store i32 2, ptr [[TMP6]], align 4
+// CK3-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CK3-64-NEXT: store i32 1, ptr [[TMP7]], align 4
+// CK3-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CK3-64-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CK3-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CK3-64-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
+// CK3-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CK3-64-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8
+// CK3-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CK3-64-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8
+// CK3-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CK3-64-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CK3-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CK3-64-NEXT: store ptr null, ptr [[TMP13]], align 8
+// CK3-64-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CK3-64-NEXT: store i64 0, ptr [[TMP14]], align 8
+// CK3-64-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CK3-64-NEXT: store i64 0, ptr [[TMP15]], align 8
+// CK3-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CK3-64-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4
+// CK3-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CK3-64-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4
+// CK3-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CK3-64-NEXT: store i32 0, ptr [[TMP18]], align 4
+// CK3-64-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]])
+// CK3-64-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
+// CK3-64-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CK3-64: omp_offload.failed:
+// CK3-64-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CK3-64-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CK3-64: omp_offload.cont:
+// CK3-64-NEXT: ret void
+// CK3-64-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159
+// CK3-64-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+// CK3-64-NEXT: entry:
+// CK3-64-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CK3-64-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CK3-64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CK3-64-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 8
+// CK3-64-NEXT: ret void
+// CK3-64-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CK3-64-SAME: () #[[ATTR3:[0-9]+]] {
+// CK3-64-NEXT: entry:
+// CK3-64-NEXT: call void @__tgt_register_requires(i64 1)
+// CK3-64-NEXT: ret void
+// CK3-32-LABEL: define {{[^@]+}}@_Z3barv
+// CK3-32-SAME: () #[[ATTR0:[0-9]+]] {
+// CK3-32-NEXT: entry:
+// CK3-32-NEXT: [[PTR:%.*]] = alloca ptr, align 64
+// CK3-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
+// CK3-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
+// CK3-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CK3-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64
+// CK3-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK3-32-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4
+// CK3-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK3-32-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4
+// CK3-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CK3-32-NEXT: store ptr null, ptr [[TMP3]], align 4
+// CK3-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK3-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK3-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CK3-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CK3-32-NEXT: store i32 2, ptr [[TMP6]], align 4
+// CK3-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CK3-32-NEXT: store i32 1, ptr [[TMP7]], align 4
+// CK3-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CK3-32-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CK3-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CK3-32-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
+// CK3-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CK3-32-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4
+// CK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CK3-32-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4
+// CK3-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CK3-32-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CK3-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CK3-32-NEXT: store ptr null, ptr [[TMP13]], align 4
+// CK3-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CK3-32-NEXT: store i64 0, ptr [[TMP14]], align 8
+// CK3-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CK3-32-NEXT: store i64 0, ptr [[TMP15]], align 8
+// CK3-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CK3-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4
+// CK3-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CK3-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4
+// CK3-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CK3-32-NEXT: store i32 0, ptr [[TMP18]], align 4
+// CK3-32-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]])
+// CK3-32-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
+// CK3-32-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CK3-32: omp_offload.failed:
+// CK3-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CK3-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CK3-32: omp_offload.cont:
+// CK3-32-NEXT: ret void
+// CK3-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159
+// CK3-32-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+// CK3-32-NEXT: entry:
+// CK3-32-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4
+// CK3-32-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4
+// CK3-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4
+// CK3-32-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 4
+// CK3-32-NEXT: ret void
+// CK3-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CK3-32-SAME: () #[[ATTR3:[0-9]+]] {
+// CK3-32-NEXT: entry:
+// CK3-32-NEXT: call void @__tgt_register_requires(i64 1)
+// CK3-32-NEXT: ret void
+// CK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CK1-SAME: () #[[ATTR3:[0-9]+]] {
+// CK1-NEXT: entry:
+// CK1-NEXT: call void @__tgt_register_requires(i64 1)
+// CK1-NEXT: ret void
+// CK1-32-LABEL: define {{[^@]+}}@_Z3barRPfRPi
+// CK1-32-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CK1-32-NEXT: entry:
+// CK1-32-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// CK1-32-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CK1-32-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// CK1-32-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CK1-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CK1-32-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]])
+// CK1-32-NEXT: ret void
+// CK1-32-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_
+// CK1-32-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TR:%.*]]) #[[ATTR0]] comdat {
+// CK1-32-NEXT: entry:
+// CK1-32-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4
+// CK1-32-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4
+// CK1-32-NEXT: [[L:%.*]] = alloca ptr, align 4
+// CK1-32-NEXT: [[T:%.*]] = alloca ptr, align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[TMP:%.*]] = alloca ptr, align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[_TMP19:%.*]] = alloca ptr, align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[_TMP26:%.*]] = alloca ptr, align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4
+// CK1-32-NEXT: [[_TMP33:%.*]] = alloca ptr, align 4
+// CK1-32-NEXT: [[_TMP34:%.*]] = alloca ptr, align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 4
+// CK1-32-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 4
+// CK1-32-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4
+// CK1-32-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4
+// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4
+// CK1-32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK1-32-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4
+// CK1-32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK1-32-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4
+// CK1-32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CK1-32-NEXT: store ptr null, ptr [[TMP3]], align 4
+// CK1-32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK1-32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK1-32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CK1-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CK1-32-NEXT: store i32 2, ptr [[TMP6]], align 4
+// CK1-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CK1-32-NEXT: store i32 1, ptr [[TMP7]], align 4
+// CK1-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CK1-32-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CK1-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CK1-32-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
+// CK1-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CK1-32-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4
+// CK1-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CK1-32-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4
+// CK1-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CK1-32-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CK1-32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CK1-32-NEXT: store ptr null, ptr [[TMP13]], align 4
+// CK1-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CK1-32-NEXT: store i64 0, ptr [[TMP14]], align 8
+// CK1-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CK1-32-NEXT: store i64 0, ptr [[TMP15]], align 8
+// CK1-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4
+// CK1-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4
+// CK1-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CK1-32-NEXT: store i32 0, ptr [[TMP18]], align 4
+// CK1-32-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]])
+// CK1-32-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
+// CK1-32-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CK1-32: omp_offload.failed:
+// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CK1-32: omp_offload.cont:
+// CK1-32-NEXT: [[TMP21:%.*]] = load ptr, ptr [[L]], align 4
+// CK1-32-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CK1-32-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 4
+// CK1-32-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CK1-32-NEXT: store ptr [[TMP21]], ptr [[TMP23]], align 4
+// CK1-32-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0
+// CK1-32-NEXT: store ptr null, ptr [[TMP24]], align 4
+// CK1-32-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CK1-32-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CK1-32-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK1-32-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
+// CK1-32-NEXT: store i32 2, ptr [[TMP27]], align 4
+// CK1-32-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
+// CK1-32-NEXT: store i32 1, ptr [[TMP28]], align 4
+// CK1-32-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
+// CK1-32-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4
+// CK1-32-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
+// CK1-32-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 4
+// CK1-32-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
+// CK1-32-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 4
+// CK1-32-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
+// CK1-32-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 4
+// CK1-32-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
+// CK1-32-NEXT: store ptr null, ptr [[TMP33]], align 4
+// CK1-32-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
+// CK1-32-NEXT: store ptr null, ptr [[TMP34]], align 4
+// CK1-32-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
+// CK1-32-NEXT: store i64 0, ptr [[TMP35]], align 8
+// CK1-32-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9
+// CK1-32-NEXT: store i64 0, ptr [[TMP36]], align 8
+// CK1-32-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10
+// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4
+// CK1-32-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11
+// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4
+// CK1-32-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12
+// CK1-32-NEXT: store i32 0, ptr [[TMP39]], align 4
+// CK1-32-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]])
+// CK1-32-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CK1-32-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CK1-32: omp_offload.failed5:
+// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]]
+// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT6]]
+// CK1-32: omp_offload.cont6:
+// CK1-32-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 4
+// CK1-32-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CK1-32-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 4
+// CK1-32-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
+// CK1-32-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 4
+// CK1-32-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0
+// CK1-32-NEXT: store ptr null, ptr [[TMP45]], align 4
+// CK1-32-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CK1-32-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
+// CK1-32-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK1-32-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
+// CK1-32-NEXT: store i32 2, ptr [[TMP48]], align 4
+// CK1-32-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
+// CK1-32-NEXT: store i32 1, ptr [[TMP49]], align 4
+// CK1-32-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2
+// CK1-32-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 4
+// CK1-32-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3
+// CK1-32-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 4
+// CK1-32-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4
+// CK1-32-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 4
+// CK1-32-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5
+// CK1-32-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 4
+// CK1-32-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6
+// CK1-32-NEXT: store ptr null, ptr [[TMP54]], align 4
+// CK1-32-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7
+// CK1-32-NEXT: store ptr null, ptr [[TMP55]], align 4
+// CK1-32-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8
+// CK1-32-NEXT: store i64 0, ptr [[TMP56]], align 8
+// CK1-32-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9
+// CK1-32-NEXT: store i64 0, ptr [[TMP57]], align 8
+// CK1-32-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10
+// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4
+// CK1-32-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11
+// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4
+// CK1-32-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12
+// CK1-32-NEXT: store i32 0, ptr [[TMP60]], align 4
+// CK1-32-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]])
+// CK1-32-NEXT: [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
+// CK1-32-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
+// CK1-32: omp_offload.failed11:
+// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]]
+// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT12]]
+// CK1-32: omp_offload.cont12:
+// CK1-32-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
+// CK1-32-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 4
+// CK1-32-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 4
+// CK1-32-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 4
+// CK1-32-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CK1-32-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 4
+// CK1-32-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CK1-32-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 4
+// CK1-32-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 0
+// CK1-32-NEXT: store ptr null, ptr [[TMP68]], align 4
+// CK1-32-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CK1-32-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CK1-32-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK1-32-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
+// CK1-32-NEXT: store i32 2, ptr [[TMP71]], align 4
+// CK1-32-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
+// CK1-32-NEXT: store i32 1, ptr [[TMP72]], align 4
+// CK1-32-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2
+// CK1-32-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 4
+// CK1-32-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3
+// CK1-32-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 4
+// CK1-32-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4
+// CK1-32-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 4
+// CK1-32-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5
+// CK1-32-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 4
+// CK1-32-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6
+// CK1-32-NEXT: store ptr null, ptr [[TMP77]], align 4
+// CK1-32-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7
+// CK1-32-NEXT: store ptr null, ptr [[TMP78]], align 4
+// CK1-32-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8
+// CK1-32-NEXT: store i64 0, ptr [[TMP79]], align 8
+// CK1-32-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9
+// CK1-32-NEXT: store i64 0, ptr [[TMP80]], align 8
+// CK1-32-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10
+// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4
+// CK1-32-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11
+// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4
+// CK1-32-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12
+// CK1-32-NEXT: store i32 0, ptr [[TMP83]], align 4
+// CK1-32-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]])
+// CK1-32-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0
+// CK1-32-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CK1-32: omp_offload.failed17:
+// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]]
+// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT18]]
+// CK1-32: omp_offload.cont18:
+// CK1-32-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// CK1-32-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 4
+// CK1-32-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 4
+// CK1-32-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 4
+// CK1-32-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CK1-32-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 4
+// CK1-32-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CK1-32-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 4
+// CK1-32-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 0
+// CK1-32-NEXT: store ptr null, ptr [[TMP91]], align 4
+// CK1-32-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CK1-32-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CK1-32-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK1-32-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
+// CK1-32-NEXT: store i32 2, ptr [[TMP94]], align 4
+// CK1-32-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
+// CK1-32-NEXT: store i32 1, ptr [[TMP95]], align 4
+// CK1-32-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2
+// CK1-32-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 4
+// CK1-32-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3
+// CK1-32-NEXT: store ptr [[TMP93]], ptr [[TMP97]], align 4
+// CK1-32-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4
+// CK1-32-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 4
+// CK1-32-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5
+// CK1-32-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 4
+// CK1-32-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6
+// CK1-32-NEXT: store ptr null, ptr [[TMP100]], align 4
+// CK1-32-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7
+// CK1-32-NEXT: store ptr null, ptr [[TMP101]], align 4
+// CK1-32-NEXT: [[TMP102:%.*]] = getelementptr
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CK1-32-NEXT: store i64 0, ptr [[TMP102]], align 8 +// CK1-32-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9 +// CK1-32-NEXT: store i64 0, ptr [[TMP103]], align 8 +// CK1-32-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10 +// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4 +// CK1-32-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11 +// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4 +// CK1-32-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12 +// CK1-32-NEXT: store i32 0, ptr [[TMP106]], align 4 +// CK1-32-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]]) +// CK1-32-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0 +// CK1-32-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CK1-32: omp_offload.failed24: +// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]] +// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT25]] +// CK1-32: omp_offload.cont25: +// CK1-32-NEXT: [[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 4 +// CK1-32-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], align 4 +// CK1-32-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 4 +// CK1-32-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 4 +// CK1-32-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 4 +// CK1-32-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0 +// CK1-32-NEXT: store ptr null, ptr [[TMP114]], align 4 +// CK1-32-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK1-32-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK1-32-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-32-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 +// CK1-32-NEXT: store i32 2, ptr [[TMP117]], align 4 +// CK1-32-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 +// CK1-32-NEXT: store i32 1, ptr [[TMP118]], align 4 +// CK1-32-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 +// CK1-32-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 4 +// CK1-32-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 +// CK1-32-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 4 +// CK1-32-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 +// CK1-32-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 4 +// 
CK1-32-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 +// CK1-32-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 4 +// CK1-32-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 +// CK1-32-NEXT: store ptr null, ptr [[TMP123]], align 4 +// CK1-32-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7 +// CK1-32-NEXT: store ptr null, ptr [[TMP124]], align 4 +// CK1-32-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 +// CK1-32-NEXT: store i64 0, ptr [[TMP125]], align 8 +// CK1-32-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 +// CK1-32-NEXT: store i64 0, ptr [[TMP126]], align 8 +// CK1-32-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 +// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4 +// CK1-32-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 +// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4 +// CK1-32-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 +// CK1-32-NEXT: store i32 0, ptr [[TMP129]], align 4 +// CK1-32-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]]) +// CK1-32-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0 +// CK1-32-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]] +// CK1-32: omp_offload.failed31: +// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]] +// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT32]] +// CK1-32: omp_offload.cont32: +// CK1-32-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 4 +// CK1-32-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 4 +// CK1-32-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 4 +// CK1-32-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 4 +// CK1-32-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 4 +// CK1-32-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 4 +// CK1-32-NEXT: [[TMP138:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 4 +// CK1-32-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK1-32-NEXT: store ptr [[TMP135]], ptr [[TMP139]], align 4 +// CK1-32-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 0 +// CK1-32-NEXT: store ptr null, ptr [[TMP140]], align 4 +// CK1-32-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1 +// CK1-32-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 4 +// CK1-32-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1 +// CK1-32-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 4 +// CK1-32-NEXT: 
[[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 1 +// CK1-32-NEXT: store ptr null, ptr [[TMP143]], align 4 +// CK1-32-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK1-32-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK1-32-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK1-32-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 +// CK1-32-NEXT: store i32 2, ptr [[TMP146]], align 4 +// CK1-32-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 +// CK1-32-NEXT: store i32 2, ptr [[TMP147]], align 4 +// CK1-32-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 +// CK1-32-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 4 +// CK1-32-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 +// CK1-32-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 4 +// CK1-32-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 +// CK1-32-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 4 +// CK1-32-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 +// CK1-32-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 4 +// CK1-32-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 +// CK1-32-NEXT: store ptr null, ptr [[TMP152]], align 4 +// CK1-32-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 +// CK1-32-NEXT: store ptr null, ptr [[TMP153]], align 4 +// CK1-32-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 +// CK1-32-NEXT: store i64 0, ptr [[TMP154]], align 8 +// CK1-32-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 +// CK1-32-NEXT: store i64 0, ptr [[TMP155]], align 8 +// CK1-32-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 +// CK1-32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4 +// CK1-32-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 +// CK1-32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP157]], align 4 +// CK1-32-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 +// CK1-32-NEXT: store i32 0, ptr [[TMP158]], align 4 +// CK1-32-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]]) +// CK1-32-NEXT: [[TMP160:%.*]] = icmp ne i32 [[TMP159]], 0 +// CK1-32-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] +// CK1-32: omp_offload.failed39: +// CK1-32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]] +// CK1-32-NEXT: br label [[OMP_OFFLOAD_CONT40]] +// CK1-32: 
omp_offload.cont40: +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37 +// CK1-32-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43 +// CK1-32-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49 +// CK1-32-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55 +// CK1-32-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61 +// CK1-32-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67 +// CK1-32-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// 
CK1-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74 +// CK1-32-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4 +// CK1-32-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK1-32-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4 +// CK1-32-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 4 +// CK1-32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK1-32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4 +// CK1-32-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 4 +// CK1-32-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CK1-32-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK1-32-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 4 +// CK1-32-NEXT: ret void +// CK1-32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK1-32-SAME: () #[[ATTR3:[0-9]+]] { +// CK1-32-NEXT: entry: +// CK1-32-NEXT: call void @__tgt_register_requires(i64 1) +// CK1-32-NEXT: ret void +// CK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK2-SAME: () #[[ATTR5:[0-9]+]] { +// CK2-NEXT: entry: +// CK2-NEXT: call void @__tgt_register_requires(i64 1) +// CK2-NEXT: ret void +// CK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK3-SAME: () #[[ATTR3:[0-9]+]] { +// CK3-NEXT: entry: +// CK3-NEXT: call void @__tgt_register_requires(i64 1) +// CK3-NEXT: ret void +// CK10-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// CK10-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CK10-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK10-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP1]]) +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// CK10-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[LR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[TR:%.*]]) #[[ATTR0]] comdat { +// CK10-NEXT: entry: +// CK10-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[L:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[T:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS:%.*]] = alloca 
[[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: [[_TMP19:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: [[_TMP26:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: [[_TMP33:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[_TMP34:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 8 +// CK10-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 8 +// CK10-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK10-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8 +// CK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CK10-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK10-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8 +// CK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8 +// CK10-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK10-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CK10-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK10-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK10-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK10-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK10-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK10-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]]) +// CK10-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK10-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK10: omp_offload.failed: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK10: omp_offload.cont: +// CK10-NEXT: [[TMP21:%.*]] = load ptr, ptr [[L]], align 8 +// CK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 8 +// CK10-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP21]], ptr [[TMP23]], align 8 +// CK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CK10-NEXT: [[TMP29:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 +// CK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 8 +// CK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 8 +// CK10-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 8 +// CK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP34]], align 8 +// CK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP35]], align 8 +// CK10-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CK10-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4 +// CK10-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CK10-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CK10-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]]) +// CK10-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CK10-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CK10: omp_offload.failed5: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT6]] +// CK10: omp_offload.cont6: +// CK10-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 8 +// CK10-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 8 +// CK10-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 8 +// CK10-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CK10-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK10-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK10-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP48]], align 4 +// CK10-NEXT: [[TMP49:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP49]], align 4 +// CK10-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 8 +// CK10-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 8 +// CK10-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 8 +// CK10-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 8 +// CK10-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CK10-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP55]], align 8 +// CK10-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP56]], align 8 +// CK10-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP57]], align 8 +// CK10-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 +// CK10-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 +// CK10-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP60]], align 4 +// CK10-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]]) +// CK10-NEXT: [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0 +// CK10-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CK10: omp_offload.failed11: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT12]] +// CK10: omp_offload.cont12: +// CK10-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 8 +// CK10-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 8 +// CK10-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 8 +// CK10-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP68]], align 8 +// CK10-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK10-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK10-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP71]], align 4 +// CK10-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP72]], align 4 +// CK10-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 8 +// CK10-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 8 +// CK10-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 8 +// CK10-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 8 +// CK10-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP77]], align 8 +// CK10-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP78]], align 8 +// CK10-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK10-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP80]], align 8 +// CK10-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4 +// CK10-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4 +// CK10-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP83]], align 4 +// CK10-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]]) +// CK10-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0 +// CK10-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CK10: omp_offload.failed17: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT18]] +// CK10: omp_offload.cont18: +// CK10-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 8 +// CK10-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 8 +// CK10-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 8 +// CK10-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 
8 +// CK10-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 8 +// CK10-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP91]], align 8 +// CK10-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK10-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK10-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP94]], align 4 +// CK10-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP95]], align 4 +// CK10-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 8 +// CK10-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP93]], ptr [[TMP97]], align 8 +// CK10-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 8 +// CK10-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 8 +// CK10-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP100]], align 8 +// CK10-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP101]], align 8 +// CK10-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP102]], align 8 +// CK10-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP103]], align 8 +// CK10-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4 +// CK10-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4 +// CK10-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP106]], align 4 +// CK10-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]]) +// CK10-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0 +// CK10-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CK10: omp_offload.failed24: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT25]] +// CK10: omp_offload.cont25: +// CK10-NEXT: 
[[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 8 +// CK10-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], align 8 +// CK10-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 8 +// CK10-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 8 +// CK10-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 8 +// CK10-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP114]], align 8 +// CK10-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK10-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK10-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP117]], align 4 +// CK10-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 +// CK10-NEXT: store i32 1, ptr [[TMP118]], align 4 +// CK10-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 8 +// CK10-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 8 +// CK10-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 8 +// CK10-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 8 +// CK10-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP123]], align 8 +// CK10-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP124]], align 8 +// CK10-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP125]], align 8 +// CK10-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP126]], align 8 +// CK10-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4 +// CK10-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4 +// CK10-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP129]], align 4 +// CK10-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]]) +// CK10-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0 +// CK10-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]] +// CK10: omp_offload.failed31: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT32]] +// CK10: omp_offload.cont32: +// CK10-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 8 +// CK10-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 8 +// CK10-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 8 +// CK10-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 8 +// CK10-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 8 +// CK10-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 8 +// CK10-NEXT: [[TMP138:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 8 +// CK10-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK10-NEXT: store ptr [[TMP135]], ptr [[TMP139]], align 8 +// CK10-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 0 +// CK10-NEXT: store ptr null, ptr [[TMP140]], align 8 +// CK10-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1 +// CK10-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 8 +// CK10-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1 +// CK10-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 8 +// CK10-NEXT: [[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 1 +// CK10-NEXT: store ptr null, ptr [[TMP143]], align 8 +// CK10-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK10-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK10-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 +// CK10-NEXT: store i32 2, ptr [[TMP146]], align 4 +// CK10-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 +// CK10-NEXT: store i32 2, ptr [[TMP147]], align 4 +// CK10-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 +// CK10-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 8 +// CK10-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 +// CK10-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 8 +// CK10-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 +// CK10-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 8 +// CK10-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 +// CK10-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 8 +// CK10-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 +// CK10-NEXT: store ptr null, ptr [[TMP152]], align 8 +// CK10-NEXT: 
[[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 +// CK10-NEXT: store ptr null, ptr [[TMP153]], align 8 +// CK10-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 +// CK10-NEXT: store i64 0, ptr [[TMP154]], align 8 +// CK10-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 +// CK10-NEXT: store i64 0, ptr [[TMP155]], align 8 +// CK10-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 +// CK10-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4 +// CK10-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 +// CK10-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP157]], align 4 +// CK10-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 +// CK10-NEXT: store i32 0, ptr [[TMP158]], align 4 +// CK10-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]]) +// CK10-NEXT: [[TMP160:%.*]] = icmp ne i32 [[TMP159]], 0 +// CK10-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] +// CK10: omp_offload.failed39: +// CK10-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]] +// CK10-NEXT: br label [[OMP_OFFLOAD_CONT40]] +// CK10: omp_offload.cont40: +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37 +// CK10-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43 +// CK10-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49 +// CK10-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55 +// CK10-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// 
CK10-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK10-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61 +// CK10-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67 +// CK10-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74 +// CK10-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK10-NEXT: entry: +// CK10-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK10-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CK10-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK10-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK10-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK10-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 8 +// CK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CK10-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK10-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 8 +// CK10-NEXT: ret void +// +// +// CK10-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK10-SAME: () #[[ATTR3:[0-9]+]] { +// CK10-NEXT: entry: +// CK10-NEXT: call void @__tgt_register_requires(i64 1) +// CK10-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// CK11-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// 
CK11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 +// CK11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK11-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP1]]) +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// CK11-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[LR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[TR:%.*]]) #[[ATTR0]] comdat { +// CK11-NEXT: entry: +// CK11-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[L:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[T:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: [[_TMP19:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: [[_TMP26:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: [[_TMP33:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[_TMP34:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 8 +// CK11-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 8 +// CK11-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK11-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8 +// CK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// 
CK11-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8 +// CK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8 +// CK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP3]], align 8 +// CK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8 +// CK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8 +// CK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8 +// CK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK11-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]]) +// CK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK11-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK11: omp_offload.failed: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK11: omp_offload.cont: +// CK11-NEXT: [[TMP21:%.*]] = load 
ptr, ptr [[L]], align 8 +// CK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 8 +// CK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP21]], ptr [[TMP23]], align 8 +// CK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP24]], align 8 +// CK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 8 +// CK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 8 +// CK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 8 +// CK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 8 +// CK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP33]], align 8 +// CK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP34]], align 8 +// CK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP35]], align 8 +// CK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4 +// CK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CK11-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]]) +// CK11-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CK11-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CK11: omp_offload.failed5: +// CK11-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT6]] +// CK11: omp_offload.cont6: +// CK11-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 8 +// CK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 8 +// CK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 8 +// CK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP48]], align 4 +// CK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP49]], align 4 +// CK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 8 +// CK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 8 +// CK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 8 +// CK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 8 +// CK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP54]], align 8 +// CK11-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP55]], align 8 +// CK11-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP56]], align 8 +// CK11-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP57]], align 8 +// CK11-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 +// CK11-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 +// CK11-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP60]], align 4 +// CK11-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]]) +// CK11-NEXT: [[TMP62:%.*]] 
= icmp ne i32 [[TMP61]], 0 +// CK11-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CK11: omp_offload.failed11: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT12]] +// CK11: omp_offload.cont12: +// CK11-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 8 +// CK11-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 8 +// CK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 8 +// CK11-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP68]], align 8 +// CK11-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK11-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK11-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP71]], align 4 +// CK11-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP72]], align 4 +// CK11-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 8 +// CK11-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 8 +// CK11-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 8 +// CK11-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 8 +// CK11-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP77]], align 8 +// CK11-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP78]], align 8 +// CK11-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK11-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP80]], align 8 +// CK11-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4 +// CK11-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4 +// CK11-NEXT: 
[[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP83]], align 4 +// CK11-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]]) +// CK11-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0 +// CK11-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]] +// CK11: omp_offload.failed17: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT18]] +// CK11: omp_offload.cont18: +// CK11-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 8 +// CK11-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 8 +// CK11-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 8 +// CK11-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 8 +// CK11-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 8 +// CK11-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP91]], align 8 +// CK11-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 +// CK11-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 +// CK11-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP94]], align 4 +// CK11-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP95]], align 4 +// CK11-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 8 +// CK11-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP93]], ptr [[TMP97]], align 8 +// CK11-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 8 +// CK11-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 8 +// CK11-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP100]], align 8 +// CK11-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP101]], align 8 +// CK11-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP102]], align 8 +// CK11-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP103]], align 8 +// CK11-NEXT: 
[[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4 +// CK11-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4 +// CK11-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP106]], align 4 +// CK11-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]]) +// CK11-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0 +// CK11-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]] +// CK11: omp_offload.failed24: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT25]] +// CK11: omp_offload.cont25: +// CK11-NEXT: [[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 8 +// CK11-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], align 8 +// CK11-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 8 +// CK11-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 8 +// CK11-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 8 +// CK11-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP114]], align 8 +// CK11-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 +// CK11-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 +// CK11-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP117]], align 4 +// CK11-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 +// CK11-NEXT: store i32 1, ptr [[TMP118]], align 4 +// CK11-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 8 +// CK11-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 8 +// CK11-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 8 +// CK11-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 8 +// CK11-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP123]], align 8 +// CK11-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 
0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP124]], align 8 +// CK11-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP125]], align 8 +// CK11-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP126]], align 8 +// CK11-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4 +// CK11-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4 +// CK11-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP129]], align 4 +// CK11-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]]) +// CK11-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0 +// CK11-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]] +// CK11: omp_offload.failed31: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT32]] +// CK11: omp_offload.cont32: +// CK11-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 8 +// CK11-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 8 +// CK11-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 8 +// CK11-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 8 +// CK11-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 8 +// CK11-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 8 +// CK11-NEXT: [[TMP138:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 8 +// CK11-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK11-NEXT: store ptr [[TMP135]], ptr [[TMP139]], align 8 +// CK11-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 0 +// CK11-NEXT: store ptr null, ptr [[TMP140]], align 8 +// CK11-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1 +// CK11-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 8 +// CK11-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1 +// CK11-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 8 +// CK11-NEXT: [[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i64 0, i64 1 +// CK11-NEXT: store ptr null, ptr [[TMP143]], align 8 +// CK11-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 +// CK11-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 +// CK11-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 +// CK11-NEXT: store i32 2, ptr [[TMP146]], align 4 +// CK11-NEXT: [[TMP147:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 +// CK11-NEXT: store i32 2, ptr [[TMP147]], align 4 +// CK11-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 +// CK11-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 8 +// CK11-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3 +// CK11-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 8 +// CK11-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4 +// CK11-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 8 +// CK11-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5 +// CK11-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 8 +// CK11-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6 +// CK11-NEXT: store ptr null, ptr [[TMP152]], align 8 +// CK11-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7 +// CK11-NEXT: store ptr null, ptr [[TMP153]], align 8 +// CK11-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8 +// CK11-NEXT: store i64 0, ptr [[TMP154]], align 8 +// CK11-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9 +// CK11-NEXT: store i64 0, ptr [[TMP155]], align 8 +// CK11-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10 +// CK11-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4 +// CK11-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11 +// CK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP157]], align 4 +// CK11-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12 +// CK11-NEXT: store i32 0, ptr [[TMP158]], align 4 +// CK11-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]]) +// CK11-NEXT: [[TMP160:%.*]] = icmp ne i32 [[TMP159]], 0 +// CK11-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]] +// CK11: omp_offload.failed39: +// CK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]] +// CK11-NEXT: br label [[OMP_OFFLOAD_CONT40]] +// CK11: omp_offload.cont40: +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37 +// CK11-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43 +// CK11-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: 
+// CK11-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49 +// CK11-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55 +// CK11-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK11-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61 +// CK11-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67 +// CK11-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74 +// CK11-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] { +// CK11-NEXT: entry: +// CK11-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[TMP:%.*]] = alloca ptr, align 8 +// CK11-NEXT: [[_TMP1:%.*]] = alloca ptr, align 8 +// CK11-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8 +// CK11-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8 +// CK11-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 8 +// CK11-NEXT: store 
ptr [[LR_ADDR]], ptr [[_TMP1]], align 8 +// CK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8 +// CK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +// CK11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 8 +// CK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 8 +// CK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CK11-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1 +// CK11-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 8 +// CK11-NEXT: ret void +// +// +// CK11-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK11-SAME: () #[[ATTR3:[0-9]+]] { +// CK11-NEXT: entry: +// CK11-NEXT: call void @__tgt_register_requires(i64 1) +// CK11-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// CK12-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CK12-NEXT: entry: +// CK12-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// CK12-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// CK12-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +// CK12-NEXT: ret void +// +// +// CK12-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// CK12-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TR:%.*]]) #[[ATTR0]] comdat { +// CK12-NEXT: entry: +// CK12-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[L:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[T:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: [[_TMP19:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: 
[[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: [[_TMP26:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: [[_TMP33:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[_TMP34:%.*]] = alloca ptr, align 4 +// CK12-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 4 +// CK12-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 4 +// CK12-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK12-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// CK12-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4 +// CK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK12-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK12-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK12-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK12-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK12-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK12-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CK12-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK12-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4 +// CK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK12-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4 +// CK12-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK12-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK12-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK12-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CK12-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK12-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK12-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK12-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK12-NEXT: [[TMP16:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK12-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK12-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK12-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK12-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]]) +// CK12-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK12-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK12: omp_offload.failed: +// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK12-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK12: omp_offload.cont: +// CK12-NEXT: [[TMP21:%.*]] = load ptr, ptr [[L]], align 4 +// CK12-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 4 +// CK12-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP21]], ptr [[TMP23]], align 4 +// CK12-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 +// CK12-NEXT: store ptr null, ptr [[TMP24]], align 4 +// CK12-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 +// CK12-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 +// CK12-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 +// CK12-NEXT: store i32 2, ptr [[TMP27]], align 4 +// CK12-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 +// CK12-NEXT: store i32 1, ptr [[TMP28]], align 4 +// CK12-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 +// CK12-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4 +// CK12-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 +// CK12-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 4 +// CK12-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 +// CK12-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 4 +// CK12-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 +// CK12-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 4 +// CK12-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 +// CK12-NEXT: store ptr null, ptr [[TMP33]], align 4 +// CK12-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 +// CK12-NEXT: store ptr null, ptr [[TMP34]], align 4 +// CK12-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 +// CK12-NEXT: store i64 0, ptr [[TMP35]], align 8 +// CK12-NEXT: [[TMP36:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 +// CK12-NEXT: store i64 0, ptr [[TMP36]], align 8 +// CK12-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 +// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4 +// CK12-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 +// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4 +// CK12-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 +// CK12-NEXT: store i32 0, ptr [[TMP39]], align 4 +// CK12-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]]) +// CK12-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0 +// CK12-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] +// CK12: omp_offload.failed5: +// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]] +// CK12-NEXT: br label [[OMP_OFFLOAD_CONT6]] +// CK12: omp_offload.cont6: +// CK12-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 4 +// CK12-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 4 +// CK12-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 4 +// CK12-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0 +// CK12-NEXT: store ptr null, ptr [[TMP45]], align 4 +// CK12-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 +// CK12-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 +// CK12-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 +// CK12-NEXT: store i32 2, ptr [[TMP48]], align 4 +// CK12-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 +// CK12-NEXT: store i32 1, ptr [[TMP49]], align 4 +// CK12-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 +// CK12-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 4 +// CK12-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3 +// CK12-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 4 +// CK12-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4 +// CK12-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 4 +// CK12-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5 +// CK12-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 4 +// CK12-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6 +// CK12-NEXT: store ptr null, ptr [[TMP54]], align 4 +// CK12-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7 +// CK12-NEXT: store ptr null, ptr [[TMP55]], align 4 +// CK12-NEXT: [[TMP56:%.*]] = getelementptr 
inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8 +// CK12-NEXT: store i64 0, ptr [[TMP56]], align 8 +// CK12-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9 +// CK12-NEXT: store i64 0, ptr [[TMP57]], align 8 +// CK12-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10 +// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4 +// CK12-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11 +// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4 +// CK12-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12 +// CK12-NEXT: store i32 0, ptr [[TMP60]], align 4 +// CK12-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]]) +// CK12-NEXT: [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0 +// CK12-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]] +// CK12: omp_offload.failed11: +// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]] +// CK12-NEXT: br label [[OMP_OFFLOAD_CONT12]] +// CK12: omp_offload.cont12: +// CK12-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// CK12-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 4 +// CK12-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 4 +// CK12-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 4 +// CK12-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 4 +// CK12-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK12-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 4 +// CK12-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 0 +// CK12-NEXT: store ptr null, ptr [[TMP68]], align 4 +// CK12-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 +// CK12-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 +// CK12-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 +// CK12-NEXT: store i32 2, ptr [[TMP71]], align 4 +// CK12-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 +// CK12-NEXT: store i32 1, ptr [[TMP72]], align 4 +// CK12-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 +// CK12-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 4 +// CK12-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3 +// CK12-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 4 +// CK12-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4 +// CK12-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 4 +// CK12-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5 +// CK12-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 4 +// CK12-NEXT: 
[[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6
+// CK12-NEXT: store ptr null, ptr [[TMP77]], align 4
+// CK12-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7
+// CK12-NEXT: store ptr null, ptr [[TMP78]], align 4
+// CK12-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8
+// CK12-NEXT: store i64 0, ptr [[TMP79]], align 8
+// CK12-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9
+// CK12-NEXT: store i64 0, ptr [[TMP80]], align 8
+// CK12-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10
+// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4
+// CK12-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11
+// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4
+// CK12-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12
+// CK12-NEXT: store i32 0, ptr [[TMP83]], align 4
+// CK12-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]])
+// CK12-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0
+// CK12-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CK12: omp_offload.failed17:
+// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]]
+// CK12-NEXT: br label [[OMP_OFFLOAD_CONT18]]
+// CK12: omp_offload.cont18:
+// CK12-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// CK12-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 4
+// CK12-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 4
+// CK12-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 4
+// CK12-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CK12-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 4
+// CK12-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CK12-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 4
+// CK12-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 0
+// CK12-NEXT: store ptr null, ptr [[TMP91]], align 4
+// CK12-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CK12-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CK12-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
+// CK12-NEXT: store i32 2, ptr [[TMP94]], align 4
+// CK12-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
+// CK12-NEXT: store i32 1, ptr [[TMP95]], align 4
+// CK12-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2
+// CK12-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 4
+// CK12-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3
+// CK12-NEXT: store ptr [[TMP93]], ptr [[TMP97]], align 4
+// CK12-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4
+// CK12-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 4
+// CK12-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5
+// CK12-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 4
+// CK12-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6
+// CK12-NEXT: store ptr null, ptr [[TMP100]], align 4
+// CK12-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7
+// CK12-NEXT: store ptr null, ptr [[TMP101]], align 4
+// CK12-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8
+// CK12-NEXT: store i64 0, ptr [[TMP102]], align 8
+// CK12-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9
+// CK12-NEXT: store i64 0, ptr [[TMP103]], align 8
+// CK12-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10
+// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4
+// CK12-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11
+// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4
+// CK12-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12
+// CK12-NEXT: store i32 0, ptr [[TMP106]], align 4
+// CK12-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]])
+// CK12-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0
+// CK12-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
+// CK12: omp_offload.failed24:
+// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]]
+// CK12-NEXT: br label [[OMP_OFFLOAD_CONT25]]
+// CK12: omp_offload.cont25:
+// CK12-NEXT: [[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// CK12-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 4
+// CK12-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], align 4
+// CK12-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 4
+// CK12-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CK12-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 4
+// CK12-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CK12-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 4
+// CK12-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0
+// CK12-NEXT: store ptr null, ptr [[TMP114]], align 4
+// CK12-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CK12-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CK12-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
+// CK12-NEXT: store i32 2, ptr [[TMP117]], align 4
+// CK12-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
+// CK12-NEXT: store i32 1, ptr [[TMP118]], align 4
+// CK12-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2
+// CK12-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 4
+// CK12-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3
+// CK12-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 4
+// CK12-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4
+// CK12-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 4
+// CK12-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5
+// CK12-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 4
+// CK12-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6
+// CK12-NEXT: store ptr null, ptr [[TMP123]], align 4
+// CK12-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7
+// CK12-NEXT: store ptr null, ptr [[TMP124]], align 4
+// CK12-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8
+// CK12-NEXT: store i64 0, ptr [[TMP125]], align 8
+// CK12-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9
+// CK12-NEXT: store i64 0, ptr [[TMP126]], align 8
+// CK12-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10
+// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4
+// CK12-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11
+// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4
+// CK12-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12
+// CK12-NEXT: store i32 0, ptr [[TMP129]], align 4
+// CK12-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]])
+// CK12-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0
+// CK12-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]]
+// CK12: omp_offload.failed31:
+// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]]
+// CK12-NEXT: br label [[OMP_OFFLOAD_CONT32]]
+// CK12: omp_offload.cont32:
+// CK12-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// CK12-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 4
+// CK12-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
+// CK12-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 4
+// CK12-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 4
+// CK12-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 4
+// CK12-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 4
+// CK12-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 4
+// CK12-NEXT: [[TMP138:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0
+// CK12-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 4
+// CK12-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0
+// CK12-NEXT: store ptr [[TMP135]], ptr [[TMP139]], align 4
+// CK12-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 0
+// CK12-NEXT: store ptr null, ptr [[TMP140]], align 4
+// CK12-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1
+// CK12-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 4
+// CK12-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1
+// CK12-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 4
+// CK12-NEXT: [[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 1
+// CK12-NEXT: store ptr null, ptr [[TMP143]], align 4
+// CK12-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0
+// CK12-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0
+// CK12-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0
+// CK12-NEXT: store i32 2, ptr [[TMP146]], align 4
+// CK12-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1
+// CK12-NEXT: store i32 2, ptr [[TMP147]], align 4
+// CK12-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2
+// CK12-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 4
+// CK12-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3
+// CK12-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 4
+// CK12-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4
+// CK12-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 4
+// CK12-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5
+// CK12-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 4
+// CK12-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6
+// CK12-NEXT: store ptr null, ptr [[TMP152]], align 4
+// CK12-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7
+// CK12-NEXT: store ptr null, ptr [[TMP153]], align 4
+// CK12-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8
+// CK12-NEXT: store i64 0, ptr [[TMP154]], align 8
+// CK12-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9
+// CK12-NEXT: store i64 0, ptr [[TMP155]], align 8
+// CK12-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10
+// CK12-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4
+// CK12-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11
+// CK12-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP157]], align 4
+// CK12-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12
+// CK12-NEXT: store i32 0, ptr [[TMP158]], align 4
+// CK12-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]])
+// CK12-NEXT: [[TMP160:%.*]] = icmp ne i32 [[TMP159]], 0
+// CK12-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]]
+// CK12: omp_offload.failed39:
+// CK12-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]]
+// CK12-NEXT: br label [[OMP_OFFLOAD_CONT40]]
+// CK12: omp_offload.cont40:
+// CK12-NEXT: ret void
+//
+//
+// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37
+// CK12-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] {
+// CK12-NEXT: entry:
+// CK12-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4
+// CK12-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4
+// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4
+// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 4
+// CK12-NEXT: ret void
+//
+//
+// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43
+// CK12-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] {
+// CK12-NEXT: entry:
+// CK12-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 4
+// CK12-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 4
+// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 4
+// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1
+// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 4
+// CK12-NEXT: ret void
+//
+//
+// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49
+// CK12-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] {
+// CK12-NEXT: entry:
+// CK12-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 4
+// CK12-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 4
+// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 4
+// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1
+// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 4
+// CK12-NEXT: ret void
+//
+//
+// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55
+// CK12-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] {
+// CK12-NEXT: entry:
+// CK12-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4
+// CK12-NEXT: [[TMP:%.*]] = alloca ptr, align 4
+// CK12-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4
+// CK12-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 4
+// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
+// CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
+// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
+// CK12-NEXT: ret void
+//
+//
+// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61
+// CK12-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] {
+// CK12-NEXT: entry:
+// CK12-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4
+// CK12-NEXT: [[TMP:%.*]] = alloca ptr, align 4
+// CK12-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4
+// CK12-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4
+// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
+// CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
+// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
+// CK12-NEXT: ret void
+//
+//
+// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67
+// CK12-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] {
+// CK12-NEXT: entry:
+// CK12-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4
+// CK12-NEXT: [[TMP:%.*]] = alloca ptr, align 4
+// CK12-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4
+// CK12-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4
+// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
+// CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
+// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
+// CK12-NEXT: ret void
+//
+//
+// CK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74
+// CK12-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] {
+// CK12-NEXT: entry:
+// CK12-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4
+// CK12-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4
+// CK12-NEXT: [[TMP:%.*]] = alloca ptr, align 4
+// CK12-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4
+// CK12-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4
+// CK12-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4
+// CK12-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4
+// CK12-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 4
+// CK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
+// CK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
+// CK12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK12-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
+// CK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 4
+// CK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
+// CK12-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1
+// CK12-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 4
+// CK12-NEXT: ret void
+//
+//
+// CK12-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CK12-SAME: () #[[ATTR3:[0-9]+]] {
+// CK12-NEXT: entry:
+// CK12-NEXT: call void @__tgt_register_requires(i64 1)
+// CK12-NEXT: ret void
+//
+//
+// CK13-LABEL: define {{[^@]+}}@_Z3barRPfRPi
+// CK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CK13-NEXT: entry:
+// CK13-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// CK13-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CK13-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]])
+// CK13-NEXT: ret void
+//
+//
+// CK13-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_
+// CK13-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TR:%.*]]) #[[ATTR0]] comdat {
+// CK13-NEXT: entry:
+// CK13-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[L:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[T:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_PTRS8:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[KERNEL_ARGS10:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK13-NEXT: [[TMP:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS13:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_PTRS14:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_MAPPERS15:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[KERNEL_ARGS16:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK13-NEXT: [[_TMP19:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS20:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_PTRS21:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_MAPPERS22:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[KERNEL_ARGS23:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK13-NEXT: [[_TMP26:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS27:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_PTRS28:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_MAPPERS29:%.*]] = alloca [1 x ptr], align 4
+// CK13-NEXT: [[KERNEL_ARGS30:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK13-NEXT: [[_TMP33:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[_TMP34:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[DOTOFFLOAD_BASEPTRS35:%.*]] = alloca [2 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_PTRS36:%.*]] = alloca [2 x ptr], align 4
+// CK13-NEXT: [[DOTOFFLOAD_MAPPERS37:%.*]] = alloca [2 x ptr], align 4
+// CK13-NEXT: [[KERNEL_ARGS38:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK13-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4
+// CK13-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4
+// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4
+// CK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4
+// CK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4
+// CK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CK13-NEXT: store ptr null, ptr [[TMP3]], align 4
+// CK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CK13-NEXT: store i32 2, ptr [[TMP6]], align 4
+// CK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CK13-NEXT: store i32 1, ptr [[TMP7]], align 4
+// CK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CK13-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CK13-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4
+// CK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CK13-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4
+// CK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CK13-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4
+// CK13-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CK13-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CK13-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CK13-NEXT: store ptr null, ptr [[TMP13]], align 4
+// CK13-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CK13-NEXT: store i64 0, ptr [[TMP14]], align 8
+// CK13-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CK13-NEXT: store i64 0, ptr [[TMP15]], align 8
+// CK13-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4
+// CK13-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4
+// CK13-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CK13-NEXT: store i32 0, ptr [[TMP18]], align 4
+// CK13-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37.region_id, ptr [[KERNEL_ARGS]])
+// CK13-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
+// CK13-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CK13: omp_offload.failed:
+// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37(ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CK13-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CK13: omp_offload.cont:
+// CK13-NEXT: [[TMP21:%.*]] = load ptr, ptr [[L]], align 4
+// CK13-NEXT: [[TMP22:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP21]], ptr [[TMP22]], align 4
+// CK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP21]], ptr [[TMP23]], align 4
+// CK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0
+// CK13-NEXT: store ptr null, ptr [[TMP24]], align 4
+// CK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0
+// CK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0
+// CK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0
+// CK13-NEXT: store i32 2, ptr [[TMP27]], align 4
+// CK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1
+// CK13-NEXT: store i32 1, ptr [[TMP28]], align 4
+// CK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2
+// CK13-NEXT: store ptr [[TMP25]], ptr [[TMP29]], align 4
+// CK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3
+// CK13-NEXT: store ptr [[TMP26]], ptr [[TMP30]], align 4
+// CK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4
+// CK13-NEXT: store ptr @.offload_sizes.1, ptr [[TMP31]], align 4
+// CK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5
+// CK13-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP32]], align 4
+// CK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6
+// CK13-NEXT: store ptr null, ptr [[TMP33]], align 4
+// CK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7
+// CK13-NEXT: store ptr null, ptr [[TMP34]], align 4
+// CK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8
+// CK13-NEXT: store i64 0, ptr [[TMP35]], align 8
+// CK13-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9
+// CK13-NEXT: store i64 0, ptr [[TMP36]], align 8
+// CK13-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10
+// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP37]], align 4
+// CK13-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11
+// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP38]], align 4
+// CK13-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12
+// CK13-NEXT: store i32 0, ptr [[TMP39]], align 4
+// CK13-NEXT: [[TMP40:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43.region_id, ptr [[KERNEL_ARGS4]])
+// CK13-NEXT: [[TMP41:%.*]] = icmp ne i32 [[TMP40]], 0
+// CK13-NEXT: br i1 [[TMP41]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]]
+// CK13: omp_offload.failed5:
+// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43(ptr [[TMP21]]) #[[ATTR2]]
+// CK13-NEXT: br label [[OMP_OFFLOAD_CONT6]]
+// CK13: omp_offload.cont6:
+// CK13-NEXT: [[TMP42:%.*]] = load ptr, ptr [[T]], align 4
+// CK13-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP42]], ptr [[TMP43]], align 4
+// CK13-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP42]], ptr [[TMP44]], align 4
+// CK13-NEXT: [[TMP45:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS9]], i32 0, i32 0
+// CK13-NEXT: store ptr null, ptr [[TMP45]], align 4
+// CK13-NEXT: [[TMP46:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
+// CK13-NEXT: [[TMP47:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
+// CK13-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0
+// CK13-NEXT: store i32 2, ptr [[TMP48]], align 4
+// CK13-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1
+// CK13-NEXT: store i32 1, ptr [[TMP49]], align 4
+// CK13-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2
+// CK13-NEXT: store ptr [[TMP46]], ptr [[TMP50]], align 4
+// CK13-NEXT: [[TMP51:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 3
+// CK13-NEXT: store ptr [[TMP47]], ptr [[TMP51]], align 4
+// CK13-NEXT: [[TMP52:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 4
+// CK13-NEXT: store ptr @.offload_sizes.3, ptr [[TMP52]], align 4
+// CK13-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 5
+// CK13-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP53]], align 4
+// CK13-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 6
+// CK13-NEXT: store ptr null, ptr [[TMP54]], align 4
+// CK13-NEXT: [[TMP55:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 7
+// CK13-NEXT: store ptr null, ptr [[TMP55]], align 4
+// CK13-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 8
+// CK13-NEXT: store i64 0, ptr [[TMP56]], align 8
+// CK13-NEXT: [[TMP57:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 9
+// CK13-NEXT: store i64 0, ptr [[TMP57]], align 8
+// CK13-NEXT: [[TMP58:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 10
+// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP58]], align 4
+// CK13-NEXT: [[TMP59:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 11
+// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP59]], align 4
+// CK13-NEXT: [[TMP60:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 12
+// CK13-NEXT: store i32 0, ptr [[TMP60]], align 4
+// CK13-NEXT: [[TMP61:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49.region_id, ptr [[KERNEL_ARGS10]])
+// CK13-NEXT: [[TMP62:%.*]] = icmp ne i32 [[TMP61]], 0
+// CK13-NEXT: br i1 [[TMP62]], label [[OMP_OFFLOAD_FAILED11:%.*]], label [[OMP_OFFLOAD_CONT12:%.*]]
+// CK13: omp_offload.failed11:
+// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49(ptr [[TMP42]]) #[[ATTR2]]
+// CK13-NEXT: br label [[OMP_OFFLOAD_CONT12]]
+// CK13: omp_offload.cont12:
+// CK13-NEXT: [[TMP63:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
+// CK13-NEXT: store ptr [[TMP63]], ptr [[TMP]], align 4
+// CK13-NEXT: [[TMP64:%.*]] = load ptr, ptr [[TMP]], align 4
+// CK13-NEXT: [[TMP65:%.*]] = load ptr, ptr [[TMP64]], align 4
+// CK13-NEXT: [[TMP66:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP65]], ptr [[TMP66]], align 4
+// CK13-NEXT: [[TMP67:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP65]], ptr [[TMP67]], align 4
+// CK13-NEXT: [[TMP68:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS15]], i32 0, i32 0
+// CK13-NEXT: store ptr null, ptr [[TMP68]], align 4
+// CK13-NEXT: [[TMP69:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0
+// CK13-NEXT: [[TMP70:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0
+// CK13-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0
+// CK13-NEXT: store i32 2, ptr [[TMP71]], align 4
+// CK13-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1
+// CK13-NEXT: store i32 1, ptr [[TMP72]], align 4
+// CK13-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2
+// CK13-NEXT: store ptr [[TMP69]], ptr [[TMP73]], align 4
+// CK13-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 3
+// CK13-NEXT: store ptr [[TMP70]], ptr [[TMP74]], align 4
+// CK13-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 4
+// CK13-NEXT: store ptr @.offload_sizes.5, ptr [[TMP75]], align 4
+// CK13-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 5
+// CK13-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP76]], align 4
+// CK13-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 6
+// CK13-NEXT: store ptr null, ptr [[TMP77]], align 4
+// CK13-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 7
+// CK13-NEXT: store ptr null, ptr [[TMP78]], align 4
+// CK13-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 8
+// CK13-NEXT: store i64 0, ptr [[TMP79]], align 8
+// CK13-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 9
+// CK13-NEXT: store i64 0, ptr [[TMP80]], align 8
+// CK13-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 10
+// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP81]], align 4
+// CK13-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 11
+// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP82]], align 4
+// CK13-NEXT: [[TMP83:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 12
+// CK13-NEXT: store i32 0, ptr [[TMP83]], align 4
+// CK13-NEXT: [[TMP84:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55.region_id, ptr [[KERNEL_ARGS16]])
+// CK13-NEXT: [[TMP85:%.*]] = icmp ne i32 [[TMP84]], 0
+// CK13-NEXT: br i1 [[TMP85]], label [[OMP_OFFLOAD_FAILED17:%.*]], label [[OMP_OFFLOAD_CONT18:%.*]]
+// CK13: omp_offload.failed17:
+// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55(ptr [[TMP65]]) #[[ATTR2]]
+// CK13-NEXT: br label [[OMP_OFFLOAD_CONT18]]
+// CK13: omp_offload.cont18:
+// CK13-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// CK13-NEXT: store ptr [[TMP86]], ptr [[_TMP19]], align 4
+// CK13-NEXT: [[TMP87:%.*]] = load ptr, ptr [[_TMP19]], align 4
+// CK13-NEXT: [[TMP88:%.*]] = load ptr, ptr [[TMP87]], align 4
+// CK13-NEXT: [[TMP89:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP88]], ptr [[TMP89]], align 4
+// CK13-NEXT: [[TMP90:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP88]], ptr [[TMP90]], align 4
+// CK13-NEXT: [[TMP91:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS22]], i32 0, i32 0
+// CK13-NEXT: store ptr null, ptr [[TMP91]], align 4
+// CK13-NEXT: [[TMP92:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0
+// CK13-NEXT: [[TMP93:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0
+// CK13-NEXT: [[TMP94:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0
+// CK13-NEXT: store i32 2, ptr [[TMP94]], align 4
+// CK13-NEXT: [[TMP95:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1
+// CK13-NEXT: store i32 1, ptr [[TMP95]], align 4
+// CK13-NEXT: [[TMP96:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2
+// CK13-NEXT: store ptr [[TMP92]], ptr [[TMP96]], align 4
+// CK13-NEXT: [[TMP97:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 3
+// CK13-NEXT: store ptr [[TMP93]], ptr [[TMP97]], align 4
+// CK13-NEXT: [[TMP98:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 4
+// CK13-NEXT: store ptr @.offload_sizes.7, ptr [[TMP98]], align 4
+// CK13-NEXT: [[TMP99:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 5
+// CK13-NEXT: store ptr @.offload_maptypes.8, ptr [[TMP99]], align 4
+// CK13-NEXT: [[TMP100:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 6
+// CK13-NEXT: store ptr null, ptr [[TMP100]], align 4
+// CK13-NEXT: [[TMP101:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 7
+// CK13-NEXT: store ptr null, ptr [[TMP101]], align 4
+// CK13-NEXT: [[TMP102:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 8
+// CK13-NEXT: store i64 0, ptr [[TMP102]], align 8
+// CK13-NEXT: [[TMP103:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 9
+// CK13-NEXT: store i64 0, ptr [[TMP103]], align 8
+// CK13-NEXT: [[TMP104:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 10
+// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP104]], align 4
+// CK13-NEXT: [[TMP105:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 11
+// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP105]], align 4
+// CK13-NEXT: [[TMP106:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 12
+// CK13-NEXT: store i32 0, ptr [[TMP106]], align 4
+// CK13-NEXT: [[TMP107:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61.region_id, ptr [[KERNEL_ARGS23]])
+// CK13-NEXT: [[TMP108:%.*]] = icmp ne i32 [[TMP107]], 0
+// CK13-NEXT: br i1 [[TMP108]], label [[OMP_OFFLOAD_FAILED24:%.*]], label [[OMP_OFFLOAD_CONT25:%.*]]
+// CK13: omp_offload.failed24:
+// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61(ptr [[TMP88]]) #[[ATTR2]]
+// CK13-NEXT: br label [[OMP_OFFLOAD_CONT25]]
+// CK13: omp_offload.cont25:
+// CK13-NEXT: [[TMP109:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// CK13-NEXT: store ptr [[TMP109]], ptr [[_TMP26]], align 4
+// CK13-NEXT: [[TMP110:%.*]] = load ptr, ptr [[_TMP26]], align 4
+// CK13-NEXT: [[TMP111:%.*]] = load ptr, ptr [[TMP110]], align 4
+// CK13-NEXT: [[TMP112:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP111]], ptr [[TMP112]], align 4
+// CK13-NEXT: [[TMP113:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP111]], ptr [[TMP113]], align 4
+// CK13-NEXT: [[TMP114:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS29]], i32 0, i32 0
+// CK13-NEXT: store ptr null, ptr [[TMP114]], align 4
+// CK13-NEXT: [[TMP115:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0
+// CK13-NEXT: [[TMP116:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0
+// CK13-NEXT: [[TMP117:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0
+// CK13-NEXT: store i32 2, ptr [[TMP117]], align 4
+// CK13-NEXT: [[TMP118:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1
+// CK13-NEXT: store i32 1, ptr [[TMP118]], align 4
+// CK13-NEXT: [[TMP119:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2
+// CK13-NEXT: store ptr [[TMP115]], ptr [[TMP119]], align 4
+// CK13-NEXT: [[TMP120:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 3
+// CK13-NEXT: store ptr [[TMP116]], ptr [[TMP120]], align 4
+// CK13-NEXT: [[TMP121:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 4
+// CK13-NEXT: store ptr @.offload_sizes.9, ptr [[TMP121]], align 4
+// CK13-NEXT: [[TMP122:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 5
+// CK13-NEXT: store ptr @.offload_maptypes.10, ptr [[TMP122]], align 4
+// CK13-NEXT: [[TMP123:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 6
+// CK13-NEXT: store ptr null, ptr [[TMP123]], align 4
+// CK13-NEXT: [[TMP124:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 7
+// CK13-NEXT: store ptr null, ptr [[TMP124]], align 4
+// CK13-NEXT: [[TMP125:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 8
+// CK13-NEXT: store i64 0, ptr [[TMP125]], align 8
+// CK13-NEXT: [[TMP126:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 9
+// CK13-NEXT: store i64 0, ptr [[TMP126]], align 8
+// CK13-NEXT: [[TMP127:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 10
+// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP127]], align 4
+// CK13-NEXT: [[TMP128:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 11
+// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP128]], align 4
+// CK13-NEXT: [[TMP129:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 12
+// CK13-NEXT: store i32 0, ptr [[TMP129]], align 4
+// CK13-NEXT: [[TMP130:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67.region_id, ptr [[KERNEL_ARGS30]])
+// CK13-NEXT: [[TMP131:%.*]] = icmp ne i32 [[TMP130]], 0
+// CK13-NEXT: br i1 [[TMP131]], label [[OMP_OFFLOAD_FAILED31:%.*]], label [[OMP_OFFLOAD_CONT32:%.*]]
+// CK13: omp_offload.failed31:
+// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67(ptr [[TMP111]]) #[[ATTR2]]
+// CK13-NEXT: br label [[OMP_OFFLOAD_CONT32]]
+// CK13: omp_offload.cont32:
+// CK13-NEXT: [[TMP132:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// CK13-NEXT: store ptr [[TMP132]], ptr [[_TMP33]], align 4
+// CK13-NEXT: [[TMP133:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
+// CK13-NEXT: store ptr [[TMP133]], ptr [[_TMP34]], align 4
+// CK13-NEXT: [[TMP134:%.*]] = load ptr, ptr [[_TMP33]], align 4
+// CK13-NEXT: [[TMP135:%.*]] = load ptr, ptr [[TMP134]], align 4
+// CK13-NEXT: [[TMP136:%.*]] = load ptr, ptr [[_TMP34]], align 4
+// CK13-NEXT: [[TMP137:%.*]] = load ptr, ptr [[TMP136]], align 4
+// CK13-NEXT: [[TMP138:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP135]], ptr [[TMP138]], align 4
+// CK13-NEXT: [[TMP139:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0
+// CK13-NEXT: store ptr [[TMP135]], ptr [[TMP139]], align 4
+// CK13-NEXT: [[TMP140:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 0
+// CK13-NEXT: store ptr null, ptr [[TMP140]], align 4
+// CK13-NEXT: [[TMP141:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 1
+// CK13-NEXT: store ptr [[TMP137]], ptr [[TMP141]], align 4
+// CK13-NEXT: [[TMP142:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 1
+// CK13-NEXT: store ptr [[TMP137]], ptr [[TMP142]], align 4
+// CK13-NEXT: [[TMP143:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS37]], i32 0, i32 1
+// CK13-NEXT: store ptr null, ptr [[TMP143]], align 4
+// CK13-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0
+// CK13-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0
+// CK13-NEXT: [[TMP146:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0
+// CK13-NEXT: store i32 2, ptr [[TMP146]], align 4
+// CK13-NEXT: [[TMP147:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1
+// CK13-NEXT: store i32 2, ptr [[TMP147]], align 4
+// CK13-NEXT: [[TMP148:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2
+// CK13-NEXT: store ptr [[TMP144]], ptr [[TMP148]], align 4
+// CK13-NEXT: [[TMP149:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 3
+// CK13-NEXT: store ptr [[TMP145]], ptr [[TMP149]], align 4
+// CK13-NEXT: [[TMP150:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 4
+// CK13-NEXT: store ptr @.offload_sizes.11, ptr [[TMP150]], align 4
+// CK13-NEXT: [[TMP151:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 5
+// CK13-NEXT: store ptr @.offload_maptypes.12, ptr [[TMP151]], align 4
+// CK13-NEXT: [[TMP152:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 6
+// CK13-NEXT: store ptr null, ptr [[TMP152]], align 4
+// CK13-NEXT: [[TMP153:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 7
+// CK13-NEXT: store ptr null, ptr [[TMP153]], align 4
+// CK13-NEXT: [[TMP154:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 8
+// CK13-NEXT: store i64 0, ptr [[TMP154]], align 8
+// CK13-NEXT: [[TMP155:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 9
+// CK13-NEXT: store i64 0, ptr [[TMP155]], align 8
+// CK13-NEXT: [[TMP156:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 10
+// CK13-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP156]], align 4
+// CK13-NEXT: [[TMP157:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 11
+// CK13-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP157]], align 4
+// CK13-NEXT: [[TMP158:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 12
+// CK13-NEXT: store i32 0, ptr [[TMP158]], align 4
+// CK13-NEXT: [[TMP159:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74.region_id, ptr [[KERNEL_ARGS38]])
+// CK13-NEXT: [[TMP160:%.*]] = icmp ne i32 [[TMP159]], 0
+// CK13-NEXT: br i1 [[TMP160]], label [[OMP_OFFLOAD_FAILED39:%.*]], label [[OMP_OFFLOAD_CONT40:%.*]]
+// CK13: omp_offload.failed39:
+// CK13-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74(ptr [[TMP135]], ptr [[TMP137]]) #[[ATTR2]]
+// CK13-NEXT: br label [[OMP_OFFLOAD_CONT40]]
+// CK13: omp_offload.cont40:
+// CK13-NEXT: ret void
+//
+//
+// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l37
+// CK13-SAME: (ptr noundef [[G:%.*]]) #[[ATTR1:[0-9]+]] {
+// CK13-NEXT: entry:
+// CK13-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4
+// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4
+// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[G_ADDR]], align 4
+// CK13-NEXT: ret void
+//
+//
+// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l43
+// CK13-SAME: (ptr noundef [[L:%.*]]) #[[ATTR1]] {
+// CK13-NEXT: entry:
+// CK13-NEXT: [[L_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: store ptr [[L]], ptr [[L_ADDR]], align 4
+// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[L_ADDR]], align 4
+// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i32 1
+// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[L_ADDR]], align 4
+// CK13-NEXT: ret void
+//
+//
+// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l49
+// CK13-SAME: (ptr noundef [[T:%.*]]) #[[ATTR1]] {
+// CK13-NEXT: entry:
+// CK13-NEXT: [[T_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: store ptr [[T]], ptr [[T_ADDR]], align 4
+// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_ADDR]], align 4
+// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1
+// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[T_ADDR]], align 4
+// CK13-NEXT: ret void
+//
+//
+// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l55
+// CK13-SAME: (ptr noundef [[LR:%.*]]) #[[ATTR1]] {
+// CK13-NEXT: entry:
+// CK13-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[TMP:%.*]] = alloca ptr, align 4
+// CK13-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4
+// CK13-NEXT: store ptr [[LR_ADDR]], ptr [[TMP]], align 4
+// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
+// CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
+// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
+// CK13-NEXT: ret void
+//
+//
+// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l61
+// CK13-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] {
+// CK13-NEXT: entry:
+// CK13-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[TMP:%.*]] = alloca ptr, align 4
+// CK13-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4
+// CK13-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4
+// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
+// CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
+// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
+// CK13-NEXT: ret void
+//
+//
+// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l67
+// CK13-SAME: (ptr noundef [[TR:%.*]]) #[[ATTR1]] {
+// CK13-NEXT: entry:
+// CK13-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[TMP:%.*]] = alloca ptr, align 4
+// CK13-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4
+// CK13-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4
+// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
+// CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
+// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
+// CK13-NEXT: ret void
+//
+//
+// CK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIiEvRPfRPT__l74
+// CK13-SAME: (ptr noundef [[TR:%.*]], ptr noundef [[LR:%.*]]) #[[ATTR1]] {
+// CK13-NEXT: entry:
+// CK13-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[TMP:%.*]] = alloca ptr, align 4
+// CK13-NEXT: [[_TMP1:%.*]] = alloca ptr, align 4
+// CK13-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4
+// CK13-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4
+// CK13-NEXT: store ptr [[TR_ADDR]], ptr [[TMP]], align 4
+// CK13-NEXT: store ptr [[LR_ADDR]], ptr [[_TMP1]], align 4
+// CK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 4
+// CK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4
+// CK13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1
+// CK13-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP0]], align 4
+// CK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[_TMP1]], align 4
+// CK13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
+// CK13-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 1
+// CK13-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP2]], align 4
+// CK13-NEXT: ret void
+//
+//
+// CK13-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CK13-SAME: () #[[ATTR3:[0-9]+]] {
+// CK13-NEXT: entry:
+// CK13-NEXT: call void @__tgt_register_requires(i64 1)
+// CK13-NEXT: ret void
+//
+//
+// SIMD-ONLY00-LABEL: define {{[^@]+}}@_Z3barRPfRPi
+// SIMD-ONLY00-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY00-NEXT: entry:
+// SIMD-ONLY00-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY00-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY00-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// SIMD-ONLY00-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// SIMD-ONLY00-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// SIMD-ONLY00-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// SIMD-ONLY00-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP1]])
+// SIMD-ONLY00-NEXT: ret void
+//
+//
+// SIMD-ONLY00-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_
+// SIMD-ONLY00-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[LR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[TR:%.*]]) #[[ATTR0]] comdat {
+// SIMD-ONLY00-NEXT: entry:
+// SIMD-ONLY00-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY00-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY00-NEXT: [[L:%.*]] = alloca ptr, align 8
+// SIMD-ONLY00-NEXT: [[T:%.*]] = alloca ptr, align 8
+// SIMD-ONLY00-NEXT: [[TMP:%.*]] = alloca ptr, align 8
+// SIMD-ONLY00-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8
+// SIMD-ONLY00-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8
+// SIMD-ONLY00-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8
+// SIMD-ONLY00-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8
+// SIMD-ONLY00-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8
+// SIMD-ONLY00-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 8
+// SIMD-ONLY00-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 8
+// SIMD-ONLY00-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 8
+// SIMD-ONLY00-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 8
+// SIMD-ONLY00-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1
+// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 8
+// SIMD-ONLY00-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8
+// SIMD-ONLY00-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8
+// SIMD-ONLY00-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// SIMD-ONLY00-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1
+// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 8
+// SIMD-ONLY00-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 8
+// SIMD-ONLY00-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// SIMD-ONLY00-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
+// SIMD-ONLY00-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1
+// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 8
+// SIMD-ONLY00-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 8
+// SIMD-ONLY00-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8
+// SIMD-ONLY00-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
+// SIMD-ONLY00-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1
+// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 8
+// SIMD-ONLY00-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: store ptr [[TMP15]], ptr [[_TMP8]], align 8
+// SIMD-ONLY00-NEXT: [[TMP16:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: store ptr [[TMP16]], ptr [[_TMP9]], align 8
+// SIMD-ONLY00-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
+// SIMD-ONLY00-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 8
+// SIMD-ONLY00-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
+// SIMD-ONLY00-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1
+// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 8
+// SIMD-ONLY00-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 8
+// SIMD-ONLY00-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
+// SIMD-ONLY00-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1
+// SIMD-ONLY00-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 8
+// SIMD-ONLY00-NEXT: ret void
+//
+//
+// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3barRPfRPi
+// SIMD-ONLY01-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[A:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY01-NEXT: entry:
+// SIMD-ONLY01-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY01-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY01-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8
+// SIMD-ONLY01-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// SIMD-ONLY01-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP1]])
+// SIMD-ONLY01-NEXT: ret void
+//
+//
+// SIMD-ONLY01-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_
+// SIMD-ONLY01-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[LR:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[TR:%.*]]) #[[ATTR0]] comdat {
+// SIMD-ONLY01-NEXT: entry:
+// SIMD-ONLY01-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY01-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY01-NEXT: [[L:%.*]] = alloca ptr, align 8
+// SIMD-ONLY01-NEXT: [[T:%.*]] = alloca ptr, align 8
+// SIMD-ONLY01-NEXT: [[TMP:%.*]] = alloca ptr, align 8
+// SIMD-ONLY01-NEXT: [[_TMP4:%.*]] = alloca ptr, align 8
+// SIMD-ONLY01-NEXT: [[_TMP6:%.*]] = alloca ptr, align 8
+// SIMD-ONLY01-NEXT: [[_TMP8:%.*]] = alloca ptr, align 8
+// SIMD-ONLY01-NEXT: [[_TMP9:%.*]] = alloca ptr, align 8
+// SIMD-ONLY01-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 8
+// SIMD-ONLY01-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 8
+// SIMD-ONLY01-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 8
+// SIMD-ONLY01-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 8
+// SIMD-ONLY01-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 8
+// SIMD-ONLY01-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1
+// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 8
+// SIMD-ONLY01-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 8
+// SIMD-ONLY01-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 8
+// SIMD-ONLY01-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8
+// SIMD-ONLY01-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1
+// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 8
+// SIMD-ONLY01-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 8
+// SIMD-ONLY01-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 8
+// SIMD-ONLY01-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8
+// SIMD-ONLY01-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1
+// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 8
+// SIMD-ONLY01-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 8
+// SIMD-ONLY01-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 8
+// SIMD-ONLY01-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8
+// SIMD-ONLY01-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1
+// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 8
+// SIMD-ONLY01-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: store ptr [[TMP15]], ptr [[_TMP8]], align 8
+// SIMD-ONLY01-NEXT: [[TMP16:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: store ptr [[TMP16]], ptr [[_TMP9]], align 8
+// SIMD-ONLY01-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 8
+// SIMD-ONLY01-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 8
+// SIMD-ONLY01-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
+// SIMD-ONLY01-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1
+// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 8
+// SIMD-ONLY01-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 8
+// SIMD-ONLY01-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8
+// SIMD-ONLY01-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1
+// SIMD-ONLY01-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 8
+// SIMD-ONLY01-NEXT: ret void
+//
+//
+// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3barRPfRPi
+// SIMD-ONLY02-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY02-NEXT: entry:
+// SIMD-ONLY02-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY02-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY02-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4
+// SIMD-ONLY02-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4
+// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// SIMD-ONLY02-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]])
+// SIMD-ONLY02-NEXT: ret void
+//
+//
+// SIMD-ONLY02-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_
+// SIMD-ONLY02-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TR:%.*]]) #[[ATTR0]] comdat {
+// SIMD-ONLY02-NEXT: entry:
+// SIMD-ONLY02-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY02-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY02-NEXT: [[L:%.*]] = alloca ptr, align 4
+// SIMD-ONLY02-NEXT: [[T:%.*]] = alloca ptr, align 4
+// SIMD-ONLY02-NEXT: [[TMP:%.*]] = alloca ptr, align 4
+// SIMD-ONLY02-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4
+// SIMD-ONLY02-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4
+// SIMD-ONLY02-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4
+// SIMD-ONLY02-NEXT: [[_TMP9:%.*]] = alloca ptr, align 4
+// SIMD-ONLY02-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4
+// SIMD-ONLY02-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 4
+// SIMD-ONLY02-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 4
+// SIMD-ONLY02-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1
+// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 4
+// SIMD-ONLY02-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 4
+// SIMD-ONLY02-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1
+// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 4
+// SIMD-ONLY02-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4
+// SIMD-ONLY02-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4
+// SIMD-ONLY02-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4
+// SIMD-ONLY02-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1
+// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 4
+// SIMD-ONLY02-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 4
+// SIMD-ONLY02-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 4
+// SIMD-ONLY02-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4
+// SIMD-ONLY02-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1
+// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 4
+// SIMD-ONLY02-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 4
+// SIMD-ONLY02-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 4
+// SIMD-ONLY02-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4
+// SIMD-ONLY02-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1
+// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 4
+// SIMD-ONLY02-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: store ptr [[TMP15]], ptr [[_TMP8]], align 4
+// SIMD-ONLY02-NEXT: [[TMP16:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: store ptr [[TMP16]], ptr [[_TMP9]], align 4
+// SIMD-ONLY02-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 4
+// SIMD-ONLY02-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 4
+// SIMD-ONLY02-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4
+// SIMD-ONLY02-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1
+// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR10]], ptr
[[TMP19]], align 4 +// SIMD-ONLY02-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 4 +// SIMD-ONLY02-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// SIMD-ONLY02-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1 +// SIMD-ONLY02-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 4 +// SIMD-ONLY02-NEXT: ret void +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3barRPfRPi +// SIMD-ONLY03-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4 +// SIMD-ONLY03-NEXT: call void @_Z3fooIiEvRPfRPT_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]]) +// SIMD-ONLY03-NEXT: ret void +// +// +// SIMD-ONLY03-LABEL: define {{[^@]+}}@_Z3fooIiEvRPfRPT_ +// SIMD-ONLY03-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LR:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[TR:%.*]]) #[[ATTR0]] comdat { +// SIMD-ONLY03-NEXT: entry: +// SIMD-ONLY03-NEXT: [[LR_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[TR_ADDR:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[L:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[T:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[TMP:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[_TMP4:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[_TMP6:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[_TMP8:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: [[_TMP9:%.*]] = alloca ptr, align 4 +// SIMD-ONLY03-NEXT: store ptr [[LR]], ptr [[LR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TR]], ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP0:%.*]] = load ptr, ptr @g, align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR]], ptr @g, align 4 +// SIMD-ONLY03-NEXT: [[TMP1:%.*]] = load ptr, ptr [[L]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR1]], ptr [[L]], align 4 +// SIMD-ONLY03-NEXT: [[TMP2:%.*]] = load ptr, ptr [[T]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR2]], ptr [[T]], align 4 +// SIMD-ONLY03-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TMP3]], ptr [[TMP]], align 4 +// SIMD-ONLY03-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP]], align 4 +// SIMD-ONLY03-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR3:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR3]], ptr [[TMP5]], align 4 +// SIMD-ONLY03-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TMP7]], ptr [[_TMP4]], align 4 +// SIMD-ONLY03-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP9:%.*]] = load ptr, ptr [[_TMP4]], align 
4 +// SIMD-ONLY03-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR5:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR5]], ptr [[TMP9]], align 4 +// SIMD-ONLY03-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TMP11]], ptr [[_TMP6]], align 4 +// SIMD-ONLY03-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP13:%.*]] = load ptr, ptr [[_TMP6]], align 4 +// SIMD-ONLY03-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR7:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR7]], ptr [[TMP13]], align 4 +// SIMD-ONLY03-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TMP15]], ptr [[_TMP8]], align 4 +// SIMD-ONLY03-NEXT: [[TMP16:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: store ptr [[TMP16]], ptr [[_TMP9]], align 4 +// SIMD-ONLY03-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP18:%.*]] = load ptr, ptr [[LR_ADDR]], align 4 +// SIMD-ONLY03-NEXT: [[TMP19:%.*]] = load ptr, ptr [[_TMP8]], align 4 +// SIMD-ONLY03-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR10:%.*]] = getelementptr inbounds i32, ptr [[TMP20]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR10]], ptr [[TMP19]], align 4 +// SIMD-ONLY03-NEXT: [[TMP21:%.*]] = load ptr, ptr [[_TMP9]], align 4 +// SIMD-ONLY03-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// SIMD-ONLY03-NEXT: [[INCDEC_PTR11:%.*]] = getelementptr inbounds float, ptr [[TMP22]], i32 1 +// SIMD-ONLY03-NEXT: store ptr [[INCDEC_PTR11]], ptr [[TMP21]], align 4 +// SIMD-ONLY03-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@_Z3barPd +// CK20-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// CK20-NEXT: entry: +// CK20-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 8 +// CK20-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK20-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK20-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// CK20-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK20-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK20-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]]) +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define 
{{[^@]+}}@_ZN2STIdE3fooERPd +// CK20-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[LA:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK20-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK20-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8 +// CK20-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK20-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 8 +// CK20-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 8 +// CK20-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK20-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: store ptr null, ptr [[LA]], align 8 +// CK20-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK20-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CK20-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK20-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CK20-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK20-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CK20-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK20-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK20-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK20-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CK20-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK20-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CK20-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK20-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CK20-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK20-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK20-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK20-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CK20-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK20-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CK20-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK20-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CK20-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK20-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK20-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK20-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CK20-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK20-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK20-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK20-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4 +// CK20-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK20-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CK20-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK20-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CK20-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]]) +// CK20-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CK20-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK20: omp_offload.failed: +// CK20-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] +// CK20-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK20: omp_offload.cont: +// CK20-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK20-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 8 +// CK20-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1 +// CK20-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CK20-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64 +// CK20-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]] +// CK20-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK20-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.1, i64 16, i1 false) +// CK20-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 8 +// CK20-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK20-NEXT: store ptr [[B]], ptr [[TMP27]], align 8 +// CK20-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK20-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 8 +// CK20-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0 +// CK20-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CK20-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 8 +// CK20-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CK20-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 8 +// CK20-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 
0, i64 1 +// CK20-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CK20-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK20-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK20-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK20-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CK20-NEXT: store i32 2, ptr [[TMP36]], align 4 +// CK20-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CK20-NEXT: store i32 2, ptr [[TMP37]], align 4 +// CK20-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CK20-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 8 +// CK20-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CK20-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 8 +// CK20-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CK20-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 8 +// CK20-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CK20-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 8 +// CK20-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CK20-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CK20-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CK20-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CK20-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CK20-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CK20-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CK20-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CK20-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CK20-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4 +// CK20-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CK20-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4 +// CK20-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CK20-NEXT: store i32 0, ptr [[TMP48]], align 4 +// CK20-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]]) +// CK20-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CK20-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CK20: omp_offload.failed6: +// CK20-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]] +// CK20-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CK20: omp_offload.cont7: +// CK20-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// CK20-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK20-NEXT: [[TMP51:%.*]] = 
load ptr, ptr [[B9]], align 8 +// CK20-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1 +// CK20-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64 +// CK20-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64 +// CK20-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]] +// CK20-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK20-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES13]], ptr align 8 @.offload_sizes.3, i64 24, i1 false) +// CK20-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 8 +// CK20-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK20-NEXT: store ptr [[A8]], ptr [[TMP58]], align 8 +// CK20-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK20-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 8 +// CK20-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CK20-NEXT: store ptr null, ptr [[TMP60]], align 8 +// CK20-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 8 +// CK20-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CK20-NEXT: store ptr [[A8]], ptr [[TMP62]], align 8 +// CK20-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CK20-NEXT: store ptr null, ptr [[TMP63]], align 8 +// CK20-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2 +// CK20-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 8 +// CK20-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2 +// CK20-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 8 +// CK20-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 2 +// CK20-NEXT: store ptr null, ptr [[TMP66]], align 8 +// CK20-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK20-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK20-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK20-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 +// CK20-NEXT: store i32 2, ptr [[TMP70]], align 4 +// CK20-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 +// CK20-NEXT: store i32 3, ptr [[TMP71]], align 4 +// CK20-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 +// CK20-NEXT: store ptr [[TMP67]], ptr [[TMP72]], align 8 +// CK20-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3 +// CK20-NEXT: store ptr [[TMP68]], ptr [[TMP73]], align 8 +// CK20-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4 +// CK20-NEXT: store ptr [[TMP69]], ptr [[TMP74]], align 8 +// CK20-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5 +// CK20-NEXT: store ptr 
@.offload_maptypes.4, ptr [[TMP75]], align 8 +// CK20-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6 +// CK20-NEXT: store ptr null, ptr [[TMP76]], align 8 +// CK20-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7 +// CK20-NEXT: store ptr null, ptr [[TMP77]], align 8 +// CK20-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8 +// CK20-NEXT: store i64 0, ptr [[TMP78]], align 8 +// CK20-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9 +// CK20-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK20-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10 +// CK20-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4 +// CK20-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11 +// CK20-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4 +// CK20-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12 +// CK20-NEXT: store i32 0, ptr [[TMP82]], align 4 +// CK20-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]]) +// CK20-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 +// CK20-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]] +// CK20: omp_offload.failed15: +// CK20-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]] +// CK20-NEXT: br label [[OMP_OFFLOAD_CONT16]] +// CK20: omp_offload.cont16: +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// CK20-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK20-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK20-NEXT: store ptr null, ptr [[A]], align 8 +// CK20-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK20-NEXT: store ptr [[TMP0]], ptr [[B2]], align 8 +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112 +// CK20-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// 
CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118 +// CK20-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CK20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8 +// CK20-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8 +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125 +// CK20-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK20-NEXT: entry: +// CK20-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK20-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK20-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK20-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 +// CK20-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK20-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// CK20-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 +// CK20-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 8 +// CK20-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CK20-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK20-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 8 +// CK20-NEXT: ret void +// +// +// CK20-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK20-SAME: () #[[ATTR5:[0-9]+]] { +// CK20-NEXT: entry: +// CK20-NEXT: call void @__tgt_register_requires(i64 1) +// CK20-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@_Z3barPd +// CK21-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// CK21-NEXT: entry: +// CK21-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 8 +// CK21-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK21-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK21-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]]) +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8 +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// CK21-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK21-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK21-NEXT: call 
void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]]) +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd +// CK21-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[LA:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8 +// CK21-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK21-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8 +// CK21-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK21-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 8 +// CK21-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 8 +// CK21-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8 +// CK21-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: store ptr null, ptr [[LA]], align 8 +// CK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK21-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 8 +// CK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK21-NEXT: store ptr [[A]], ptr [[TMP1]], align 8 +// CK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CK21-NEXT: store ptr null, ptr [[TMP2]], align 8 +// CK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK21-NEXT: store i32 2, ptr [[TMP5]], align 4 +// CK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK21-NEXT: store i32 1, ptr [[TMP6]], align 4 +// CK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK21-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 8 +// CK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK21-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8 +// CK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK21-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 8 +// CK21-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK21-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 8 +// CK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK21-NEXT: store ptr null, ptr [[TMP11]], align 8 +// CK21-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK21-NEXT: store ptr null, ptr [[TMP12]], align 8 +// CK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK21-NEXT: store i64 0, ptr [[TMP13]], align 8 +// CK21-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK21-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK21-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4 +// CK21-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK21-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4 +// CK21-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK21-NEXT: store i32 0, ptr [[TMP17]], align 4 +// CK21-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]]) +// CK21-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0 +// CK21-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK21: omp_offload.failed: +// CK21-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]] +// CK21-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK21: omp_offload.cont: +// CK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK21-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 8 +// CK21-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1 +// CK21-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64 +// CK21-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64 +// CK21-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]] +// CK21-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK21-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES]], ptr align 8 @.offload_sizes.1, i64 16, i1 false) +// CK21-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 8 +// CK21-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK21-NEXT: store ptr [[B]], ptr [[TMP27]], align 8 +// CK21-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK21-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 8 +// CK21-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0 +// CK21-NEXT: store ptr null, ptr [[TMP29]], align 8 +// CK21-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 8 +// CK21-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS3]], i32 0, i32 1 +// CK21-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 8 +// CK21-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 1 +// CK21-NEXT: store ptr null, ptr [[TMP32]], align 8 +// CK21-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CK21-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CK21-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 +// CK21-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CK21-NEXT: store i32 2, ptr [[TMP36]], align 4 +// CK21-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CK21-NEXT: store i32 2, ptr [[TMP37]], align 4 +// CK21-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CK21-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 8 +// CK21-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CK21-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 8 +// CK21-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CK21-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 8 +// CK21-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CK21-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 8 +// CK21-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CK21-NEXT: store ptr null, ptr [[TMP42]], align 8 +// CK21-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CK21-NEXT: store ptr null, ptr [[TMP43]], align 8 +// CK21-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CK21-NEXT: store i64 0, ptr [[TMP44]], align 8 +// CK21-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CK21-NEXT: store i64 0, ptr [[TMP45]], align 8 +// CK21-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CK21-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4 +// CK21-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CK21-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4 +// CK21-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CK21-NEXT: store i32 0, ptr [[TMP48]], align 4 +// CK21-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]]) +// CK21-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CK21-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CK21: omp_offload.failed6: +// CK21-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]] +// CK21-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CK21: omp_offload.cont7: +// CK21-NEXT: [[A8:%.*]] = 
getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0 +// CK21-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK21-NEXT: [[TMP51:%.*]] = load ptr, ptr [[B9]], align 8 +// CK21-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1 +// CK21-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64 +// CK21-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64 +// CK21-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]] +// CK21-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) +// CK21-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DOTOFFLOAD_SIZES13]], ptr align 8 @.offload_sizes.3, i64 24, i1 false) +// CK21-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 8 +// CK21-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK21-NEXT: store ptr [[A8]], ptr [[TMP58]], align 8 +// CK21-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK21-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 8 +// CK21-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 0 +// CK21-NEXT: store ptr null, ptr [[TMP60]], align 8 +// CK21-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 8 +// CK21-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1 +// CK21-NEXT: store ptr [[A8]], ptr [[TMP62]], align 8 +// CK21-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 1 +// CK21-NEXT: store ptr null, ptr [[TMP63]], align 8 +// CK21-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2 +// CK21-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 8 +// CK21-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2 +// CK21-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 8 +// CK21-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i64 0, i64 2 +// CK21-NEXT: store ptr null, ptr [[TMP66]], align 8 +// CK21-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 +// CK21-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 +// CK21-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 +// CK21-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 +// CK21-NEXT: store i32 2, ptr [[TMP70]], align 4 +// CK21-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 +// CK21-NEXT: store i32 3, ptr [[TMP71]], align 4 +// CK21-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 +// CK21-NEXT: store ptr [[TMP67]], ptr [[TMP72]], align 8 +// CK21-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3 +// CK21-NEXT: store ptr [[TMP68]], ptr [[TMP73]], align 8 +// CK21-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4 +// CK21-NEXT: store ptr 
[[TMP69]], ptr [[TMP74]], align 8 +// CK21-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5 +// CK21-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP75]], align 8 +// CK21-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6 +// CK21-NEXT: store ptr null, ptr [[TMP76]], align 8 +// CK21-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7 +// CK21-NEXT: store ptr null, ptr [[TMP77]], align 8 +// CK21-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8 +// CK21-NEXT: store i64 0, ptr [[TMP78]], align 8 +// CK21-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9 +// CK21-NEXT: store i64 0, ptr [[TMP79]], align 8 +// CK21-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10 +// CK21-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4 +// CK21-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11 +// CK21-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4 +// CK21-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12 +// CK21-NEXT: store i32 0, ptr [[TMP82]], align 4 +// CK21-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]]) +// CK21-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0 +// CK21-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]] +// CK21: omp_offload.failed15: +// CK21-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]] +// CK21-NEXT: br label [[OMP_OFFLOAD_CONT16]] +// CK21: omp_offload.cont16: +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd +// CK21-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 +// CK21-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0 +// CK21-NEXT: store ptr null, ptr [[A]], align 8 +// CK21-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1 +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CK21-NEXT: store ptr [[TMP0]], ptr [[B2]], align 8 +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112 +// CK21-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK21-NEXT: 
[[TMP1:%.*]] = load ptr, ptr [[A]], align 8 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118 +// CK21-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1 +// CK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8 +// CK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1 +// CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 8 +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125 +// CK21-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] { +// CK21-NEXT: entry: +// CK21-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 +// CK21-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 +// CK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0 +// CK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 8 +// CK21-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1 +// CK21-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8 +// CK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1 +// CK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 8 +// CK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CK21-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1 +// CK21-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 8 +// CK21-NEXT: ret void +// +// +// CK21-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK21-SAME: () #[[ATTR5:[0-9]+]] { +// CK21-NEXT: entry: +// CK21-NEXT: call void @__tgt_register_requires(i64 1) +// CK21-NEXT: ret void +// +// +// CK22-LABEL: define {{[^@]+}}@_Z3barPd +// CK22-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +// CK22-NEXT: entry: +// CK22-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 +// CK22-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4 +// CK22-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// CK22-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]]) +// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4 +// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1 +// CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4 +// CK22-NEXT: ret void +// +// +// CK22-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd +// CK22-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 { +// CK22-NEXT: entry: +// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 +// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4 +// CK22-NEXT: 
store ptr [[B]], ptr [[B_ADDR]], align 4
+// CK22-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CK22-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]])
+// CK22-NEXT: ret void
+//
+//
+// CK22-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd
+// CK22-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 {
+// CK22-NEXT: entry:
+// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK22-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4
+// CK22-NEXT: [[LA:%.*]] = alloca ptr, align 4
+// CK22-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
+// CK22-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
+// CK22-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CK22-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CK22-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 4
+// CK22-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 4
+// CK22-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 4
+// CK22-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 4
+// CK22-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK22-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 4
+// CK22-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 4
+// CK22-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 4
+// CK22-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 4
+// CK22-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK22-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4
+// CK22-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK22-NEXT: store ptr null, ptr [[LA]], align 4
+// CK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CK22-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4
+// CK22-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK22-NEXT: store ptr [[A]], ptr [[TMP1]], align 4
+// CK22-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CK22-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CK22-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK22-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK22-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CK22-NEXT: store i32 2, ptr [[TMP5]], align 4
+// CK22-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CK22-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CK22-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CK22-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CK22-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CK22-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CK22-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CK22-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4
+// CK22-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CK22-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4
+// CK22-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CK22-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CK22-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CK22-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CK22-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CK22-NEXT: store i64 0, ptr [[TMP13]], align 8
+// CK22-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CK22-NEXT: store i64 0, ptr [[TMP14]], align 8
+// CK22-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CK22-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4
+// CK22-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CK22-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
+// CK22-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CK22-NEXT: store i32 0, ptr [[TMP17]], align 4
+// CK22-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]])
+// CK22-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CK22-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CK22: omp_offload.failed:
+// CK22-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
+// CK22-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CK22: omp_offload.cont:
+// CK22-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK22-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 4
+// CK22-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1
+// CK22-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64
+// CK22-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64
+// CK22-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]]
+// CK22-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CK22-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.1, i32 16, i1 false)
+// CK22-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 4
+// CK22-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CK22-NEXT: store ptr [[B]], ptr [[TMP27]], align 4
+// CK22-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CK22-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 4
+// CK22-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
+// CK22-NEXT: store ptr null, ptr [[TMP29]], align 4
+// CK22-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
+// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 4
+// CK22-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
+// CK22-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 4
+// CK22-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
+// CK22-NEXT: store ptr null, ptr [[TMP32]], align 4
+// CK22-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CK22-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CK22-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CK22-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CK22-NEXT: store i32 2, ptr [[TMP36]], align 4
+// CK22-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CK22-NEXT: store i32 2, ptr [[TMP37]], align 4
+// CK22-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CK22-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 4
+// CK22-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CK22-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 4
+// CK22-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CK22-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 4
+// CK22-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CK22-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 4
+// CK22-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CK22-NEXT: store ptr null, ptr [[TMP42]], align 4
+// CK22-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CK22-NEXT: store ptr null, ptr [[TMP43]], align 4
+// CK22-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CK22-NEXT: store i64 0, ptr [[TMP44]], align 8
+// CK22-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CK22-NEXT: store i64 0, ptr [[TMP45]], align 8
+// CK22-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CK22-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4
+// CK22-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CK22-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4
+// CK22-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CK22-NEXT: store i32 0, ptr [[TMP48]], align 4
+// CK22-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]])
+// CK22-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
+// CK22-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CK22: omp_offload.failed6:
+// CK22-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]]
+// CK22-NEXT: br label [[OMP_OFFLOAD_CONT7]]
+// CK22: omp_offload.cont7:
+// CK22-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
+// CK22-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK22-NEXT: [[TMP51:%.*]] = load ptr, ptr [[B9]], align 4
+// CK22-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1
+// CK22-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64
+// CK22-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64
+// CK22-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]]
+// CK22-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CK22-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES13]], ptr align 4 @.offload_sizes.3, i32 24, i1 false)
+// CK22-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
+// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 4
+// CK22-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
+// CK22-NEXT: store ptr [[A8]], ptr [[TMP58]], align 4
+// CK22-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0
+// CK22-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 4
+// CK22-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 0
+// CK22-NEXT: store ptr null, ptr [[TMP60]], align 4
+// CK22-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
+// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 4
+// CK22-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
+// CK22-NEXT: store ptr [[A8]], ptr [[TMP62]], align 4
+// CK22-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 1
+// CK22-NEXT: store ptr null, ptr [[TMP63]], align 4
+// CK22-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2
+// CK22-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 4
+// CK22-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2
+// CK22-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 4
+// CK22-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 2
+// CK22-NEXT: store ptr null, ptr [[TMP66]], align 4
+// CK22-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
+// CK22-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
+// CK22-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0
+// CK22-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
+// CK22-NEXT: store i32 2, ptr [[TMP70]], align 4
+// CK22-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
+// CK22-NEXT: store i32 3, ptr [[TMP71]], align 4
+// CK22-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2
+// CK22-NEXT: store ptr [[TMP67]], ptr [[TMP72]], align 4
+// CK22-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3
+// CK22-NEXT: store ptr [[TMP68]], ptr [[TMP73]], align 4
+// CK22-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4
+// CK22-NEXT: store ptr [[TMP69]], ptr [[TMP74]], align 4
+// CK22-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5
+// CK22-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP75]], align 4
+// CK22-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6
+// CK22-NEXT: store ptr null, ptr [[TMP76]], align 4
+// CK22-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7
+// CK22-NEXT: store ptr null, ptr [[TMP77]], align 4
+// CK22-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8
+// CK22-NEXT: store i64 0, ptr [[TMP78]], align 8
+// CK22-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9
+// CK22-NEXT: store i64 0, ptr [[TMP79]], align 8
+// CK22-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10
+// CK22-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4
+// CK22-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11
+// CK22-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4
+// CK22-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12
+// CK22-NEXT: store i32 0, ptr [[TMP82]], align 4
+// CK22-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]])
+// CK22-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0
+// CK22-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CK22: omp_offload.failed15:
+// CK22-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]]
+// CK22-NEXT: br label [[OMP_OFFLOAD_CONT16]]
+// CK22: omp_offload.cont16:
+// CK22-NEXT: ret void
+//
+//
+// CK22-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd
+// CK22-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CK22-NEXT: entry:
+// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK22-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK22-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CK22-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CK22-NEXT: store ptr null, ptr [[A]], align 4
+// CK22-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CK22-NEXT: store ptr [[TMP0]], ptr [[B2]], align 4
+// CK22-NEXT: ret void
+//
+//
+// CK22-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112
+// CK22-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] {
+// CK22-NEXT: entry:
+// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CK22-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
+// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4
+// CK22-NEXT: ret void
+//
+//
+// CK22-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118
+// CK22-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
+// CK22-NEXT: entry:
+// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK22-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1
+// CK22-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4
+// CK22-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
+// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 4
+// CK22-NEXT: ret void
+//
+//
+// CK22-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125
+// CK22-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
+// CK22-NEXT: entry:
+// CK22-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK22-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CK22-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
+// CK22-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK22-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4
+// CK22-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1
+// CK22-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 4
+// CK22-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
+// CK22-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// CK22-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 4
+// CK22-NEXT: ret void
+//
+//
+// CK22-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CK22-SAME: () #[[ATTR5:[0-9]+]] {
+// CK22-NEXT: entry:
+// CK22-NEXT: call void @__tgt_register_requires(i64 1)
+// CK22-NEXT: ret void
+//
+//
+// CK23-LABEL: define {{[^@]+}}@_Z3barPd
+// CK23-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+// CK23-NEXT: entry:
+// CK23-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4
+// CK23-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4
+// CK23-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4
+// CK23-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
+// CK23-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
+// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4
+// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4
+// CK23-NEXT: ret void
+//
+//
+// CK23-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd
+// CK23-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// CK23-NEXT: entry:
+// CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CK23-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CK23-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]])
+// CK23-NEXT: ret void
+//
+//
+// CK23-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd
+// CK23-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 {
+// CK23-NEXT: entry:
+// CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK23-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4
+// CK23-NEXT: [[LA:%.*]] = alloca ptr, align 4
+// CK23-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4
+// CK23-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4
+// CK23-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4
+// CK23-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CK23-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x ptr], align 4
+// CK23-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x ptr], align 4
+// CK23-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [2 x ptr], align 4
+// CK23-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 4
+// CK23-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK23-NEXT: [[DOTOFFLOAD_BASEPTRS10:%.*]] = alloca [3 x ptr], align 4
+// CK23-NEXT: [[DOTOFFLOAD_PTRS11:%.*]] = alloca [3 x ptr], align 4
+// CK23-NEXT: [[DOTOFFLOAD_MAPPERS12:%.*]] = alloca [3 x ptr], align 4
+// CK23-NEXT: [[DOTOFFLOAD_SIZES13:%.*]] = alloca [3 x i64], align 4
+// CK23-NEXT: [[KERNEL_ARGS14:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8
+// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4
+// CK23-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: store ptr null, ptr [[LA]], align 4
+// CK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CK23-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP0]], align 4
+// CK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK23-NEXT: store ptr [[A]], ptr [[TMP1]], align 4
+// CK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0
+// CK23-NEXT: store ptr null, ptr [[TMP2]], align 4
+// CK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CK23-NEXT: store i32 2, ptr [[TMP5]], align 4
+// CK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CK23-NEXT: store i32 1, ptr [[TMP6]], align 4
+// CK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CK23-NEXT: store ptr [[TMP3]], ptr [[TMP7]], align 4
+// CK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CK23-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4
+// CK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CK23-NEXT: store ptr @.offload_sizes, ptr [[TMP9]], align 4
+// CK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CK23-NEXT: store ptr @.offload_maptypes, ptr [[TMP10]], align 4
+// CK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CK23-NEXT: store ptr null, ptr [[TMP11]], align 4
+// CK23-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CK23-NEXT: store ptr null, ptr [[TMP12]], align 4
+// CK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CK23-NEXT: store i64 0, ptr [[TMP13]], align 8
+// CK23-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CK23-NEXT: store i64 0, ptr [[TMP14]], align 8
+// CK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CK23-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP15]], align 4
+// CK23-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CK23-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP16]], align 4
+// CK23-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CK23-NEXT: store i32 0, ptr [[TMP17]], align 4
+// CK23-NEXT: [[TMP18:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112.region_id, ptr [[KERNEL_ARGS]])
+// CK23-NEXT: [[TMP19:%.*]] = icmp ne i32 [[TMP18]], 0
+// CK23-NEXT: br i1 [[TMP19]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CK23: omp_offload.failed:
+// CK23-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112(ptr [[THIS1]]) #[[ATTR3:[0-9]+]]
+// CK23-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CK23: omp_offload.cont:
+// CK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK23-NEXT: [[TMP20:%.*]] = load ptr, ptr [[B]], align 4
+// CK23-NEXT: [[TMP21:%.*]] = getelementptr ptr, ptr [[B]], i32 1
+// CK23-NEXT: [[TMP22:%.*]] = ptrtoint ptr [[TMP21]] to i64
+// CK23-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[B]] to i64
+// CK23-NEXT: [[TMP24:%.*]] = sub i64 [[TMP22]], [[TMP23]]
+// CK23-NEXT: [[TMP25:%.*]] = sdiv exact i64 [[TMP24]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CK23-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES]], ptr align 4 @.offload_sizes.1, i32 16, i1 false)
+// CK23-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP26]], align 4
+// CK23-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CK23-NEXT: store ptr [[B]], ptr [[TMP27]], align 4
+// CK23-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CK23-NEXT: store i64 [[TMP25]], ptr [[TMP28]], align 4
+// CK23-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0
+// CK23-NEXT: store ptr null, ptr [[TMP29]], align 4
+// CK23-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 1
+// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP30]], align 4
+// CK23-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 1
+// CK23-NEXT: store ptr [[TMP20]], ptr [[TMP31]], align 4
+// CK23-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 1
+// CK23-NEXT: store ptr null, ptr [[TMP32]], align 4
+// CK23-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0
+// CK23-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0
+// CK23-NEXT: [[TMP35:%.*]] = getelementptr inbounds [2 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0
+// CK23-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0
+// CK23-NEXT: store i32 2, ptr [[TMP36]], align 4
+// CK23-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1
+// CK23-NEXT: store i32 2, ptr [[TMP37]], align 4
+// CK23-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2
+// CK23-NEXT: store ptr [[TMP33]], ptr [[TMP38]], align 4
+// CK23-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3
+// CK23-NEXT: store ptr [[TMP34]], ptr [[TMP39]], align 4
+// CK23-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4
+// CK23-NEXT: store ptr [[TMP35]], ptr [[TMP40]], align 4
+// CK23-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5
+// CK23-NEXT: store ptr @.offload_maptypes.2, ptr [[TMP41]], align 4
+// CK23-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6
+// CK23-NEXT: store ptr null, ptr [[TMP42]], align 4
+// CK23-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7
+// CK23-NEXT: store ptr null, ptr [[TMP43]], align 4
+// CK23-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8
+// CK23-NEXT: store i64 0, ptr [[TMP44]], align 8
+// CK23-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9
+// CK23-NEXT: store i64 0, ptr [[TMP45]], align 8
+// CK23-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10
+// CK23-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP46]], align 4
+// CK23-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11
+// CK23-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP47]], align 4
+// CK23-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12
+// CK23-NEXT: store i32 0, ptr [[TMP48]], align 4
+// CK23-NEXT: [[TMP49:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118.region_id, ptr [[KERNEL_ARGS5]])
+// CK23-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0
+// CK23-NEXT: br i1 [[TMP50]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
+// CK23: omp_offload.failed6:
+// CK23-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118(ptr [[THIS1]]) #[[ATTR3]]
+// CK23-NEXT: br label [[OMP_OFFLOAD_CONT7]]
+// CK23: omp_offload.cont7:
+// CK23-NEXT: [[A8:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
+// CK23-NEXT: [[B9:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK23-NEXT: [[TMP51:%.*]] = load ptr, ptr [[B9]], align 4
+// CK23-NEXT: [[TMP52:%.*]] = getelementptr ptr, ptr [[B9]], i32 1
+// CK23-NEXT: [[TMP53:%.*]] = ptrtoint ptr [[TMP52]] to i64
+// CK23-NEXT: [[TMP54:%.*]] = ptrtoint ptr [[A8]] to i64
+// CK23-NEXT: [[TMP55:%.*]] = sub i64 [[TMP53]], [[TMP54]]
+// CK23-NEXT: [[TMP56:%.*]] = sdiv exact i64 [[TMP55]], ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64)
+// CK23-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[DOTOFFLOAD_SIZES13]], ptr align 4 @.offload_sizes.3, i32 24, i1 false)
+// CK23-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
+// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP57]], align 4
+// CK23-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
+// CK23-NEXT: store ptr [[A8]], ptr [[TMP58]], align 4
+// CK23-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0
+// CK23-NEXT: store i64 [[TMP56]], ptr [[TMP59]], align 4
+// CK23-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 0
+// CK23-NEXT: store ptr null, ptr [[TMP60]], align 4
+// CK23-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 1
+// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP61]], align 4
+// CK23-NEXT: [[TMP62:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 1
+// CK23-NEXT: store ptr [[A8]], ptr [[TMP62]], align 4
+// CK23-NEXT: [[TMP63:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 1
+// CK23-NEXT: store ptr null, ptr [[TMP63]], align 4
+// CK23-NEXT: [[TMP64:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 2
+// CK23-NEXT: store ptr [[THIS1]], ptr [[TMP64]], align 4
+// CK23-NEXT: [[TMP65:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 2
+// CK23-NEXT: store ptr [[TMP51]], ptr [[TMP65]], align 4
+// CK23-NEXT: [[TMP66:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_MAPPERS12]], i32 0, i32 2
+// CK23-NEXT: store ptr null, ptr [[TMP66]], align 4
+// CK23-NEXT: [[TMP67:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0
+// CK23-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0
+// CK23-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0
+// CK23-NEXT: [[TMP70:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0
+// CK23-NEXT: store i32 2, ptr [[TMP70]], align 4
+// CK23-NEXT: [[TMP71:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1
+// CK23-NEXT: store i32 3, ptr [[TMP71]], align 4
+// CK23-NEXT: [[TMP72:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2
+// CK23-NEXT: store ptr [[TMP67]], ptr [[TMP72]], align 4
+// CK23-NEXT: [[TMP73:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 3
+// CK23-NEXT: store ptr [[TMP68]], ptr [[TMP73]], align 4
+// CK23-NEXT: [[TMP74:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 4
+// CK23-NEXT: store ptr [[TMP69]], ptr [[TMP74]], align 4
+// CK23-NEXT: [[TMP75:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 5
+// CK23-NEXT: store ptr @.offload_maptypes.4, ptr [[TMP75]], align 4
+// CK23-NEXT: [[TMP76:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 6
+// CK23-NEXT: store ptr null, ptr [[TMP76]], align 4
+// CK23-NEXT: [[TMP77:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 7
+// CK23-NEXT: store ptr null, ptr [[TMP77]], align 4
+// CK23-NEXT: [[TMP78:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 8
+// CK23-NEXT: store i64 0, ptr [[TMP78]], align 8
+// CK23-NEXT: [[TMP79:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 9
+// CK23-NEXT: store i64 0, ptr [[TMP79]], align 8
+// CK23-NEXT: [[TMP80:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 10
+// CK23-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP80]], align 4
+// CK23-NEXT: [[TMP81:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 11
+// CK23-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP81]], align 4
+// CK23-NEXT: [[TMP82:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 12
+// CK23-NEXT: store i32 0, ptr [[TMP82]], align 4
+// CK23-NEXT: [[TMP83:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125.region_id, ptr [[KERNEL_ARGS14]])
+// CK23-NEXT: [[TMP84:%.*]] = icmp ne i32 [[TMP83]], 0
+// CK23-NEXT: br i1 [[TMP84]], label [[OMP_OFFLOAD_FAILED15:%.*]], label [[OMP_OFFLOAD_CONT16:%.*]]
+// CK23: omp_offload.failed15:
+// CK23-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125(ptr [[THIS1]]) #[[ATTR3]]
+// CK23-NEXT: br label [[OMP_OFFLOAD_CONT16]]
+// CK23: omp_offload.cont16:
+// CK23-NEXT: ret void
+//
+//
+// CK23-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd
+// CK23-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// CK23-NEXT: entry:
+// CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK23-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// CK23-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// CK23-NEXT: store ptr null, ptr [[A]], align 4
+// CK23-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// CK23-NEXT: store ptr [[TMP0]], ptr [[B2]], align 4
+// CK23-NEXT: ret void
+//
+//
+// CK23-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l112
+// CK23-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2:[0-9]+]] {
+// CK23-NEXT: entry:
+// CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
+// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4
+// CK23-NEXT: ret void
+//
+//
+// CK23-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l118
+// CK23-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
+// CK23-NEXT: entry:
+// CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 1
+// CK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4
+// CK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
+// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[TMP1]], align 4
+// CK23-NEXT: ret void
+//
+//
+// CK23-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2STIdE3fooERPd_l125
+// CK23-SAME: (ptr noundef [[THIS:%.*]]) #[[ATTR2]] {
+// CK23-NEXT: entry:
+// CK23-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// CK23-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// CK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[TMP0]], i32 0, i32 0
+// CK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A]], align 4
+// CK23-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 1
+// CK23-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4
+// CK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[TMP0]], i32 0, i32 1
+// CK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B]], align 4
+// CK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4
+// CK23-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// CK23-NEXT: store ptr [[INCDEC_PTR1]], ptr [[TMP2]], align 4
+// CK23-NEXT: ret void
+//
+//
+// CK23-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CK23-SAME: () #[[ATTR5:[0-9]+]] {
+// CK23-NEXT: entry:
+// CK23-NEXT: call void @__tgt_register_requires(i64 1)
+// CK23-NEXT: ret void
+//
+//
+// SIMD-ONLY10-LABEL: define {{[^@]+}}@_Z3barPd
+// SIMD-ONLY10-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY10-NEXT: entry:
+// SIMD-ONLY10-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY10-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 8
+// SIMD-ONLY10-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8
+// SIMD-ONLY10-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
+// SIMD-ONLY10-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
+// SIMD-ONLY10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8
+// SIMD-ONLY10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8
+// SIMD-ONLY10-NEXT: ret void
+//
+//
+// SIMD-ONLY10-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd
+// SIMD-ONLY10-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// SIMD-ONLY10-NEXT: entry:
+// SIMD-ONLY10-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY10-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY10-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// SIMD-ONLY10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// SIMD-ONLY10-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]])
+// SIMD-ONLY10-NEXT: ret void
+//
+//
+// SIMD-ONLY10-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd
+// SIMD-ONLY10-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 {
+// SIMD-ONLY10-NEXT: entry:
+// SIMD-ONLY10-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY10-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY10-NEXT: [[LA:%.*]] = alloca ptr, align 8
+// SIMD-ONLY10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY10-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8
+// SIMD-ONLY10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY10-NEXT: store ptr null, ptr [[LA]], align 8
+// SIMD-ONLY10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 8
+// SIMD-ONLY10-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8
+// SIMD-ONLY10-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8
+// SIMD-ONLY10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// SIMD-ONLY10-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 8
+// SIMD-ONLY10-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 8
+// SIMD-ONLY10-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 8
+// SIMD-ONLY10-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY10-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 8
+// SIMD-ONLY10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// SIMD-ONLY10-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1
+// SIMD-ONLY10-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 8
+// SIMD-ONLY10-NEXT: ret void
+//
+//
+// SIMD-ONLY10-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd
+// SIMD-ONLY10-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// SIMD-ONLY10-NEXT: entry:
+// SIMD-ONLY10-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY10-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY10-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY10-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// SIMD-ONLY10-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY10-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY10-NEXT: store ptr null, ptr [[A]], align 8
+// SIMD-ONLY10-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// SIMD-ONLY10-NEXT: store ptr [[TMP0]], ptr [[B2]], align 8
+// SIMD-ONLY10-NEXT: ret void
+//
+//
+// SIMD-ONLY11-LABEL: define {{[^@]+}}@_Z3barPd
+// SIMD-ONLY11-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY11-NEXT: entry:
+// SIMD-ONLY11-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY11-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 8
+// SIMD-ONLY11-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8
+// SIMD-ONLY11-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
+// SIMD-ONLY11-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 8 dereferenceable(16) [[A]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG_ADDR]])
+// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 8
+// SIMD-ONLY11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 8
+// SIMD-ONLY11-NEXT: ret void
+//
+//
+// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd
+// SIMD-ONLY11-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// SIMD-ONLY11-NEXT: entry:
+// SIMD-ONLY11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// SIMD-ONLY11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// SIMD-ONLY11-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]])
+// SIMD-ONLY11-NEXT: ret void
+//
+//
+// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd
+// SIMD-ONLY11-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 {
+// SIMD-ONLY11-NEXT: entry:
+// SIMD-ONLY11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY11-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY11-NEXT: [[LA:%.*]] = alloca ptr, align 8
+// SIMD-ONLY11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY11-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 8
+// SIMD-ONLY11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY11-NEXT: store ptr null, ptr [[LA]], align 8
+// SIMD-ONLY11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 8
+// SIMD-ONLY11-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 8
+// SIMD-ONLY11-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 8
+// SIMD-ONLY11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8
+// SIMD-ONLY11-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 8
+// SIMD-ONLY11-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 8
+// SIMD-ONLY11-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 8
+// SIMD-ONLY11-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 8
+// SIMD-ONLY11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8
+// SIMD-ONLY11-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1
+// SIMD-ONLY11-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 8
+// SIMD-ONLY11-NEXT: ret void
+//
+//
+// SIMD-ONLY11-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd
+// SIMD-ONLY11-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// SIMD-ONLY11-NEXT: entry:
+// SIMD-ONLY11-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY11-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8
+// SIMD-ONLY11-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY11-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8
+// SIMD-ONLY11-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// SIMD-ONLY11-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY11-NEXT: store ptr null, ptr [[A]], align 8
+// SIMD-ONLY11-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// SIMD-ONLY11-NEXT: store ptr [[TMP0]], ptr [[B2]], align 8
+// SIMD-ONLY11-NEXT: ret void
+//
+//
+// SIMD-ONLY12-LABEL: define {{[^@]+}}@_Z3barPd
+// SIMD-ONLY12-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY12-NEXT: entry:
+// SIMD-ONLY12-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY12-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4
+// SIMD-ONLY12-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4
+// SIMD-ONLY12-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
+// SIMD-ONLY12-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
+// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4
+// SIMD-ONLY12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4
+// SIMD-ONLY12-NEXT: ret void
+//
+//
+// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd
+// SIMD-ONLY12-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// SIMD-ONLY12-NEXT: entry:
+// SIMD-ONLY12-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY12-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY12-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// SIMD-ONLY12-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// SIMD-ONLY12-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]])
+// SIMD-ONLY12-NEXT: ret void
+//
+//
+// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd
+// SIMD-ONLY12-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 {
+// SIMD-ONLY12-NEXT: entry:
+// SIMD-ONLY12-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY12-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY12-NEXT: [[LA:%.*]] = alloca ptr, align 4
+// SIMD-ONLY12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY12-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4
+// SIMD-ONLY12-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY12-NEXT: store ptr null, ptr [[LA]], align 4
+// SIMD-ONLY12-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 4
+// SIMD-ONLY12-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4
+// SIMD-ONLY12-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4
+// SIMD-ONLY12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
+// SIMD-ONLY12-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 4
+// SIMD-ONLY12-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 4
+// SIMD-ONLY12-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 4
+// SIMD-ONLY12-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY12-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 4
+// SIMD-ONLY12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// SIMD-ONLY12-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1
+// SIMD-ONLY12-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 4
+// SIMD-ONLY12-NEXT: ret void
+//
+//
+// SIMD-ONLY12-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd
+// SIMD-ONLY12-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// SIMD-ONLY12-NEXT: entry:
+// SIMD-ONLY12-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY12-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY12-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY12-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// SIMD-ONLY12-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY12-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY12-NEXT: store ptr null, ptr [[A]], align 4
+// SIMD-ONLY12-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// SIMD-ONLY12-NEXT: store ptr [[TMP0]], ptr [[B2]], align 4
+// SIMD-ONLY12-NEXT: ret void
+//
+//
+// SIMD-ONLY13-LABEL: define {{[^@]+}}@_Z3barPd
+// SIMD-ONLY13-SAME: (ptr noundef [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+// SIMD-ONLY13-NEXT: entry:
+// SIMD-ONLY13-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY13-NEXT: [[A:%.*]] = alloca [[STRUCT_ST:%.*]], align 4
+// SIMD-ONLY13-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4
+// SIMD-ONLY13-NEXT: call void @_ZN2STIdEC1ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
+// SIMD-ONLY13-NEXT: call void @_ZN2STIdE3fooERPd(ptr noundef nonnull align 4 dereferenceable(8) [[A]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG_ADDR]])
+// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARG_ADDR]], align 4
+// SIMD-ONLY13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR]], ptr [[ARG_ADDR]], align 4
+// SIMD-ONLY13-NEXT: ret void
+//
+//
+// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZN2STIdEC1ERPd
+// SIMD-ONLY13-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 {
+// SIMD-ONLY13-NEXT: entry:
+// SIMD-ONLY13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY13-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY13-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// SIMD-ONLY13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// SIMD-ONLY13-NEXT: call void @_ZN2STIdEC2ERPd(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], ptr noundef nonnull align 4 dereferenceable(4) [[TMP0]])
+// SIMD-ONLY13-NEXT: ret void
+//
+//
+// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZN2STIdE3fooERPd
+// SIMD-ONLY13-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[ARG:%.*]]) #[[ATTR0]] comdat align 2 {
+// SIMD-ONLY13-NEXT: entry:
+// SIMD-ONLY13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY13-NEXT: [[ARG_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY13-NEXT: [[LA:%.*]] = alloca ptr, align 4
+// SIMD-ONLY13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY13-NEXT: store ptr [[ARG]], ptr [[ARG_ADDR]], align 4
+// SIMD-ONLY13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY13-NEXT: store ptr null, ptr [[LA]], align 4
+// SIMD-ONLY13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A]], align 4
+// SIMD-ONLY13-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i32 1
+// SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR]], ptr [[A]], align 4
+// SIMD-ONLY13-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B]], align 4
+// SIMD-ONLY13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4
+// SIMD-ONLY13-NEXT: [[INCDEC_PTR2:%.*]] = getelementptr inbounds double, ptr [[TMP2]], i32 1
+// SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR2]], ptr [[TMP1]], align 4
+// SIMD-ONLY13-NEXT: [[A3:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY13-NEXT: [[TMP3:%.*]] = load ptr, ptr [[A3]], align 4
+// SIMD-ONLY13-NEXT: [[INCDEC_PTR4:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 1
+// SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR4]], ptr [[A3]], align 4
+// SIMD-ONLY13-NEXT: [[B5:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[B5]], align 4
+// SIMD-ONLY13-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4
+// SIMD-ONLY13-NEXT: [[INCDEC_PTR6:%.*]] = getelementptr inbounds double, ptr [[TMP5]], i32 1
+// SIMD-ONLY13-NEXT: store ptr [[INCDEC_PTR6]], ptr [[TMP4]], align 4
+// SIMD-ONLY13-NEXT: ret void
+//
+//
+// SIMD-ONLY13-LABEL: define {{[^@]+}}@_ZN2STIdEC2ERPd
+// SIMD-ONLY13-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 {
+// SIMD-ONLY13-NEXT: entry:
+// SIMD-ONLY13-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY13-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4
+// SIMD-ONLY13-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY13-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4
+// SIMD-ONLY13-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4
+// SIMD-ONLY13-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_ST:%.*]], ptr [[THIS1]], i32 0, i32 0
+// SIMD-ONLY13-NEXT: store ptr null, ptr [[A]], align 4
+// SIMD-ONLY13-NEXT: [[B2:%.*]] = getelementptr inbounds [[STRUCT_ST]], ptr [[THIS1]], i32 0, i32 1
+// SIMD-ONLY13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4
+// SIMD-ONLY13-NEXT: store ptr [[TMP0]], ptr [[B2]], align 4
+// SIMD-ONLY13-NEXT: ret void
+//
+//
+// CK30-LABEL: define {{[^@]+}}@_Z3barv
+// CK30-SAME: () #[[ATTR0:[0-9]+]] {
+// CK30-NEXT: entry:
+// CK30-NEXT: [[PTR:%.*]] = alloca ptr, align 64
+// CK30-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
+// CK30-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
+// CK30-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CK30-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CK30-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64
+// CK30-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK30-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8
+// CK30-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK30-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8
+// CK30-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CK30-NEXT: store ptr null, ptr [[TMP3]], align 8
+// CK30-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK30-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK30-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CK30-NEXT: store i32 2, ptr [[TMP6]], align 4
+// CK30-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CK30-NEXT: store i32 1, ptr [[TMP7]], align 4
+// CK30-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CK30-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CK30-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CK30-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
+// CK30-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CK30-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8
+// CK30-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CK30-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8
+// CK30-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CK30-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CK30-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CK30-NEXT: store ptr null, ptr [[TMP13]], align 8
+// CK30-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CK30-NEXT: store i64 0, ptr [[TMP14]], align 8
+// CK30-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CK30-NEXT: store i64 0, ptr [[TMP15]], align 8
+// CK30-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CK30-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4
+// CK30-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CK30-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4
+// CK30-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CK30-NEXT: store i32 0, ptr [[TMP18]], align 4
+// CK30-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]])
+// CK30-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
+// CK30-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CK30: omp_offload.failed:
+// CK30-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CK30-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CK30: omp_offload.cont:
+// CK30-NEXT: ret void
+//
+//
+// CK30-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159
+// CK30-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+// CK30-NEXT: entry:
+// CK30-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8
+// CK30-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8
+// CK30-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8
+// CK30-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 8
+// CK30-NEXT: ret void
+//
+//
+// CK30-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
+// CK30-SAME: () #[[ATTR3:[0-9]+]] {
+// CK30-NEXT: entry:
+// CK30-NEXT: call void @__tgt_register_requires(i64 1)
+// CK30-NEXT: ret void
+//
+//
+// CK31-LABEL: define {{[^@]+}}@_Z3barv
+// CK31-SAME: () #[[ATTR0:[0-9]+]] {
+// CK31-NEXT: entry:
+// CK31-NEXT: [[PTR:%.*]] = alloca ptr, align 64
+// CK31-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 8
+// CK31-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 8
+// CK31-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 8
+// CK31-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8
+// CK31-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64
+// CK31-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK31-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 8
+// CK31-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK31-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 8
+// CK31-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
+// CK31-NEXT: store ptr null, ptr [[TMP3]], align 8
+// CK31-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
+// CK31-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0
+// CK31-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0
+// CK31-NEXT: store i32 2, ptr [[TMP6]], align 4
+// CK31-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1
+// CK31-NEXT: store i32 1, ptr [[TMP7]], align 4
+// CK31-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2
+// CK31-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 8
+// CK31-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3
+// CK31-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 8
+// CK31-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4
+// CK31-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 8
+// CK31-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5
+// CK31-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 8
+// CK31-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6
+// CK31-NEXT: store ptr null, ptr [[TMP12]], align 8
+// CK31-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7
+// CK31-NEXT: store ptr null, ptr [[TMP13]], align 8
+// CK31-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8
+// CK31-NEXT: store i64 0, ptr [[TMP14]], align 8
+// CK31-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9
+// CK31-NEXT: store i64 0, ptr [[TMP15]], align 8
+// CK31-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10
+// CK31-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4
+// CK31-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11
+// CK31-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4
+// CK31-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12
+// CK31-NEXT: store i32 0, ptr [[TMP18]], align 4
+// CK31-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]])
+// CK31-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0
+// CK31-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
+// CK31: omp_offload.failed:
+// CK31-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]]
+// CK31-NEXT: br label [[OMP_OFFLOAD_CONT]]
+// CK31: omp_offload.cont:
+// CK31-NEXT: ret void
+//
+//
+// CK31-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159
+//
CK31-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CK31-NEXT: entry: +// CK31-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 8 +// CK31-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 8 +// CK31-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 8 +// CK31-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 8 +// CK31-NEXT: ret void +// +// +// CK31-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK31-SAME: () #[[ATTR3:[0-9]+]] { +// CK31-NEXT: entry: +// CK31-NEXT: call void @__tgt_register_requires(i64 1) +// CK31-NEXT: ret void +// +// +// CK32-LABEL: define {{[^@]+}}@_Z3barv +// CK32-SAME: () #[[ATTR0:[0-9]+]] { +// CK32-NEXT: entry: +// CK32-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// CK32-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK32-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK32-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK32-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// CK32-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK32-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CK32-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK32-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CK32-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK32-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CK32-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK32-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK32-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK32-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK32-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// CK32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK32-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CK32-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK32-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4 +// CK32-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK32-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4 +// CK32-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK32-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK32-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK32-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CK32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK32-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK32-NEXT: store i64 0, ptr [[TMP15]], align 8 +// 
CK32-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK32-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK32-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK32-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK32-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK32-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK32-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]]) +// CK32-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK32-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK32: omp_offload.failed: +// CK32-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK32-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK32: omp_offload.cont: +// CK32-NEXT: ret void +// +// +// CK32-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159 +// CK32-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CK32-NEXT: entry: +// CK32-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4 +// CK32-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4 +// CK32-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4 +// CK32-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 4 +// CK32-NEXT: ret void +// +// +// CK32-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK32-SAME: () #[[ATTR3:[0-9]+]] { +// CK32-NEXT: entry: +// CK32-NEXT: call void @__tgt_register_requires(i64 1) +// CK32-NEXT: ret void +// +// +// CK33-LABEL: define {{[^@]+}}@_Z3barv +// CK33-SAME: () #[[ATTR0:[0-9]+]] { +// CK33-NEXT: entry: +// CK33-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// CK33-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x ptr], align 4 +// CK33-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x ptr], align 4 +// CK33-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [1 x ptr], align 4 +// CK33-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 +// CK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// CK33-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK33-NEXT: store ptr [[TMP0]], ptr [[TMP1]], align 4 +// CK33-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK33-NEXT: store ptr [[TMP0]], ptr [[TMP2]], align 4 +// CK33-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CK33-NEXT: store ptr null, ptr [[TMP3]], align 4 +// CK33-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CK33-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CK33-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CK33-NEXT: store i32 2, ptr [[TMP6]], align 4 +// CK33-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CK33-NEXT: store i32 1, ptr [[TMP7]], align 4 +// CK33-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CK33-NEXT: store ptr [[TMP4]], ptr [[TMP8]], align 4 +// 
CK33-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CK33-NEXT: store ptr [[TMP5]], ptr [[TMP9]], align 4 +// CK33-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CK33-NEXT: store ptr @.offload_sizes, ptr [[TMP10]], align 4 +// CK33-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CK33-NEXT: store ptr @.offload_maptypes, ptr [[TMP11]], align 4 +// CK33-NEXT: [[TMP12:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CK33-NEXT: store ptr null, ptr [[TMP12]], align 4 +// CK33-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CK33-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CK33-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CK33-NEXT: store i64 0, ptr [[TMP14]], align 8 +// CK33-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CK33-NEXT: store i64 0, ptr [[TMP15]], align 8 +// CK33-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CK33-NEXT: store [3 x i32] [i32 -1, i32 0, i32 0], ptr [[TMP16]], align 4 +// CK33-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CK33-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP17]], align 4 +// CK33-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CK33-NEXT: store i32 0, ptr [[TMP18]], align 4 +// CK33-NEXT: [[TMP19:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1:[0-9]+]], i64 -1, i32 -1, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159.region_id, ptr [[KERNEL_ARGS]]) +// CK33-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CK33-NEXT: br i1 [[TMP20]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CK33: omp_offload.failed: +// CK33-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159(ptr [[TMP0]]) #[[ATTR2:[0-9]+]] +// CK33-NEXT: br label [[OMP_OFFLOAD_CONT]] +// CK33: omp_offload.cont: +// CK33-NEXT: ret void +// +// +// CK33-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3barv_l159 +// CK33-SAME: (ptr noundef [[PTR:%.*]]) #[[ATTR1:[0-9]+]] { +// CK33-NEXT: entry: +// CK33-NEXT: [[PTR_ADDR:%.*]] = alloca ptr, align 4 +// CK33-NEXT: store ptr [[PTR]], ptr [[PTR_ADDR]], align 4 +// CK33-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR_ADDR]], align 4 +// CK33-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 4 +// CK33-NEXT: ret void +// +// +// CK33-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg +// CK33-SAME: () #[[ATTR3:[0-9]+]] { +// CK33-NEXT: entry: +// CK33-NEXT: call void @__tgt_register_requires(i64 1) +// CK33-NEXT: ret void +// +// +// SIMD-ONLY20-LABEL: define {{[^@]+}}@_Z3barv +// SIMD-ONLY20-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY20-NEXT: entry: +// SIMD-ONLY20-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// SIMD-ONLY20-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// SIMD-ONLY20-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 8 +// SIMD-ONLY20-NEXT: ret void +// +// +// SIMD-ONLY21-LABEL: define {{[^@]+}}@_Z3barv +// SIMD-ONLY21-SAME: () #[[ATTR0:[0-9]+]] { +// 
SIMD-ONLY21-NEXT: entry: +// SIMD-ONLY21-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// SIMD-ONLY21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// SIMD-ONLY21-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 8 +// SIMD-ONLY21-NEXT: ret void +// +// +// SIMD-ONLY22-LABEL: define {{[^@]+}}@_Z3barv +// SIMD-ONLY22-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY22-NEXT: entry: +// SIMD-ONLY22-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// SIMD-ONLY22-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// SIMD-ONLY22-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 4 +// SIMD-ONLY22-NEXT: ret void +// +// +// SIMD-ONLY23-LABEL: define {{[^@]+}}@_Z3barv +// SIMD-ONLY23-SAME: () #[[ATTR0:[0-9]+]] { +// SIMD-ONLY23-NEXT: entry: +// SIMD-ONLY23-NEXT: [[PTR:%.*]] = alloca ptr, align 64 +// SIMD-ONLY23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[PTR]], align 64 +// SIMD-ONLY23-NEXT: store double 0.000000e+00, ptr [[TMP0]], align 4 +// SIMD-ONLY23-NEXT: ret void +// diff --git a/clang/test/OpenMP/teams_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_firstprivate_codegen.cpp index c1617e16b402c..e8f9512114f7f 100644 --- a/clang/test/OpenMP/teams_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_firstprivate_codegen.cpp @@ -168,46 +168,42 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 -// CHECK1-SAME: (ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK1-SAME: (i64 [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK1-NEXT: [[G_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK1-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[G_CASTED]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 // CHECK1-NEXT: [[TMP3:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[G1]], i64 [[TMP3]]) +// CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i64 [[TMP1]], i64 [[TMP3]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK1-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[G:%.*]], i64 [[SIVAR:%.*]]) #[[ATTR2]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 8 +// CHECK1-NEXT: [[G_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[SIVAR_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[G]], ptr [[G_ADDR]], align 8 // CHECK1-NEXT: store i64 [[SIVAR]], ptr [[SIVAR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK1-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK1-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK1-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[G1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 8 // CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 8 dereferenceable(16) [[REF_TMP]]) // CHECK1-NEXT: ret void // @@ -230,46 +226,42 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l91 -// CHECK3-SAME: (ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK3-SAME: (i32 [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 +// CHECK3-NEXT: [[G_CASTED:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32, ptr [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP0]], ptr [[G_CASTED]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[G_CASTED]], align 4 // CHECK3-NEXT: 
[[TMP2:%.*]] = load i32, ptr [[SIVAR_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP2]], ptr [[SIVAR_CASTED]], align 4 // CHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., ptr [[G1]], i32 [[TMP3]]) +// CHECK3-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @.omp_outlined., i32 [[TMP1]], i32 [[TMP3]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@.omp_outlined. -// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2]] { +// CHECK3-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[G:%.*]], i32 [[SIVAR:%.*]]) #[[ATTR2]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca ptr, align 4 +// CHECK3-NEXT: [[G_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[SIVAR_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[G1:%.*]] = alloca i32, align 128 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 4 // CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[G]], ptr [[G_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[G]], ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 [[SIVAR]], ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[G_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[G1]], align 128 -// CHECK3-NEXT: store i32 1, ptr [[G1]], align 128 +// CHECK3-NEXT: store i32 1, ptr [[G_ADDR]], align 4 // CHECK3-NEXT: store i32 2, ptr [[SIVAR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[G1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP3]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 0 +// CHECK3-NEXT: store ptr [[G_ADDR]], ptr [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], ptr [[REF_TMP]], i32 0, i32 1 +// CHECK3-NEXT: store ptr [[SIVAR_ADDR]], ptr [[TMP1]], align 4 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(ptr nonnull align 4 dereferenceable(8) [[REF_TMP]]) // CHECK3-NEXT: ret void // @@ -637,14 +629,16 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 // CHECK9-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK9-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 8 // CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 8 // CHECK9-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 8 -// CHECK9-NEXT: 
[[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 8 -// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 8 -// CHECK9-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK9-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [1 x ptr], align 8 +// CHECK9-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK9-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK9-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i64 8, i1 false) @@ -653,121 +647,127 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i64 1 // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 signext 2) // CHECK9-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 signext 3) -// CHECK9-NEXT: [[TMP0:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 -// CHECK9-NEXT: store ptr null, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 -// CHECK9-NEXT: store ptr null, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 -// CHECK9-NEXT: store ptr null, ptr [[TMP11]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 2, ptr [[TMP14]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK9-NEXT: store i32 4, ptr [[TMP15]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP12]], ptr [[TMP16]], align 8 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.5, ptr [[TMP18]], align 8 -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP19]], align 8 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP21]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK9-NEXT: store i64 0, ptr [[TMP22]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK9-NEXT: store i64 0, ptr [[TMP23]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP24]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP25]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK9-NEXT: store i32 0, ptr [[TMP26]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, ptr [[KERNEL_ARGS]]) -// CHECK9-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK9-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP1]], ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK9-NEXT: store ptr 
[[VEC]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK9-NEXT: store ptr [[VEC]], ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1 +// CHECK9-NEXT: store ptr null, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[S_ARR]], ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2 +// CHECK9-NEXT: store ptr null, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[VAR]], ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3 +// CHECK9-NEXT: store ptr null, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK9-NEXT: store i32 2, ptr [[TMP16]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK9-NEXT: store i32 4, ptr [[TMP17]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.5, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK9-NEXT: store ptr null, ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK9-NEXT: store i64 0, ptr [[TMP24]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK9-NEXT: store i64 0, ptr [[TMP25]], align 8 +// CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK9-NEXT: store [3 x 
i32] zeroinitializer, ptr [[TMP26]], align 4 +// CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK9-NEXT: store i32 0, ptr [[TMP28]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, ptr [[KERNEL_ARGS]]) +// CHECK9-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK9-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK9: omp_offload.failed: -// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) #[[ATTR4]] +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i64 [[TMP1]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK9: omp_offload.cont: -// CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP29]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[TMP30]], align 8 -// CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i64 0, i64 0 -// CHECK9-NEXT: store ptr null, ptr [[TMP31]], align 8 -// CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK9-NEXT: store i32 2, ptr [[TMP34]], align 4 -// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK9-NEXT: store i32 1, ptr [[TMP35]], align 4 -// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 -// CHECK9-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 8 -// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK9-NEXT: store ptr [[TMP33]], ptr [[TMP37]], align 8 -// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK9-NEXT: store ptr @.offload_sizes.8, ptr [[TMP38]], align 8 -// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK9-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP39]], align 8 -// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 -// CHECK9-NEXT: store ptr null, ptr [[TMP40]], align 8 -// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK9-NEXT: store ptr null, ptr [[TMP41]], align 8 -// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK9-NEXT: store i64 
0, ptr [[TMP42]], align 8 -// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 -// CHECK9-NEXT: store i64 0, ptr [[TMP43]], align 8 -// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 -// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP44]], align 4 -// CHECK9-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 -// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP45]], align 4 -// CHECK9-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 -// CHECK9-NEXT: store i32 0, ptr [[TMP46]], align 4 -// CHECK9-NEXT: [[TMP47:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK9-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 -// CHECK9-NEXT: br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] -// CHECK9: omp_offload.failed5: -// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81(ptr [[T_VAR]]) #[[ATTR4]] -// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT6]] -// CHECK9: omp_offload.cont6: +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK9-NEXT: store i32 [[TMP31]], ptr [[T_VAR_CASTED1]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i64, ptr [[T_VAR_CASTED1]], align 8 +// CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP32]], ptr [[TMP33]], align 8 +// CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK9-NEXT: store i64 [[TMP32]], ptr [[TMP34]], align 8 +// CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i64 0, i64 0 +// CHECK9-NEXT: store ptr null, ptr [[TMP35]], align 8 +// CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK9-NEXT: store i32 2, ptr [[TMP38]], align 4 +// CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK9-NEXT: store i32 1, ptr [[TMP39]], align 4 +// CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK9-NEXT: store ptr [[TMP36]], ptr [[TMP40]], align 8 +// CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK9-NEXT: store ptr [[TMP37]], ptr [[TMP41]], align 8 +// CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK9-NEXT: store ptr @.offload_sizes.8, ptr [[TMP42]], align 8 +// CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK9-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP43]], align 8 +// CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// 
CHECK9-NEXT: store ptr null, ptr [[TMP44]], align 8 +// CHECK9-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK9-NEXT: store ptr null, ptr [[TMP45]], align 8 +// CHECK9-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK9-NEXT: store i64 0, ptr [[TMP46]], align 8 +// CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK9-NEXT: store i64 0, ptr [[TMP47]], align 8 +// CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP48]], align 4 +// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK9-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP49]], align 4 +// CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK9-NEXT: store i32 0, ptr [[TMP50]], align 4 +// CHECK9-NEXT: [[TMP51:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.region_id, ptr [[KERNEL_ARGS5]]) +// CHECK9-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK9-NEXT: br i1 [[TMP52]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK9: omp_offload.failed6: +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81(i64 [[TMP32]]) #[[ATTR4]] +// CHECK9-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK9: omp_offload.cont7: // CHECK9-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[OMP_OFFLOAD_CONT6]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP53]], [[OMP_OFFLOAD_CONT7]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done7: +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK9-NEXT: ret i32 [[TMP50]] +// CHECK9-NEXT: [[TMP54:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK9-NEXT: ret i32 [[TMP54]] // // // CHECK9-LABEL: define 
{{[^@]+}}@_ZN1SIfEC2Ev @@ -877,89 +877,85 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75 -// CHECK9-SAME: (ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP1]], ptr [[T_VAR1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i64 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i64 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK9-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK9-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 +// CHECK9-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK9-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK9-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK9-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK9-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK9-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 8 // CHECK9-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i64 8, i1 false) -// CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 -// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i64 8, i1 false) +// 
CHECK9-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK9: omp.arraycpy.body: -// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK9-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK9-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK9-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK9: omp.arraycpy.done4: -// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC2]], i64 0, i64 0 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i64 0, i64 0 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i64 4, i1 false) -// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i64 2 +// CHECK9-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK9-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK9: omp.arraycpy.done3: +// CHECK9-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) +// CHECK9-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) +// CHECK9-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], 
align 4 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i64 0, i64 0 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 +// CHECK9-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i64 0, i64 0 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i64 4, i1 false) +// CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] +// CHECK9-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i64 2 // CHECK9-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK9: arraydestroy.body: -// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK9-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK9-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1 // CHECK9-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK9: arraydestroy.done9: +// CHECK9-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK9-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK9: arraydestroy.done8: // CHECK9-NEXT: ret void // // @@ -989,31 +985,27 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK9-SAME: (ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (i64 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[T_VAR1]]) +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK9-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK9-NEXT: [[TMP1:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, i64 [[TMP1]]) // CHECK9-NEXT: ret void // // // CHECK9-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK9-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i64 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK9-NEXT: entry: // CHECK9-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 8 -// CHECK9-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK9-NEXT: [[T_VAR_ADDR:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK9-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK9-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK9-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK9-NEXT: store i64 [[T_VAR]], ptr [[T_VAR_ADDR]], align 8 // CHECK9-NEXT: ret void // // @@ -1438,14 +1430,16 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[VEC:%.*]] = alloca [2 x i32], align 128 // CHECK11-NEXT: [[S_ARR:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK11-NEXT: [[VAR:%.*]] = alloca [[STRUCT_S_0]], align 128 +// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [4 x ptr], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [4 x ptr], align 4 // CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [4 x ptr], align 4 // CHECK11-NEXT: [[KERNEL_ARGS:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS:%.*]], align 8 -// CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS1:%.*]] = alloca [1 x ptr], align 4 -// CHECK11-NEXT: [[DOTOFFLOAD_PTRS2:%.*]] = alloca [1 x ptr], align 4 -// CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS3:%.*]] = alloca [1 x ptr], align 4 -// CHECK11-NEXT: [[KERNEL_ARGS4:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 +// CHECK11-NEXT: [[T_VAR_CASTED1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[DOTOFFLOAD_MAPPERS4:%.*]] = alloca [1 x ptr], align 4 +// CHECK11-NEXT: [[KERNEL_ARGS5:%.*]] = alloca [[STRUCT___TGT_KERNEL_ARGUMENTS]], align 8 // CHECK11-NEXT: call void @_ZN1SIiEC1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) // CHECK11-NEXT: store i32 0, ptr [[T_VAR]], align 128 // CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC]], ptr align 128 @__const._Z5tmainIiET_v.vec, i32 8, i1 false) @@ -1454,121 +1448,127 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[ARRAYINIT_ELEMENT:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYINIT_BEGIN]], i32 1 // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[ARRAYINIT_ELEMENT]], i32 2) // CHECK11-NEXT: call void @_ZN1SIiEC1Ei(ptr nonnull align 4 dereferenceable(4) [[VAR]], i32 3) -// CHECK11-NEXT: [[TMP0:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP0]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x ptr], 
ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 -// CHECK11-NEXT: store ptr null, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr null, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[VAR]], ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr null, ptr [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 2, ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 -// CHECK11-NEXT: store i32 4, ptr [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP12]], ptr [[TMP16]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP13]], ptr [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.5, ptr [[TMP18]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP19]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP20]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP21]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 -// CHECK11-NEXT: store i64 0, ptr [[TMP22]], align 8 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 -// CHECK11-NEXT: store i64 0, ptr [[TMP23]], align 8 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 -// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 -// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP25]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 -// CHECK11-NEXT: store i32 0, ptr [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, ptr [[KERNEL_ARGS]]) -// CHECK11-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK11-NEXT: br i1 [[TMP28]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 1 +// CHECK11-NEXT: store ptr [[VEC]], ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 1 +// CHECK11-NEXT: store ptr null, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[S_ARR]], ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr null, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[VAR]], ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[VAR]], ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_MAPPERS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr null, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 
0, i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 +// CHECK11-NEXT: store i32 2, ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 +// CHECK11-NEXT: store i32 4, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP14]], ptr [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP15]], ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.5, ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.6, ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 8 +// CHECK11-NEXT: store i64 0, ptr [[TMP24]], align 8 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 9 +// CHECK11-NEXT: store i64 0, ptr [[TMP25]], align 8 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 10 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 11 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 12 +// CHECK11-NEXT: store i32 0, ptr [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75.region_id, ptr [[KERNEL_ARGS]]) +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]] // CHECK11: omp_offload.failed: -// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(ptr [[T_VAR]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) #[[ATTR4]] +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75(i32 [[TMP1]], ptr [[VEC]], ptr [[S_ARR]], ptr [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT]] // CHECK11: omp_offload.cont: -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[TMP30]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS3]], i32 0, i32 0 -// CHECK11-NEXT: store ptr null, ptr [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK11-NEXT: store i32 2, ptr [[TMP34]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 -// CHECK11-NEXT: store i32 1, ptr [[TMP35]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 -// CHECK11-NEXT: store ptr [[TMP32]], ptr [[TMP36]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 3 -// CHECK11-NEXT: store ptr [[TMP33]], ptr [[TMP37]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 4 -// CHECK11-NEXT: store ptr @.offload_sizes.8, ptr [[TMP38]], align 4 -// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 5 -// CHECK11-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP39]], align 4 -// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 6 -// CHECK11-NEXT: store ptr null, ptr [[TMP40]], align 4 -// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 7 -// CHECK11-NEXT: store ptr null, ptr [[TMP41]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 8 -// CHECK11-NEXT: store i64 0, ptr [[TMP42]], align 8 -// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 9 -// CHECK11-NEXT: store i64 0, ptr [[TMP43]], align 8 -// CHECK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 10 -// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP44]], align 4 -// CHECK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 11 -// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP45]], align 4 -// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 12 -// CHECK11-NEXT: store i32 0, ptr [[TMP46]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.region_id, ptr [[KERNEL_ARGS4]]) -// CHECK11-NEXT: [[TMP48:%.*]] = icmp ne i32 [[TMP47]], 0 -// CHECK11-NEXT: br i1 [[TMP48]], label [[OMP_OFFLOAD_FAILED5:%.*]], label [[OMP_OFFLOAD_CONT6:%.*]] -// CHECK11: omp_offload.failed5: -// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81(ptr [[T_VAR]]) #[[ATTR4]] -// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT6]] -// CHECK11: omp_offload.cont6: +// 
CHECK11-NEXT: [[TMP31:%.*]] = load i32, ptr [[T_VAR]], align 128 +// CHECK11-NEXT: store i32 [[TMP31]], ptr [[T_VAR_CASTED1]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, ptr [[T_VAR_CASTED1]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP33]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP32]], ptr [[TMP34]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_MAPPERS4]], i32 0, i32 0 +// CHECK11-NEXT: store ptr null, ptr [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 +// CHECK11-NEXT: store i32 2, ptr [[TMP38]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 +// CHECK11-NEXT: store i32 1, ptr [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 +// CHECK11-NEXT: store ptr [[TMP36]], ptr [[TMP40]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 3 +// CHECK11-NEXT: store ptr [[TMP37]], ptr [[TMP41]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 4 +// CHECK11-NEXT: store ptr @.offload_sizes.8, ptr [[TMP42]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 5 +// CHECK11-NEXT: store ptr @.offload_maptypes.9, ptr [[TMP43]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 6 +// CHECK11-NEXT: store ptr null, ptr [[TMP44]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 7 +// CHECK11-NEXT: store ptr null, ptr [[TMP45]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 8 +// CHECK11-NEXT: store i64 0, ptr [[TMP46]], align 8 +// CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 9 +// CHECK11-NEXT: store i64 0, ptr [[TMP47]], align 8 +// CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 10 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP48]], align 4 +// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 11 +// CHECK11-NEXT: store [3 x i32] zeroinitializer, ptr [[TMP49]], align 4 +// CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 12 +// CHECK11-NEXT: store i32 0, ptr [[TMP50]], align 4 +// CHECK11-NEXT: [[TMP51:%.*]] = call i32 @__tgt_target_kernel(ptr @[[GLOB1]], i64 -1, i32 0, i32 0, ptr 
@.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.region_id, ptr [[KERNEL_ARGS5]]) +// CHECK11-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK11-NEXT: br i1 [[TMP52]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]] +// CHECK11: omp_offload.failed6: +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81(i32 [[TMP32]]) #[[ATTR4]] +// CHECK11-NEXT: br label [[OMP_OFFLOAD_CONT7]] +// CHECK11: omp_offload.cont7: // CHECK11-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP49]], [[OMP_OFFLOAD_CONT6]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP53]], [[OMP_OFFLOAD_CONT7]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] // CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE7:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done7: +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[TEST]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP50:%.*]] = load i32, ptr [[RETVAL]], align 4 -// CHECK11-NEXT: ret i32 [[TMP50]] +// CHECK11-NEXT: [[TMP54:%.*]] = load i32, ptr [[RETVAL]], align 4 +// CHECK11-NEXT: ret i32 [[TMP54]] // // // CHECK11-LABEL: define {{[^@]+}}@_ZN1SIfEC2Ev @@ -1678,89 +1678,85 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l75 -// CHECK11-SAME: (ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: 
store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP1]], ptr [[T_VAR1]], ptr [[TMP2]], ptr [[TMP3]]) +// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @.omp_outlined..4, ptr [[TMP0]], i32 [[TMP4]], ptr [[TMP1]], ptr [[TMP2]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(8) [[VEC:%.*]], i32 [[T_VAR:%.*]], ptr nonnull align 4 dereferenceable(8) [[S_ARR:%.*]], ptr nonnull align 4 dereferenceable(4) [[VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VEC_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 +// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[S_ARR_ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK11-NEXT: [[VEC2:%.*]] = alloca [2 x i32], align 128 -// CHECK11-NEXT: [[S_ARR3:%.*]] = alloca [2 x %struct.S.0], align 128 +// CHECK11-NEXT: [[VEC1:%.*]] = alloca [2 x i32], align 128 +// CHECK11-NEXT: [[S_ARR2:%.*]] = alloca [2 x %struct.S.0], align 128 // CHECK11-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_ST:%.*]], align 4 -// CHECK11-NEXT: [[VAR5:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 -// CHECK11-NEXT: [[AGG_TMP6:%.*]] = alloca [[STRUCT_ST]], align 4 +// CHECK11-NEXT: [[VAR4:%.*]] = alloca [[STRUCT_S_0:%.*]], align 128 +// CHECK11-NEXT: [[AGG_TMP5:%.*]] = alloca [[STRUCT_ST]], align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[VEC]], ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: store ptr [[S_ARR]], ptr [[S_ARR_ADDR]], 
align 4 // CHECK11-NEXT: store ptr [[VAR]], ptr [[VAR_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[VEC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP1]], align 128 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC2]], ptr align 128 [[TMP0]], i32 8, i1 false) -// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 -// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP5]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE4:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ARR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[VAR_ADDR]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[VEC1]], ptr align 128 [[TMP0]], i32 8, i1 false) +// CHECK11-NEXT: [[ARRAY_BEGIN:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_ISEMPTY:%.*]] = icmp eq ptr [[ARRAY_BEGIN]], [[TMP3]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_ISEMPTY]], label [[OMP_ARRAYCPY_DONE3:%.*]], label [[OMP_ARRAYCPY_BODY:%.*]] // CHECK11: omp.arraycpy.body: -// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] +// CHECK11-NEXT: [[OMP_ARRAYCPY_SRCELEMENTPAST:%.*]] = phi ptr [ [[TMP1]], [[ENTRY:%.*]] ], [ [[OMP_ARRAYCPY_SRC_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: [[OMP_ARRAYCPY_DESTELEMENTPAST:%.*]] = phi ptr [ [[ARRAY_BEGIN]], [[ENTRY]] ], [ [[OMP_ARRAYCPY_DEST_ELEMENT:%.*]], [[OMP_ARRAYCPY_BODY]] ] // CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_DESTELEMENTPAST]], ptr nonnull align 4 dereferenceable(4) [[OMP_ARRAYCPY_SRCELEMENTPAST]], ptr [[AGG_TMP]]) // CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP]]) #[[ATTR4]] // CHECK11-NEXT: [[OMP_ARRAYCPY_DEST_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_DESTELEMENTPAST]], i32 1 // CHECK11-NEXT: [[OMP_ARRAYCPY_SRC_ELEMENT]] = getelementptr [[STRUCT_S_0]], ptr [[OMP_ARRAYCPY_SRCELEMENTPAST]], i32 1 -// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP5]] -// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE4]], label [[OMP_ARRAYCPY_BODY]] -// CHECK11: omp.arraycpy.done4: -// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) -// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR5]], ptr nonnull align 4 dereferenceable(4) [[TMP3]], ptr [[AGG_TMP6]]) -// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP6]]) #[[ATTR4]] -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x 
i32], ptr [[VEC2]], i32 0, i32 0 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[ARRAYIDX]], align 128 -// CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX7]], ptr align 128 [[VAR5]], i32 4, i1 false) -// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR5]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAY_BEGIN8:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR3]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN8]], i32 2 +// CHECK11-NEXT: [[OMP_ARRAYCPY_DONE:%.*]] = icmp eq ptr [[OMP_ARRAYCPY_DEST_ELEMENT]], [[TMP3]] +// CHECK11-NEXT: br i1 [[OMP_ARRAYCPY_DONE]], label [[OMP_ARRAYCPY_DONE3]], label [[OMP_ARRAYCPY_BODY]] +// CHECK11: omp.arraycpy.done3: +// CHECK11-NEXT: call void @_ZN2StC1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) +// CHECK11-NEXT: call void @_ZN1SIiEC1ERKS0_2St(ptr nonnull align 4 dereferenceable(4) [[VAR4]], ptr nonnull align 4 dereferenceable(4) [[TMP2]], ptr [[AGG_TMP5]]) +// CHECK11-NEXT: call void @_ZN2StD1Ev(ptr nonnull align 4 dereferenceable(8) [[AGG_TMP5]]) #[[ATTR4]] +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[VEC1]], i32 0, i32 0 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[ARRAYIDX]], align 128 +// CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 128 [[ARRAYIDX6]], ptr align 128 [[VAR4]], i32 4, i1 false) +// CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[VAR4]]) #[[ATTR4]] +// CHECK11-NEXT: [[ARRAY_BEGIN7:%.*]] = getelementptr inbounds [2 x %struct.S.0], ptr [[S_ARR2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAY_BEGIN7]], i32 2 // CHECK11-NEXT: br label [[ARRAYDESTROY_BODY:%.*]] // CHECK11: arraydestroy.body: -// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP7]], [[OMP_ARRAYCPY_DONE4]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] +// CHECK11-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[OMP_ARRAYCPY_DONE3]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ] // CHECK11-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S_0]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i32 -1 // CHECK11-NEXT: call void @_ZN1SIiED1Ev(ptr nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]] -// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN8]] -// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE9:%.*]], label [[ARRAYDESTROY_BODY]] -// CHECK11: arraydestroy.done9: +// CHECK11-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[ARRAY_BEGIN7]] +// CHECK11-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE8:%.*]], label [[ARRAYDESTROY_BODY]] +// CHECK11: arraydestroy.done8: // CHECK11-NEXT: ret void // // @@ -1790,31 +1786,27 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // // // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81 -// CHECK11-SAME: (ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (i32 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: 
[[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, ptr [[T_VAR1]]) +// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load i32, ptr [[T_VAR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP0]], ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @.omp_outlined..7, i32 [[TMP1]]) // CHECK11-NEXT: ret void // // // CHECK11-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], ptr nonnull align 4 dereferenceable(4) [[T_VAR:%.*]]) #[[ATTR3]] { +// CHECK11-SAME: (ptr noalias [[DOTGLOBAL_TID_:%.*]], ptr noalias [[DOTBOUND_TID_:%.*]], i32 [[T_VAR:%.*]]) #[[ATTR3]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK11-NEXT: [[T_VAR1:%.*]] = alloca i32, align 128 +// CHECK11-NEXT: [[T_VAR_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK11-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store ptr [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[T_VAR_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP0]], align 128 -// CHECK11-NEXT: store i32 [[TMP1]], ptr [[T_VAR1]], align 128 +// CHECK11-NEXT: store i32 [[T_VAR]], ptr [[T_VAR_ADDR]], align 4 // CHECK11-NEXT: ret void // // From 299f3ac5233fb411bc0432a1ed24724ff532f2c5 Mon Sep 17 00:00:00 2001 From: Jun Zhang Date: Wed, 22 Mar 2023 23:41:53 +0800 Subject: [PATCH 308/691] Regenerate checks for bswap.ll, NFC Signed-off-by: Jun Zhang --- llvm/test/Transforms/InstCombine/bswap.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 09dbff00f0055..8c5c761c73e29 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -541,8 +541,8 @@ define i8 @PR39793_bswap_u32_as_u16_trunc(i32 %0) { define i32 @partial_bswap(i32 %x) { ; CHECK-LABEL: @partial_bswap( -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]]) +; CHECK-NEXT: ret i32 [[R]] ; %x3 = shl i32 %x, 24 %a2 = shl i32 %x, 8 @@ -557,8 +557,8 @@ declare i32 @llvm.bswap.i32(i32) define <2 x i32> @partial_bswap_vector(<2 x i32> %x) { ; CHECK-LABEL: @partial_bswap_vector( -; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]]) -; CHECK-NEXT: ret <2 x i32> [[TMP1]] +; CHECK-NEXT: [[R:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]]) +; CHECK-NEXT: 
ret <2 x i32> [[R]]
 ;
   %x3 = shl <2 x i32> %x, <i32 24, i32 24>
   %a2 = shl <2 x i32> %x, <i32 8, i32 8>

From ada03565261ab6ef1c5bca217767fe7f69d19a99 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 22 Mar 2023 15:56:23 +0000
Subject: [PATCH 309/691] [X86] Extend all_of(icmp_eq()) / any_of(icmp_ne()) -> scalar integer fold to AVX512 targets

Extends 1bb95a3a99cb44f2b8b801e5137d3ac529253f3b to combine on AVX512 targets
where the vXi1 type is legal

Continues work on addressing Issue #53419
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 32 +++++----
 llvm/test/CodeGen/X86/pr53419.ll        | 90 +++++--------------
 2 files changed, 36 insertions(+), 86 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a491ba84bf705..6cf359d6d217a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44646,6 +44646,23 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
   // Special case for (pre-legalization) vXi1 reductions.
   if (NumElts > 64 || !isPowerOf2_32(NumElts))
     return SDValue();
+  if (Match.getOpcode() == ISD::SETCC) {
+    ISD::CondCode CC = cast<CondCodeSDNode>(Match.getOperand(2))->get();
+    if ((BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) ||
+        (BinOp == ISD::OR && CC == ISD::CondCode::SETNE)) {
+      // If representable as a scalar integer:
+      // For all_of(setcc(x,y,eq)) - use (iX)x == (iX)y.
+      // For any_of(setcc(x,y,ne)) - use (iX)x != (iX)y.
+      EVT VecVT = Match.getOperand(0).getValueType();
+      EVT IntVT = EVT::getIntegerVT(Ctx, VecVT.getSizeInBits());
+      if (TLI.isTypeLegal(IntVT)) {
+        SDValue LHS = DAG.getFreeze(Match.getOperand(0));
+        SDValue RHS = DAG.getFreeze(Match.getOperand(1));
+        return DAG.getSetCC(DL, ExtractVT, DAG.getBitcast(IntVT, LHS),
+                            DAG.getBitcast(IntVT, RHS), CC);
+      }
+    }
+  }
   if (TLI.isTypeLegal(MatchVT)) {
     // If this is a legal AVX512 predicate type then we can just bitcast.
     EVT MovmskVT = EVT::getIntegerVT(Ctx, NumElts);
@@ -44657,20 +44674,7 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
     ISD::CondCode CC = cast<CondCodeSDNode>(Match.getOperand(2))->get();
     if ((BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) ||
         (BinOp == ISD::OR && CC == ISD::CondCode::SETNE)) {
-      EVT VecVT = Match.getOperand(0).getValueType();
-
-      // If representable as a scalar integer:
-      // For all_of(setcc(x,y,eq)) - use (iX)x == (iX)y.
-      // For any_of(setcc(x,y,ne)) - use (iX)x != (iX)y.
-      EVT IntVT = EVT::getIntegerVT(Ctx, VecVT.getSizeInBits());
-      if (TLI.isTypeLegal(IntVT)) {
-        SDValue LHS = DAG.getFreeze(Match.getOperand(0));
-        SDValue RHS = DAG.getFreeze(Match.getOperand(1));
-        return DAG.getSetCC(DL, ExtractVT, DAG.getBitcast(IntVT, LHS),
-                            DAG.getBitcast(IntVT, RHS), CC);
-      }
-
-      EVT VecSVT = VecVT.getScalarType();
+      EVT VecSVT = Match.getOperand(0).getValueType().getScalarType();
       if (VecSVT != MVT::i8 && (VecSVT.getSizeInBits() % 8) == 0) {
         NumElts *= VecSVT.getSizeInBits() / 8;
         EVT CmpVT = EVT::getVectorVT(Ctx, MVT::i8, NumElts);
diff --git a/llvm/test/CodeGen/X86/pr53419.ll b/llvm/test/CodeGen/X86/pr53419.ll
index d92a7ceecec48..9455810fa2d78 100644
--- a/llvm/test/CodeGen/X86/pr53419.ll
+++ b/llvm/test/CodeGen/X86/pr53419.ll
@@ -13,32 +13,12 @@ declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)

 ; FIXME: All four versions are semantically equivalent and should produce same asm as scalar version.
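; A minimal illustrative sketch (editorial, not part of this patch) of the fold
; described in the commit message, assuming the compared vector fits in a legal
; scalar integer type. An all_of(icmp eq) reduction such as:
;   %c = icmp eq <4 x i8> %x, %y
;   %r = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %c)
; can instead be lowered as one scalar compare of the bitcast inputs:
;   %fx = freeze <4 x i8> %x
;   %fy = freeze <4 x i8> %y
;   %xi = bitcast <4 x i8> %fx to i32
;   %yi = bitcast <4 x i8> %fy to i32
;   %r = icmp eq i32 %xi, %yi
; since the vectors are equal lane-for-lane exactly when their i32 images are
; equal; the freezes mirror the DAG.getFreeze calls in the combine, which guard
; against poison in individual lanes.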
define i1 @intrinsic_v2i8(ptr align 1 %arg, ptr align 1 %arg1) { -; SSE-LABEL: intrinsic_v2i8: -; SSE: # %bb.0: # %bb -; SSE-NEXT: movzwl (%rdi), %eax -; SSE-NEXT: cmpw %ax, (%rsi) -; SSE-NEXT: sete %al -; SSE-NEXT: retq -; -; AVX-LABEL: intrinsic_v2i8: -; AVX: # %bb.0: # %bb -; AVX-NEXT: movzwl (%rdi), %eax -; AVX-NEXT: cmpw %ax, (%rsi) -; AVX-NEXT: sete %al -; AVX-NEXT: retq -; -; AVX512-LABEL: intrinsic_v2i8: -; AVX512: # %bb.0: # %bb -; AVX512-NEXT: movzwl (%rsi), %eax -; AVX512-NEXT: vmovd %eax, %xmm0 -; AVX512-NEXT: movzwl (%rdi), %eax -; AVX512-NEXT: vmovd %eax, %xmm1 -; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 -; AVX512-NEXT: knotw %k0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: testb $3, %al -; AVX512-NEXT: sete %al -; AVX512-NEXT: retq +; X64-LABEL: intrinsic_v2i8: +; X64: # %bb.0: # %bb +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: cmpw %ax, (%rsi) +; X64-NEXT: sete %al +; X64-NEXT: retq ; ; X86-LABEL: intrinsic_v2i8: ; X86: # %bb.0: # %bb @@ -57,30 +37,12 @@ bb: } define i1 @intrinsic_v4i8(ptr align 1 %arg, ptr align 1 %arg1) { -; SSE-LABEL: intrinsic_v4i8: -; SSE: # %bb.0: # %bb -; SSE-NEXT: movl (%rdi), %eax -; SSE-NEXT: cmpl %eax, (%rsi) -; SSE-NEXT: sete %al -; SSE-NEXT: retq -; -; AVX-LABEL: intrinsic_v4i8: -; AVX: # %bb.0: # %bb -; AVX-NEXT: movl (%rdi), %eax -; AVX-NEXT: cmpl %eax, (%rsi) -; AVX-NEXT: sete %al -; AVX-NEXT: retq -; -; AVX512-LABEL: intrinsic_v4i8: -; AVX512: # %bb.0: # %bb -; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 -; AVX512-NEXT: knotw %k0, %k0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: testb $15, %al -; AVX512-NEXT: sete %al -; AVX512-NEXT: retq +; X64-LABEL: intrinsic_v4i8: +; X64: # %bb.0: # %bb +; X64-NEXT: movl (%rdi), %eax +; X64-NEXT: cmpl %eax, (%rsi) +; X64-NEXT: sete %al +; X64-NEXT: retq ; ; X86-LABEL: intrinsic_v4i8: ; X86: # %bb.0: # %bb @@ -99,28 +61,12 @@ bb: } define i1 @intrinsic_v8i8(ptr align 1 %arg, ptr align 1 %arg1) { -; SSE-LABEL: intrinsic_v8i8: -; SSE: # %bb.0: # %bb -; SSE-NEXT: movq (%rdi), %rax -; SSE-NEXT: cmpq %rax, (%rsi) -; SSE-NEXT: sete %al -; SSE-NEXT: retq -; -; AVX-LABEL: intrinsic_v8i8: -; AVX: # %bb.0: # %bb -; AVX-NEXT: movq (%rdi), %rax -; AVX-NEXT: cmpq %rax, (%rsi) -; AVX-NEXT: sete %al -; AVX-NEXT: retq -; -; AVX512-LABEL: intrinsic_v8i8: -; AVX512: # %bb.0: # %bb -; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 -; AVX512-NEXT: kortestb %k0, %k0 -; AVX512-NEXT: setb %al -; AVX512-NEXT: retq +; X64-LABEL: intrinsic_v8i8: +; X64: # %bb.0: # %bb +; X64-NEXT: movq (%rdi), %rax +; X64-NEXT: cmpq %rax, (%rsi) +; X64-NEXT: sete %al +; X64-NEXT: retq ; ; X86-LABEL: intrinsic_v8i8: ; X86: # %bb.0: # %bb From 71dc3de533b9247223c083a3b058859c9759099c Mon Sep 17 00:00:00 2001 From: Caleb Zulawski Date: Wed, 22 Mar 2023 16:00:19 +0000 Subject: [PATCH 310/691] [ARM] Improve min/max vector reductions on Arm This patch adds some more efficient lowering for vecreduce.min/max under NEON, using sequences of pairwise vpmin/vpmax to reduce to a single value. This nearly resolves issues such as #50466, #40981, #38190. 
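As a rough scalar model of the pairwise strategy (an editorial sketch with
illustrative names, not code from this patch), each vpmin/vpmax step combines
adjacent lane pairs, so log2(#lanes) steps leave the reduced value in lane 0:

#include <algorithm>
#include <array>
#include <cstdint>

// Models the sequence "vpmin.u8; vpmin.u8; vpmin.u8; vmov.u8 r0, d16[0]"
// emitted for an 8-lane unsigned byte vecreduce.umin.
uint8_t pairwiseUMin8(std::array<uint8_t, 8> Lanes) {
  for (unsigned Active = 8; Active > 1; Active /= 2)
    // One vpmin step: lane I becomes min(lane 2*I, lane 2*I+1).
    for (unsigned I = 0; I != Active / 2; ++I)
      Lanes[I] = std::min(Lanes[2 * I], Lanes[2 * I + 1]);
  return Lanes[0];
}

For 128-bit inputs the lowering first splits the vector into two 64-bit halves
and feeds both to a single pairwise step, as the SplitVector code in the diff
below shows.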
Differential Revision: https://reviews.llvm.org/D146404 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 87 +++++++++ llvm/test/CodeGen/ARM/vecreduce-minmax.ll | 219 ++++++++++++++++++++++ 2 files changed, 306 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/vecreduce-minmax.ll diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 126bbc61a7d30..9c5f0df4d9468 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1007,6 +1007,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal); } } + + for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16, + MVT::v4i32}) { + setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom); + setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom); + setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom); + } } if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) { @@ -10271,6 +10279,80 @@ static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, return LowerVecReduce(Op, DAG, ST); } +static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG, + const ARMSubtarget *ST) { + if (!ST->hasNEON()) + return SDValue(); + + SDLoc dl(Op); + SDValue Op0 = Op->getOperand(0); + EVT VT = Op0.getValueType(); + EVT EltVT = VT.getVectorElementType(); + + unsigned PairwiseIntrinsic = 0; + switch (Op->getOpcode()) { + default: + llvm_unreachable("Expected VECREDUCE opcode"); + case ISD::VECREDUCE_UMIN: + PairwiseIntrinsic = Intrinsic::arm_neon_vpminu; + break; + case ISD::VECREDUCE_UMAX: + PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu; + break; + case ISD::VECREDUCE_SMIN: + PairwiseIntrinsic = Intrinsic::arm_neon_vpmins; + break; + case ISD::VECREDUCE_SMAX: + PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs; + break; + } + SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32); + + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumActiveLanes = NumElts; + + assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 || + NumActiveLanes == 2) && + "Only expected a power 2 vector size"); + + // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors. + if (VT.is128BitVector()) { + SDValue Lo, Hi; + std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl); + VT = Lo.getValueType(); + Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi}); + NumActiveLanes /= 2; + } + + // Use pairwise reductions until one lane remains + while (NumActiveLanes > 1) { + Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0}); + NumActiveLanes /= 2; + } + + SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, + DAG.getConstant(0, dl, MVT::i32)); + + // Result type may be wider than element type. 
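+  // The lane extracted above has the vector element type (e.g. i8 for v8i8),
+  // but the VECREDUCE node's result can be a wider scalar such as i32. The
+  // switch below closes that gap: unsigned reductions zero-extend and signed
+  // reductions sign-extend.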
+ if (EltVT != Op.getValueType()) { + unsigned Extend = 0; + switch (Op->getOpcode()) { + default: + llvm_unreachable("Expected VECREDUCE opcode"); + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_UMAX: + Extend = ISD::ZERO_EXTEND; + break; + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_SMAX: + Extend = ISD::SIGN_EXTEND; + break; + } + Res = DAG.getNode(Extend, dl, Op.getValueType(), Res); + } + return Res; +} + static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { if (isStrongerThanMonotonic(cast(Op)->getSuccessOrdering())) // Acquire/Release load/store is not legal for targets without a dmb or @@ -10502,6 +10584,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::VECREDUCE_FMIN: case ISD::VECREDUCE_FMAX: return LowerVecReduceF(Op, DAG, Subtarget); + case ISD::VECREDUCE_UMIN: + case ISD::VECREDUCE_UMAX: + case ISD::VECREDUCE_SMIN: + case ISD::VECREDUCE_SMAX: + return LowerVecReduceMinMax(Op, DAG, Subtarget); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); diff --git a/llvm/test/CodeGen/ARM/vecreduce-minmax.ll b/llvm/test/CodeGen/ARM/vecreduce-minmax.ll new file mode 100644 index 0000000000000..c392e6ca6bfa6 --- /dev/null +++ b/llvm/test/CodeGen/ARM/vecreduce-minmax.ll @@ -0,0 +1,219 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=hard -mattr=+neon -verify-machineinstrs | FileCheck %s + +define i8 @test_umin_v8i8(<8 x i8> %x) { +; CHECK-LABEL: test_umin_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.u8 d16, d0, d0 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vmov.u8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %x) + ret i8 %z +} + +define i8 @test_smin_v8i8(<8 x i8> %x) { +; CHECK-LABEL: test_smin_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.s8 d16, d0, d0 +; CHECK-NEXT: vpmin.s8 d16, d16, d16 +; CHECK-NEXT: vpmin.s8 d16, d16, d16 +; CHECK-NEXT: vmov.s8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %x) + ret i8 %z +} + +define i8 @test_umax_v8i8(<8 x i8> %x) { +; CHECK-LABEL: test_umax_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.u8 d16, d0, d0 +; CHECK-NEXT: vpmax.u8 d16, d16, d16 +; CHECK-NEXT: vpmax.u8 d16, d16, d16 +; CHECK-NEXT: vmov.u8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %x) + ret i8 %z +} + +define i8 @test_smax_v8i8(<8 x i8> %x) { +; CHECK-LABEL: test_smax_v8i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.s8 d16, d0, d0 +; CHECK-NEXT: vpmax.s8 d16, d16, d16 +; CHECK-NEXT: vpmax.s8 d16, d16, d16 +; CHECK-NEXT: vmov.s8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %x) + ret i8 %z +} + +define i16 @test_umin_v4i16(<4 x i16> %x) { +; CHECK-LABEL: test_umin_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.u16 d16, d0, d0 +; CHECK-NEXT: vpmin.u16 d16, d16, d16 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %x) + ret i16 %z +} + +define i16 @test_smin_v4i16(<4 x i16> %x) { +; CHECK-LABEL: test_smin_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.s16 d16, d0, d0 +; CHECK-NEXT: vpmin.s16 d16, d16, d16 +; CHECK-NEXT: vmov.s16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i16 
@llvm.vector.reduce.smin.v4i16(<4 x i16> %x) + ret i16 %z +} + +define i16 @test_umax_v4i16(<4 x i16> %x) { +; CHECK-LABEL: test_umax_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.u16 d16, d0, d0 +; CHECK-NEXT: vpmax.u16 d16, d16, d16 +; CHECK-NEXT: vmov.u16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %x) + ret i16 %z +} + +define i16 @test_smax_v4i16(<4 x i16> %x) { +; CHECK-LABEL: test_smax_v4i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.s16 d16, d0, d0 +; CHECK-NEXT: vpmax.s16 d16, d16, d16 +; CHECK-NEXT: vmov.s16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %x) + ret i16 %z +} + +define i32 @test_umin_v2i32(<2 x i32> %x) { +; CHECK-LABEL: test_umin_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.u32 d16, d0, d0 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %x) + ret i32 %z +} + +define i32 @test_smin_v2i32(<2 x i32> %x) { +; CHECK-LABEL: test_smin_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.s32 d16, d0, d0 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %x) + ret i32 %z +} + +define i32 @test_umax_v2i32(<2 x i32> %x) { +; CHECK-LABEL: test_umax_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.u32 d16, d0, d0 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %x) + ret i32 %z +} + +define i32 @test_smax_v2i32(<2 x i32> %x) { +; CHECK-LABEL: test_smax_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.s32 d16, d0, d0 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %x) + ret i32 %z +} + +define i8 @test_umin_v16i8(<16 x i8> %x) { +; CHECK-LABEL: test_umin_v16i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.u8 d16, d0, d1 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vmov.u8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %x) + ret i8 %z +} + +define i16 @test_smin_v8i16(<8 x i16> %x) { +; CHECK-LABEL: test_smin_v8i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmin.s16 d16, d0, d1 +; CHECK-NEXT: vpmin.s16 d16, d16, d16 +; CHECK-NEXT: vpmin.s16 d16, d16, d16 +; CHECK-NEXT: vmov.s16 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %x) + ret i16 %z +} + +define i32 @test_umax_v4i32(<4 x i32> %x) { +; CHECK-LABEL: test_umax_v4i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vpmax.u32 d16, d0, d1 +; CHECK-NEXT: vpmax.u32 d16, d16, d16 +; CHECK-NEXT: vmov.32 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %x) + ret i32 %z +} + +define i8 @test_umin_v32i8(<32 x i8> %x) { +; CHECK-LABEL: test_umin_v32i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmin.u8 q8, q0, q1 +; CHECK-NEXT: vpmin.u8 d16, d16, d17 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vpmin.u8 d16, d16, d16 +; CHECK-NEXT: vmov.u8 r0, d16[0] +; CHECK-NEXT: bx lr +entry: + %z = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %x) + ret i8 %z +} + +declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>) +declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>) +declare i8 
@llvm.vector.reduce.smax.v8i8(<8 x i8>) +declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>) +declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>) +declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>) +declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>) + +declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>) +declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>) +declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>) + +declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>) From 06f16232b1b0028ac87d584883bc32220882c73a Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 20 Mar 2023 17:31:40 +0000 Subject: [PATCH 311/691] [RISCV][NFC] Make interleaved access test more vectorizable The previous test case stored the result of a deinterleaved load and add into the same source address, which resulted in some scatters which we weren't testing for and made the tests harder to understand. Store it at a separate address, which will make the tests easier to read when the cost model is changed after D145085 is landed Reviewed By: reames Differential Revision: https://reviews.llvm.org/D146442 --- .../RISCV/interleaved-accesses.ll | 62 ++++++++++--------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll index b81d14c520770..d51f7becebeb5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll @@ -395,7 +395,7 @@ exit: ret void } -define void @combine_load_factor2_i32(ptr %p) { +define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; CHECK-LABEL: @combine_load_factor2_i32( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -403,30 +403,31 @@ define void @combine_load_factor2_i32(ptr %p) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> [[STEP_ADD]], -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], <4 x i64> [[TMP0]] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], <4 x i64> [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x ptr> [[TMP2]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP3]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 -; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP6:%.*]] = 
getelementptr i32, ptr [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 +; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> -; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> -; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <8 x i32> [[WIDE_VEC2]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> -; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[STRIDED_VEC4]] -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC5]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP8]], <4 x ptr> [[TMP2]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) -; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP9]], <4 x ptr> [[TMP3]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>) +; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> +; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> +; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[STRIDED_VEC3]] +; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[STRIDED_VEC4]] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[Q]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0 +; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP12]], align 4 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i32 4 +; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[TMP13]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], <i64 4, i64 4, i64 4, i64 4> -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -442,7 +443,8 @@ define void @combine_load_factor2_i32(ptr %p) { ; CHECK-NEXT: [[Q1:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET1]] ; CHECK-NEXT: [[X1:%.*]] = load i32, ptr [[Q1]], align 4 ; CHECK-NEXT: [[RES:%.*]] = add i32 [[X0]], [[X1]] -; CHECK-NEXT: store i32 [[RES]], ptr [[Q0]], align 4 +; CHECK-NEXT: [[DST:%.*]] = getelementptr i32, ptr [[Q]], i64 [[I]] +; CHECK-NEXT: store i32 [[RES]], ptr [[DST]], align 4 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 ; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] @@ -464,7 +466,8 @@ loop: %res = add i32 %x0, %x1 - store i32 %res, ptr %q0 + %dst = getelementptr i32, ptr %q, i64 %i + store i32 %res, ptr %dst %nexti = add i64 %i, 1 %done = icmp eq i64 %nexti, 1024 @@ -473,7 +476,7 @@ exit: ret void } -define void @combine_load_factor2_i64(ptr %p) { +define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) { ; 
CHECK-LABEL: @combine_load_factor2_i64( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] @@ -486,7 +489,8 @@ define void @combine_load_factor2_i64(ptr %p) { ; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] ; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 4 ; CHECK-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]] -; CHECK-NEXT: store i64 [[RES]], ptr [[Q0]], align 4 +; CHECK-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[I]] +; CHECK-NEXT: store i64 [[RES]], ptr [[DST]], align 4 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 ; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]] @@ -508,7 +512,8 @@ loop: %res = add i64 %x0, %x1 - store i64 %res, ptr %q0 + %dst = getelementptr i64, ptr %q, i64 %i + store i64 %res, ptr %dst %nexti = add i64 %i, 1 %done = icmp eq i64 %nexti, 1024 @@ -516,3 +521,4 @@ loop: exit: ret void } + From 65890469cebb675e9fa0271dc1ab3b1da15df302 Mon Sep 17 00:00:00 2001 From: Caroline Concatto Date: Wed, 22 Mar 2023 14:57:18 +0000 Subject: [PATCH 312/691] [AArch64] Add asm aliases for MOV, LDR, STR with predicate-as-counter The 2022-12 release of the A64 ISA was updated so that the assembler must also accept predicate-as-counter register names for the source and destination predicate registers of: * *MOV: Move predicate (unpredicated)* * *LDR (predicate): Load predicate register* * *STR (predicate): Store predicate register* Reviewed By: sdesmalen Differential Revision: https://reviews.llvm.org/D146311 --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 19 +++++++ llvm/lib/Target/AArch64/SVEInstrFormats.td | 2 - llvm/test/MC/AArch64/SVE/pfalse.s | 6 --- .../SVE/predicate-as-counter-aliases.s | 50 +++++++++++++++++++ 4 files changed, 69 insertions(+), 8 deletions(-) create mode 100644 llvm/test/MC/AArch64/SVE/predicate-as-counter-aliases.s diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 858b352c8c72e..ba33e9cfe949c 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3844,6 +3844,25 @@ defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>; defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>; defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>; defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>; + + +// Aliases for existing SVE instructions for which predicate-as-counter are +// accepted as an operand to the instruction +def : InstAlias<"ldr $Pt, [$Rn, $imm9, mul vl]", + (LDR_PXI PNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; +def : InstAlias<"ldr $Pt, [$Rn]", + (LDR_PXI PNRAny:$Pt, GPR64sp:$Rn, 0), 0>; + +def : InstAlias<"str $Pt, [$Rn, $imm9, mul vl]", + (STR_PXI PNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), 0>; +def : InstAlias<"str $Pt, [$Rn]", + (STR_PXI PNRAny:$Pt, GPR64sp:$Rn, 0), 0>; + +def : InstAlias<"mov $Pd, $Pn", + (ORR_PPzPP PNR8:$Pd, PNR8:$Pn, PNR8:$Pn, PNR8:$Pn), 0>; + +def : InstAlias<"pfalse\t$Pd", (PFALSE PNR8:$Pd), 0>; + } // End HasSVE2p1_or_HasSME2 //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 1d3bf9150ca41..736d5b40ccb8d 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -728,8 +728,6 @@ class sve_int_pfalse opc, string asm> multiclass sve_int_pfalse opc, string asm> { def 
NAME : sve_int_pfalse<opc, asm>; - def : InstAlias<"pfalse\t$Pd", (!cast<Instruction>(NAME) PNR8:$Pd), 0>; - def : Pat<(nxv16i1 immAllZerosV), (!cast<Instruction>(NAME))>; def : Pat<(nxv8i1 immAllZerosV), (!cast<Instruction>(NAME))>; def : Pat<(nxv4i1 immAllZerosV), (!cast<Instruction>(NAME))>; diff --git a/llvm/test/MC/AArch64/SVE/pfalse.s b/llvm/test/MC/AArch64/SVE/pfalse.s index 7ac4d5c44f433..4124da8ac92a0 100644 --- a/llvm/test/MC/AArch64/SVE/pfalse.s +++ b/llvm/test/MC/AArch64/SVE/pfalse.s @@ -14,9 +14,3 @@ pfalse p15.b // CHECK-ENCODING: [0x0f,0xe4,0x18,0x25] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: 2518e40f - -pfalse pn15.b -// CHECK-INST: pfalse p15.b -// CHECK-ENCODING: [0x0f,0xe4,0x18,0x25] -// CHECK-ERROR: instruction requires: sve or sme -// CHECK-UNKNOWN: 2518e40f diff --git a/llvm/test/MC/AArch64/SVE/predicate-as-counter-aliases.s b/llvm/test/MC/AArch64/SVE/predicate-as-counter-aliases.s new file mode 100644 index 0000000000000..bca2cf913ff64 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE/predicate-as-counter-aliases.s @@ -0,0 +1,50 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump --no-print-imm-hex -d --mattr=+sve2p1 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sve2p1 < %s \ +// RUN: | llvm-objdump --no-print-imm-hex -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN + + +ldr pn0, [x0] +// CHECK-INST: ldr p0, [x0] +// CHECK-ENCODING: [0x00,0x00,0x80,0x85] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 85800000 + +ldr pn5, [x10, #255, mul vl] +// CHECK-INST: ldr p5, [x10, #255, mul vl] +// CHECK-ENCODING: [0x45,0x1d,0x9f,0x85] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 859f1d45 + + +str pn0, [x0] +// CHECK-INST: str p0, [x0] +// CHECK-ENCODING: [0x00,0x00,0x80,0xe5] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: e5800000 + +str pn5, [x10, #255, mul vl] +// CHECK-INST: str p5, [x10, #255, mul vl] +// CHECK-ENCODING: [0x45,0x1d,0x9f,0xe5] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: e59f1d45 + + +mov pn0.b, pn0.b +// CHECK-INST: mov p0.b, p0.b +// CHECK-ENCODING: [0x00,0x40,0x80,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25804000 + + +pfalse pn15.b +// CHECK-INST: pfalse p15.b
+// CHECK-ENCODING: [0x0f,0xe4,0x18,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 2518e40f From 0528087663f1558a2f662d4317b0b63d8f4a6fca Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 22 Mar 2023 09:21:12 -0700 Subject: [PATCH 313/691] [NFC][WebAssembly] Autogenerate test expectations for tailcall.ll A follow-on commit will add tests to this file, and using the update_llc_test_checks script will make that easier. 
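For reference, the expectations in a file like this are regenerated with an invocation along the following lines (the build directory is illustrative; point --llc-binary at your own llc):
  $ python3 llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc llvm/test/CodeGen/WebAssembly/tailcall.ll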
Differential Revision: https://reviews.llvm.org/D146568 --- llvm/test/CodeGen/WebAssembly/tailcall.ll | 445 ++++++++++++++++++---- 1 file changed, 367 insertions(+), 78 deletions(-) diff --git a/llvm/test/CodeGen/WebAssembly/tailcall.ll b/llvm/test/CodeGen/WebAssembly/tailcall.ll index 07cdea1ec9b0f..34dd0a9a424b6 100644 --- a/llvm/test/CodeGen/WebAssembly/tailcall.ll +++ b/llvm/test/CodeGen/WebAssembly/tailcall.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,SLOW %s -; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,FAST %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,SLOW %s +; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -fast-isel -mcpu=mvp -mattr=+tail-call | FileCheck --check-prefixes=CHECK,FAST %s ; RUN: llc < %s --filetype=obj -mattr=+tail-call | obj2yaml | FileCheck --check-prefix=YAML %s ; Test that the tail calls lower correctly @@ -10,101 +11,177 @@ target triple = "wasm32-unknown-unknown" declare i1 @foo(i1) declare i1 @bar(i1) -; CHECK-LABEL: recursive_notail_nullary: -; CHECK: {{^}} call recursive_notail_nullary{{$}} -; CHECK-NEXT: return define void @recursive_notail_nullary() { +; CHECK-LABEL: recursive_notail_nullary: +; CHECK: .functype recursive_notail_nullary () -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call recursive_notail_nullary +; CHECK-NEXT: return notail call void @recursive_notail_nullary() ret void } -; CHECK-LABEL: recursive_musttail_nullary: -; CHECK: return_call recursive_musttail_nullary{{$}} define void @recursive_musttail_nullary() { +; CHECK-LABEL: recursive_musttail_nullary: +; CHECK: .functype recursive_musttail_nullary () -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: return_call recursive_musttail_nullary musttail call void @recursive_musttail_nullary() ret void } - -; CHECK-LABEL: recursive_tail_nullary: -; SLOW: return_call recursive_tail_nullary{{$}} -; FAST: {{^}} call recursive_tail_nullary{{$}} -; FAST-NEXT: return{{$}} define void @recursive_tail_nullary() { +; SLOW-LABEL: recursive_tail_nullary: +; SLOW: .functype recursive_tail_nullary () -> () +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: return_call recursive_tail_nullary +; +; FAST-LABEL: recursive_tail_nullary: +; FAST: .functype recursive_tail_nullary () -> () +; FAST-NEXT: # %bb.0: +; FAST-NEXT: call recursive_tail_nullary +; FAST-NEXT: return tail call void @recursive_tail_nullary() ret void } -; CHECK-LABEL: recursive_notail: -; CHECK: call $push[[L:[0-9]+]]=, recursive_notail, $0, $1{{$}} -; CHECK-NEXT: return $pop[[L]]{{$}} define i32 @recursive_notail(i32 %x, i32 %y) { +; CHECK-LABEL: recursive_notail: +; CHECK: .functype recursive_notail (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $push0=, recursive_notail, $0, $1 +; CHECK-NEXT: return $pop0 %v = notail call i32 @recursive_notail(i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: recursive_musttail: -; CHECK: return_call recursive_musttail, $0, $1{{$}} define 
i32 @recursive_musttail(i32 %x, i32 %y) { +; CHECK-LABEL: recursive_musttail: +; CHECK: .functype recursive_musttail (i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: return_call recursive_musttail, $0, $1 %v = musttail call i32 @recursive_musttail(i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: recursive_tail: -; SLOW: return_call recursive_tail, $0, $1{{$}} -; FAST: call $push[[L:[0-9]+]]=, recursive_tail, $0, $1{{$}} -; FAST-NEXT: return $pop[[L]]{{$}} define i32 @recursive_tail(i32 %x, i32 %y) { +; SLOW-LABEL: recursive_tail: +; SLOW: .functype recursive_tail (i32, i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: return_call recursive_tail, $0, $1 +; +; FAST-LABEL: recursive_tail: +; FAST: .functype recursive_tail (i32, i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: call $push0=, recursive_tail, $0, $1 +; FAST-NEXT: return $pop0 %v = tail call i32 @recursive_tail(i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: indirect_notail: -; CHECK: call_indirect $push[[L:[0-9]+]]=, $0, $1, $2, $0{{$}} -; CHECK-NEXT: return $pop[[L]]{{$}} define i32 @indirect_notail(%fn %f, i32 %x, i32 %y) { +; CHECK-LABEL: indirect_notail: +; CHECK: .functype indirect_notail (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call_indirect $push0=, $0, $1, $2, $0 # Invalid depth argument! +; CHECK-NEXT: return $pop0 %p = extractvalue %fn %f, 0 %v = notail call i32 %p(%fn %f, i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: indirect_musttail: -; CHECK: return_call_indirect , $0, $1, $2, $0{{$}} define i32 @indirect_musttail(%fn %f, i32 %x, i32 %y) { +; CHECK-LABEL: indirect_musttail: +; CHECK: .functype indirect_musttail (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: return_call_indirect , $0, $1, $2, $0 %p = extractvalue %fn %f, 0 %v = musttail call i32 %p(%fn %f, i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: indirect_tail: -; CHECK: return_call_indirect , $0, $1, $2, $0{{$}} define i32 @indirect_tail(%fn %f, i32 %x, i32 %y) { +; CHECK-LABEL: indirect_tail: +; CHECK: .functype indirect_tail (i32, i32, i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: return_call_indirect , $0, $1, $2, $0 %p = extractvalue %fn %f, 0 %v = tail call i32 %p(%fn %f, i32 %x, i32 %y) ret i32 %v } -; CHECK-LABEL: choice_notail: -; CHECK: call_indirect $push[[L:[0-9]+]]=, $0, $pop{{[0-9]+}}{{$}} -; CHECK-NEXT: return $pop[[L]]{{$}} define i1 @choice_notail(i1 %x) { +; SLOW-LABEL: choice_notail: +; SLOW: .functype choice_notail (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push3=, foo +; SLOW-NEXT: i32.const $push2=, bar +; SLOW-NEXT: i32.const $push0=, 1 +; SLOW-NEXT: i32.and $push1=, $0, $pop0 +; SLOW-NEXT: i32.select $push4=, $pop3, $pop2, $pop1 +; SLOW-NEXT: call_indirect $push5=, $0, $pop4 # Invalid depth argument! +; SLOW-NEXT: return $pop5 +; +; FAST-LABEL: choice_notail: +; FAST: .functype choice_notail (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push3=, foo +; FAST-NEXT: i32.const $push4=, bar +; FAST-NEXT: i32.const $push1=, 1 +; FAST-NEXT: i32.and $push2=, $0, $pop1 +; FAST-NEXT: i32.select $push5=, $pop3, $pop4, $pop2 +; FAST-NEXT: call_indirect $push0=, $0, $pop5 # Invalid depth argument! 
+; FAST-NEXT: return $pop0 %p = select i1 %x, ptr @foo, ptr @bar %v = notail call i1 %p(i1 %x) ret i1 %v } -; CHECK-LABEL: choice_musttail: -; CHECK: return_call_indirect , $0, $pop{{[0-9]+}}{{$}} define i1 @choice_musttail(i1 %x) { +; SLOW-LABEL: choice_musttail: +; SLOW: .functype choice_musttail (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push3=, foo +; SLOW-NEXT: i32.const $push2=, bar +; SLOW-NEXT: i32.const $push0=, 1 +; SLOW-NEXT: i32.and $push1=, $0, $pop0 +; SLOW-NEXT: i32.select $push4=, $pop3, $pop2, $pop1 +; SLOW-NEXT: return_call_indirect , $0, $pop4 +; +; FAST-LABEL: choice_musttail: +; FAST: .functype choice_musttail (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push4=, foo +; FAST-NEXT: i32.const $push3=, bar +; FAST-NEXT: i32.const $push1=, 1 +; FAST-NEXT: i32.and $push2=, $0, $pop1 +; FAST-NEXT: i32.select $push0=, $pop4, $pop3, $pop2 +; FAST-NEXT: return_call_indirect , $0, $pop0 %p = select i1 %x, ptr @foo, ptr @bar %v = musttail call i1 %p(i1 %x) ret i1 %v } -; CHECK-LABEL: choice_tail: -; SLOW: return_call_indirect , $0, $pop{{[0-9]+}}{{$}} -; FAST: call_indirect $push[[L:[0-9]+]]=, $0, $pop{{[0-9]+}}{{$}} -; FAST: return $pop[[L]]{{$}} define i1 @choice_tail(i1 %x) { +; SLOW-LABEL: choice_tail: +; SLOW: .functype choice_tail (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push3=, foo +; SLOW-NEXT: i32.const $push2=, bar +; SLOW-NEXT: i32.const $push0=, 1 +; SLOW-NEXT: i32.and $push1=, $0, $pop0 +; SLOW-NEXT: i32.select $push4=, $pop3, $pop2, $pop1 +; SLOW-NEXT: return_call_indirect , $0, $pop4 +; +; FAST-LABEL: choice_tail: +; FAST: .functype choice_tail (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push3=, foo +; FAST-NEXT: i32.const $push4=, bar +; FAST-NEXT: i32.const $push1=, 1 +; FAST-NEXT: i32.and $push2=, $0, $pop1 +; FAST-NEXT: i32.select $push5=, $pop3, $pop4, $pop2 +; FAST-NEXT: call_indirect $push0=, $0, $pop5 # Invalid depth argument! +; FAST-NEXT: return $pop0 %p = select i1 %x, ptr @foo, ptr @bar %v = tail call i1 %p(i1 %x) ret i1 %v @@ -114,95 +191,200 @@ define i1 @choice_tail(i1 %x) { ; prototype than its caller, so the following tests can only be done with ; 'tail'. 
-; CHECK-LABEL: mismatched_prototypes: -; SLOW: return_call baz, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; FAST: call $push[[L:[0-9]+]]=, baz, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; FAST: return $pop[[L]]{{$}} declare i32 @baz(i32, i32, i32) define i32 @mismatched_prototypes() { +; SLOW-LABEL: mismatched_prototypes: +; SLOW: .functype mismatched_prototypes () -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push2=, 0 +; SLOW-NEXT: i32.const $push1=, 42 +; SLOW-NEXT: i32.const $push0=, 6 +; SLOW-NEXT: return_call baz, $pop2, $pop1, $pop0 +; +; FAST-LABEL: mismatched_prototypes: +; FAST: .functype mismatched_prototypes () -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push1=, 0 +; FAST-NEXT: i32.const $push2=, 42 +; FAST-NEXT: i32.const $push3=, 6 +; FAST-NEXT: call $push0=, baz, $pop1, $pop2, $pop3 +; FAST-NEXT: return $pop0 %v = tail call i32 @baz(i32 0, i32 42, i32 6) ret i32 %v } -; CHECK-LABEL: mismatched_return_void: -; CHECK: call $drop=, baz, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK: return{{$}} define void @mismatched_return_void() { +; SLOW-LABEL: mismatched_return_void: +; SLOW: .functype mismatched_return_void () -> () +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push2=, 0 +; SLOW-NEXT: i32.const $push1=, 42 +; SLOW-NEXT: i32.const $push0=, 6 +; SLOW-NEXT: call $drop=, baz, $pop2, $pop1, $pop0 +; SLOW-NEXT: return +; +; FAST-LABEL: mismatched_return_void: +; FAST: .functype mismatched_return_void () -> () +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push0=, 0 +; FAST-NEXT: i32.const $push1=, 42 +; FAST-NEXT: i32.const $push2=, 6 +; FAST-NEXT: call $drop=, baz, $pop0, $pop1, $pop2 +; FAST-NEXT: return %v = tail call i32 @baz(i32 0, i32 42, i32 6) ret void } -; CHECK-LABEL: mismatched_return_f32: -; CHECK: call $push[[L:[0-9]+]]=, baz, $pop{{[0-9]+}}, $pop{{[0-9]+}}, $pop{{[0-9]+}}{{$}} -; CHECK: f32.reinterpret_i32 $push[[L1:[0-9]+]]=, $pop[[L]]{{$}} -; CHECK: return $pop[[L1]]{{$}} define float @mismatched_return_f32() { +; SLOW-LABEL: mismatched_return_f32: +; SLOW: .functype mismatched_return_f32 () -> (f32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push2=, 0 +; SLOW-NEXT: i32.const $push1=, 42 +; SLOW-NEXT: i32.const $push0=, 6 +; SLOW-NEXT: call $push3=, baz, $pop2, $pop1, $pop0 +; SLOW-NEXT: f32.reinterpret_i32 $push4=, $pop3 +; SLOW-NEXT: return $pop4 +; +; FAST-LABEL: mismatched_return_f32: +; FAST: .functype mismatched_return_f32 () -> (f32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push2=, 0 +; FAST-NEXT: i32.const $push3=, 42 +; FAST-NEXT: i32.const $push4=, 6 +; FAST-NEXT: call $push1=, baz, $pop2, $pop3, $pop4 +; FAST-NEXT: f32.reinterpret_i32 $push0=, $pop1 +; FAST-NEXT: return $pop0 %v = tail call i32 @baz(i32 0, i32 42, i32 6) %u = bitcast i32 %v to float ret float %u } -; CHECK-LABEL: mismatched_indirect_void: -; CHECK: call_indirect $drop=, $0, $1, $2, $0{{$}} -; CHECK: return{{$}} define void @mismatched_indirect_void(%fn %f, i32 %x, i32 %y) { +; CHECK-LABEL: mismatched_indirect_void: +; CHECK: .functype mismatched_indirect_void (i32, i32, i32) -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call_indirect $drop=, $0, $1, $2, $0 # Invalid depth argument! 
+; CHECK-NEXT: return %p = extractvalue %fn %f, 0 %v = tail call i32 %p(%fn %f, i32 %x, i32 %y) ret void } -; CHECK-LABEL: mismatched_indirect_f32: -; CHECK: call_indirect $push[[L:[0-9]+]]=, $0, $1, $2, $0{{$}} -; CHECK: f32.reinterpret_i32 $push[[L1:[0-9]+]]=, $pop[[L]]{{$}} -; CHECK: return $pop[[L1]]{{$}} define float @mismatched_indirect_f32(%fn %f, i32 %x, i32 %y) { +; CHECK-LABEL: mismatched_indirect_f32: +; CHECK: .functype mismatched_indirect_f32 (i32, i32, i32) -> (f32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call_indirect $push0=, $0, $1, $2, $0 # Invalid depth argument! +; CHECK-NEXT: f32.reinterpret_i32 $push1=, $pop0 +; CHECK-NEXT: return $pop1 %p = extractvalue %fn %f, 0 %v = tail call i32 %p(%fn %f, i32 %x, i32 %y) %u = bitcast i32 %v to float ret float %u } -; CHECK-LABEL: mismatched_byval: -; CHECK: i32.store -; CHECK: return_call quux, $pop{{[0-9]+}}{{$}} declare i32 @quux(ptr byval(i32)) define i32 @mismatched_byval(ptr %x) { +; CHECK-LABEL: mismatched_byval: +; CHECK: .functype mismatched_byval (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push1=, __stack_pointer +; CHECK-NEXT: i32.const $push2=, 16 +; CHECK-NEXT: i32.sub $push8=, $pop1, $pop2 +; CHECK-NEXT: local.tee $push7=, $1=, $pop8 +; CHECK-NEXT: global.set __stack_pointer, $pop7 +; CHECK-NEXT: i32.load $push0=, 0($0) +; CHECK-NEXT: i32.store 12($1), $pop0 +; CHECK-NEXT: i32.const $push3=, 16 +; CHECK-NEXT: i32.add $push4=, $1, $pop3 +; CHECK-NEXT: global.set __stack_pointer, $pop4 +; CHECK-NEXT: i32.const $push5=, 12 +; CHECK-NEXT: i32.add $push6=, $1, $pop5 +; CHECK-NEXT: return_call quux, $pop6 %v = tail call i32 @quux(ptr byval(i32) %x) ret i32 %v } -; CHECK-LABEL: varargs: -; CHECK: i32.store -; CHECK: call $0=, var, $1{{$}} -; CHECK: return $0{{$}} declare i32 @var(...) define i32 @varargs(i32 %x) { +; CHECK-LABEL: varargs: +; CHECK: .functype varargs (i32) -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: global.get $push0=, __stack_pointer +; CHECK-NEXT: i32.const $push1=, 16 +; CHECK-NEXT: i32.sub $push5=, $pop0, $pop1 +; CHECK-NEXT: local.tee $push4=, $1=, $pop5 +; CHECK-NEXT: global.set __stack_pointer, $pop4 +; CHECK-NEXT: i32.store 0($1), $0 +; CHECK-NEXT: call $0=, var, $1 +; CHECK-NEXT: i32.const $push2=, 16 +; CHECK-NEXT: i32.add $push3=, $1, $pop2 +; CHECK-NEXT: global.set __stack_pointer, $pop3 +; CHECK-NEXT: return $0 %v = tail call i32 (...) 
@var(i32 %x) ret i32 %v } ; Type transformations inhibit tail calls, even when they are nops -; CHECK-LABEL: mismatched_return_zext: -; CHECK: call define i32 @mismatched_return_zext() { +; SLOW-LABEL: mismatched_return_zext: +; SLOW: .functype mismatched_return_zext () -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push0=, 1 +; SLOW-NEXT: call $push1=, foo, $pop0 +; SLOW-NEXT: i32.const $push3=, 1 +; SLOW-NEXT: i32.and $push2=, $pop1, $pop3 +; SLOW-NEXT: return $pop2 +; +; FAST-LABEL: mismatched_return_zext: +; FAST: .functype mismatched_return_zext () -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push2=, 1 +; FAST-NEXT: call $push1=, foo, $pop2 +; FAST-NEXT: i32.const $push3=, 1 +; FAST-NEXT: i32.and $push0=, $pop1, $pop3 +; FAST-NEXT: return $pop0 %v = tail call i1 @foo(i1 1) %u = zext i1 %v to i32 ret i32 %u } -; CHECK-LABEL: mismatched_return_sext: -; CHECK: call define i32 @mismatched_return_sext() { +; SLOW-LABEL: mismatched_return_sext: +; SLOW: .functype mismatched_return_sext () -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push3=, 0 +; SLOW-NEXT: i32.const $push0=, 1 +; SLOW-NEXT: call $push1=, foo, $pop0 +; SLOW-NEXT: i32.const $push5=, 1 +; SLOW-NEXT: i32.and $push2=, $pop1, $pop5 +; SLOW-NEXT: i32.sub $push4=, $pop3, $pop2 +; SLOW-NEXT: return $pop4 +; +; FAST-LABEL: mismatched_return_sext: +; FAST: .functype mismatched_return_sext () -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push4=, 1 +; FAST-NEXT: call $push3=, foo, $pop4 +; FAST-NEXT: i32.const $push0=, 31 +; FAST-NEXT: i32.shl $push1=, $pop3, $pop0 +; FAST-NEXT: i32.const $push5=, 31 +; FAST-NEXT: i32.shr_s $push2=, $pop1, $pop5 +; FAST-NEXT: return $pop2 %v = tail call i1 @foo(i1 1) %u = sext i1 %v to i32 ret i32 %u } -; CHECK-LABEL: mismatched_return_trunc: -; CHECK: call declare i32 @int() define i1 @mismatched_return_trunc() { +; CHECK-LABEL: mismatched_return_trunc: +; CHECK: .functype mismatched_return_trunc () -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: call $push0=, int +; CHECK-NEXT: return $pop0 %v = tail call i32 @int() %u = trunc i32 %v to i1 ret i1 %u @@ -210,30 +392,115 @@ define i1 @mismatched_return_trunc() { ; Stack-allocated arguments inhibit tail calls -; CHECK-LABEL: stack_arg: -; CHECK: call define i32 @stack_arg(ptr %x) { +; SLOW-LABEL: stack_arg: +; SLOW: .functype stack_arg (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: global.get $push0=, __stack_pointer +; SLOW-NEXT: i32.const $push1=, 16 +; SLOW-NEXT: i32.sub $push7=, $pop0, $pop1 +; SLOW-NEXT: local.tee $push6=, $2=, $pop7 +; SLOW-NEXT: global.set __stack_pointer, $pop6 +; SLOW-NEXT: i32.const $push4=, 12 +; SLOW-NEXT: i32.add $push5=, $2, $pop4 +; SLOW-NEXT: call $1=, stack_arg, $pop5 +; SLOW-NEXT: i32.const $push2=, 16 +; SLOW-NEXT: i32.add $push3=, $2, $pop2 +; SLOW-NEXT: global.set __stack_pointer, $pop3 +; SLOW-NEXT: return $1 +; +; FAST-LABEL: stack_arg: +; FAST: .functype stack_arg (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: global.get $push1=, __stack_pointer +; FAST-NEXT: i32.const $push2=, 16 +; FAST-NEXT: i32.sub $push8=, $pop1, $pop2 +; FAST-NEXT: local.tee $push7=, $2=, $pop8 +; FAST-NEXT: global.set __stack_pointer, $pop7 +; FAST-NEXT: i32.const $push5=, 12 +; FAST-NEXT: i32.add $push6=, $2, $pop5 +; FAST-NEXT: local.copy $push0=, $pop6 +; FAST-NEXT: call $1=, stack_arg, $pop0 +; FAST-NEXT: i32.const $push3=, 16 +; FAST-NEXT: i32.add $push4=, $2, $pop3 +; FAST-NEXT: global.set __stack_pointer, $pop4 +; FAST-NEXT: return $1 %a = alloca i32 %v = tail call i32 
@stack_arg(ptr %a) ret i32 %v } -; CHECK-LABEL: stack_arg_gep: -; CHECK: call define i32 @stack_arg_gep(ptr %x) { +; SLOW-LABEL: stack_arg_gep: +; SLOW: .functype stack_arg_gep (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: global.get $push2=, __stack_pointer +; SLOW-NEXT: i32.const $push3=, 16 +; SLOW-NEXT: i32.sub $push9=, $pop2, $pop3 +; SLOW-NEXT: local.tee $push8=, $2=, $pop9 +; SLOW-NEXT: global.set __stack_pointer, $pop8 +; SLOW-NEXT: i32.const $push6=, 8 +; SLOW-NEXT: i32.add $push7=, $2, $pop6 +; SLOW-NEXT: i32.const $push0=, 4 +; SLOW-NEXT: i32.or $push1=, $pop7, $pop0 +; SLOW-NEXT: call $1=, stack_arg_gep, $pop1 +; SLOW-NEXT: i32.const $push4=, 16 +; SLOW-NEXT: i32.add $push5=, $2, $pop4 +; SLOW-NEXT: global.set __stack_pointer, $pop5 +; SLOW-NEXT: return $1 +; +; FAST-LABEL: stack_arg_gep: +; FAST: .functype stack_arg_gep (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: global.get $push3=, __stack_pointer +; FAST-NEXT: i32.const $push4=, 16 +; FAST-NEXT: i32.sub $push10=, $pop3, $pop4 +; FAST-NEXT: local.tee $push9=, $2=, $pop10 +; FAST-NEXT: global.set __stack_pointer, $pop9 +; FAST-NEXT: i32.const $push7=, 8 +; FAST-NEXT: i32.add $push8=, $2, $pop7 +; FAST-NEXT: local.copy $push0=, $pop8 +; FAST-NEXT: i32.const $push1=, 4 +; FAST-NEXT: i32.add $push2=, $pop0, $pop1 +; FAST-NEXT: call $1=, stack_arg_gep, $pop2 +; FAST-NEXT: i32.const $push5=, 16 +; FAST-NEXT: i32.add $push6=, $2, $pop5 +; FAST-NEXT: global.set __stack_pointer, $pop6 +; FAST-NEXT: return $1 %a = alloca { i32, i32 } %p = getelementptr { i32, i32 }, ptr %a, i32 0, i32 1 %v = tail call i32 @stack_arg_gep(ptr %p) ret i32 %v } -; CHECK-LABEL: stack_arg_cast: -; CHECK: global.get $push{{[0-9]+}}=, __stack_pointer -; CHECK: global.set __stack_pointer, $pop{{[0-9]+}} -; FAST: call ${{[0-9]+}}=, stack_arg_cast, $pop{{[0-9]+}} -; CHECK: global.set __stack_pointer, $pop{{[0-9]+}} -; SLOW: return_call stack_arg_cast, ${{[0-9]+}} define i32 @stack_arg_cast(i32 %x) { +; SLOW-LABEL: stack_arg_cast: +; SLOW: .functype stack_arg_cast (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: global.get $push0=, __stack_pointer +; SLOW-NEXT: i32.const $push1=, 256 +; SLOW-NEXT: i32.sub $push5=, $pop0, $pop1 +; SLOW-NEXT: local.tee $push4=, $1=, $pop5 +; SLOW-NEXT: global.set __stack_pointer, $pop4 +; SLOW-NEXT: i32.const $push2=, 256 +; SLOW-NEXT: i32.add $push3=, $1, $pop2 +; SLOW-NEXT: global.set __stack_pointer, $pop3 +; SLOW-NEXT: return_call stack_arg_cast, $1 +; +; FAST-LABEL: stack_arg_cast: +; FAST: .functype stack_arg_cast (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: global.get $push1=, __stack_pointer +; FAST-NEXT: i32.const $push2=, 256 +; FAST-NEXT: i32.sub $push6=, $pop1, $pop2 +; FAST-NEXT: local.tee $push5=, $2=, $pop6 +; FAST-NEXT: global.set __stack_pointer, $pop5 +; FAST-NEXT: local.copy $push0=, $2 +; FAST-NEXT: call $1=, stack_arg_cast, $pop0 +; FAST-NEXT: i32.const $push3=, 256 +; FAST-NEXT: i32.add $push4=, $2, $pop3 +; FAST-NEXT: global.set __stack_pointer, $pop4 +; FAST-NEXT: return $1 %a = alloca [64 x i32] %i = ptrtoint ptr %a to i32 %v = tail call i32 @stack_arg_cast(i32 %i) @@ -252,6 +519,28 @@ define i32 @stack_arg_cast(i32 %x) { ; YAML-NEXT: ReturnTypes: ; YAML-NEXT: - I32 define i32 @unique_caller(ptr %p) { +; SLOW-LABEL: unique_caller: +; SLOW: .functype unique_caller (i32) -> (i32) +; SLOW-NEXT: # %bb.0: +; SLOW-NEXT: i32.const $push4=, 0 +; SLOW-NEXT: f32.const $push3=, 0x0p0 +; SLOW-NEXT: i64.const $push2=, 0 +; SLOW-NEXT: f64.const $push1=, 0x0p0 +; SLOW-NEXT: i32.load $push0=, 0($0) 
+; SLOW-NEXT: return_call_indirect , $pop4, $pop3, $pop2, $pop1, $pop0 +; +; FAST-LABEL: unique_caller: +; FAST: .functype unique_caller (i32) -> (i32) +; FAST-NEXT: # %bb.0: +; FAST-NEXT: i32.const $push1=, 0 +; FAST-NEXT: i32.const $push7=, 0 +; FAST-NEXT: f32.convert_i32_s $push2=, $pop7 +; FAST-NEXT: i64.const $push3=, 0 +; FAST-NEXT: i32.const $push6=, 0 +; FAST-NEXT: f64.convert_i32_s $push4=, $pop6 +; FAST-NEXT: i32.load $push5=, 0($0) +; FAST-NEXT: call_indirect $push0=, $pop1, $pop2, $pop3, $pop4, $pop5 # Invalid depth argument! +; FAST-NEXT: return $pop0 %f = load ptr, ptr %p %v = tail call i32 %f(i32 0, float 0., i64 0, double 0.) ret i32 %v From 3f23c7f5bedc8786d3f4567d2331a7efcbb2a77e Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 21 Mar 2023 18:00:08 -0700 Subject: [PATCH 314/691] [InstSimplify] Actually use NewOps for calls in simplifyInstructionWithOperands Resolves a TODO. Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D146599 --- .../llvm/Analysis/InstructionSimplify.h | 5 +- llvm/lib/Analysis/InstructionSimplify.cpp | 118 +++++++++--------- .../InstCombine/InstCombineCalls.cpp | 10 +- llvm/unittests/Transforms/Utils/LocalTest.cpp | 3 +- 4 files changed, 72 insertions(+), 64 deletions(-) diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h index 861fa3b20a495..826bd45d8057b 100644 --- a/llvm/include/llvm/Analysis/InstructionSimplify.h +++ b/llvm/include/llvm/Analysis/InstructionSimplify.h @@ -302,8 +302,9 @@ Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, Value *simplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q); -/// Given a callsite, fold the result or return null. -Value *simplifyCall(CallBase *Call, const SimplifyQuery &Q); +/// Given a callsite, callee, and arguments, fold the result or return null. +Value *simplifyCall(CallBase *Call, Value *Callee, ArrayRef<Value *> Args, + const SimplifyQuery &Q); /// Given a constrained FP intrinsic call, tries to compute its simplified /// version. Returns a simplified result or null. diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index ecb0cdbd13c62..eaf0af92484d7 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -6391,10 +6391,13 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1, return nullptr; } -static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { - - unsigned NumOperands = Call->arg_size(); - Function *F = cast<Function>(Call->getCalledFunction()); +static Value *simplifyIntrinsic(CallBase *Call, Value *Callee, + ArrayRef<Value *> Args, + const SimplifyQuery &Q) { + // Operand bundles should not be in Args. + assert(Call->arg_size() == Args.size()); + unsigned NumOperands = Args.size(); + Function *F = cast<Function>(Callee); Intrinsic::ID IID = F->getIntrinsicID(); // Most of the intrinsics with no operands have some kind of side effect. @@ -6420,18 +6423,17 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } if (NumOperands == 1) - return simplifyUnaryIntrinsic(F, Call->getArgOperand(0), Q); + return simplifyUnaryIntrinsic(F, Args[0], Q); if (NumOperands == 2) - return simplifyBinaryIntrinsic(F, Call->getArgOperand(0), - Call->getArgOperand(1), Q); + return simplifyBinaryIntrinsic(F, Args[0], Args[1], Q); // Handle intrinsics with 3 or more arguments.
switch (IID) { case Intrinsic::masked_load: case Intrinsic::masked_gather: { - Value *MaskArg = Call->getArgOperand(2); - Value *PassthruArg = Call->getArgOperand(3); + Value *MaskArg = Args[2]; + Value *PassthruArg = Args[3]; // If the mask is all zeros or undef, the "passthru" argument is the result. if (maskIsAllZeroOrUndef(MaskArg)) return PassthruArg; @@ -6439,8 +6441,7 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } case Intrinsic::fshl: case Intrinsic::fshr: { - Value *Op0 = Call->getArgOperand(0), *Op1 = Call->getArgOperand(1), - *ShAmtArg = Call->getArgOperand(2); + Value *Op0 = Args[0], *Op1 = Args[1], *ShAmtArg = Args[2]; // If both operands are undef, the result is undef. if (Q.isUndefValue(Op0) && Q.isUndefValue(Op1)) @@ -6448,14 +6449,14 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { // If shift amount is undef, assume it is zero. if (Q.isUndefValue(ShAmtArg)) - return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1); + return Args[IID == Intrinsic::fshl ? 0 : 1]; const APInt *ShAmtC; if (match(ShAmtArg, m_APInt(ShAmtC))) { // If there's effectively no shift, return the 1st arg or 2nd arg. APInt BitWidth = APInt(ShAmtC->getBitWidth(), ShAmtC->getBitWidth()); if (ShAmtC->urem(BitWidth).isZero()) - return Call->getArgOperand(IID == Intrinsic::fshl ? 0 : 1); + return Args[IID == Intrinsic::fshl ? 0 : 1]; } // Rotating zero by anything is zero. @@ -6469,31 +6470,24 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { return nullptr; } case Intrinsic::experimental_constrained_fma: { - Value *Op0 = Call->getArgOperand(0); - Value *Op1 = Call->getArgOperand(1); - Value *Op2 = Call->getArgOperand(2); auto *FPI = cast<ConstrainedFPIntrinsic>(Call); - if (Value *V = - simplifyFPOp({Op0, Op1, Op2}, {}, Q, *FPI->getExceptionBehavior(), - *FPI->getRoundingMode())) + if (Value *V = simplifyFPOp(Args, {}, Q, *FPI->getExceptionBehavior(), + *FPI->getRoundingMode())) return V; return nullptr; } case Intrinsic::fma: case Intrinsic::fmuladd: { - Value *Op0 = Call->getArgOperand(0); - Value *Op1 = Call->getArgOperand(1); - Value *Op2 = Call->getArgOperand(2); - if (Value *V = simplifyFPOp({Op0, Op1, Op2}, {}, Q, fp::ebIgnore, + if (Value *V = simplifyFPOp(Args, {}, Q, fp::ebIgnore, RoundingMode::NearestTiesToEven)) return V; return nullptr; } case Intrinsic::smul_fix: case Intrinsic::smul_fix_sat: { - Value *Op0 = Call->getArgOperand(0); - Value *Op1 = Call->getArgOperand(1); - Value *Op2 = Call->getArgOperand(2); + Value *Op0 = Args[0]; + Value *Op1 = Args[1]; + Value *Op2 = Args[2]; Type *ReturnType = F->getReturnType(); // Canonicalize constant operand as Op1 (ConstantFolding handles the case @@ -6520,9 +6514,9 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { return nullptr; } case Intrinsic::vector_insert: { - Value *Vec = Call->getArgOperand(0); - Value *SubVec = Call->getArgOperand(1); - Value *Idx = Call->getArgOperand(2); + Value *Vec = Args[0]; + Value *SubVec = Args[1]; + Value *Idx = Args[2]; Type *ReturnType = F->getReturnType(); // (insert_vector Y, (extract_vector X, 0), 0) -> X @@ -6539,51 +6533,52 @@ static Value *simplifyIntrinsic(CallBase *Call, const SimplifyQuery &Q) { } case Intrinsic::experimental_constrained_fadd: { auto *FPI = cast<ConstrainedFPIntrinsic>(Call); - return simplifyFAddInst( - FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), - Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode()); + return simplifyFAddInst(Args[0], Args[1], FPI->getFastMathFlags(), Q,
*FPI->getExceptionBehavior(), + *FPI->getRoundingMode()); } case Intrinsic::experimental_constrained_fsub: { auto *FPI = cast<ConstrainedFPIntrinsic>(Call); - return simplifyFSubInst( - FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), - Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode()); + return simplifyFSubInst(Args[0], Args[1], FPI->getFastMathFlags(), Q, + *FPI->getExceptionBehavior(), + *FPI->getRoundingMode()); } case Intrinsic::experimental_constrained_fmul: { auto *FPI = cast<ConstrainedFPIntrinsic>(Call); - return simplifyFMulInst( - FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), - Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode()); + return simplifyFMulInst(Args[0], Args[1], FPI->getFastMathFlags(), Q, + *FPI->getExceptionBehavior(), + *FPI->getRoundingMode()); } case Intrinsic::experimental_constrained_fdiv: { auto *FPI = cast<ConstrainedFPIntrinsic>(Call); - return simplifyFDivInst( - FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), - Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode()); + return simplifyFDivInst(Args[0], Args[1], FPI->getFastMathFlags(), Q, + *FPI->getExceptionBehavior(), + *FPI->getRoundingMode()); } case Intrinsic::experimental_constrained_frem: { auto *FPI = cast<ConstrainedFPIntrinsic>(Call); - return simplifyFRemInst( - FPI->getArgOperand(0), FPI->getArgOperand(1), FPI->getFastMathFlags(), - Q, *FPI->getExceptionBehavior(), *FPI->getRoundingMode()); + return simplifyFRemInst(Args[0], Args[1], FPI->getFastMathFlags(), Q, + *FPI->getExceptionBehavior(), + *FPI->getRoundingMode()); } default: return nullptr; } } -static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) { - auto *F = dyn_cast<Function>(Call->getCalledOperand()); +static Value *tryConstantFoldCall(CallBase *Call, Value *Callee, + ArrayRef<Value *> Args, + const SimplifyQuery &Q) { + auto *F = dyn_cast<Function>(Callee); if (!F || !canConstantFoldCallTo(Call, F)) return nullptr; SmallVector<Constant *, 4> ConstantArgs; - unsigned NumArgs = Call->arg_size(); - ConstantArgs.reserve(NumArgs); - for (auto &Arg : Call->args()) { - Constant *C = dyn_cast<Constant>(&Arg); + ConstantArgs.reserve(Args.size()); + for (Value *Arg : Args) { + Constant *C = dyn_cast<Constant>(Arg); if (!C) { - if (isa<MetadataAsValue>(Arg.get())) + if (isa<MetadataAsValue>(Arg)) continue; return nullptr; } @@ -6593,7 +6588,11 @@ static Value *tryConstantFoldCall(CallBase *Call, const SimplifyQuery &Q) { return ConstantFoldCall(Call, F, ConstantArgs, Q.TLI); } -Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) { +Value *llvm::simplifyCall(CallBase *Call, Value *Callee, ArrayRef<Value *> Args, + const SimplifyQuery &Q) { + // Args should not contain operand bundle operands. + assert(Call->arg_size() == Args.size()); + // musttail calls can only be simplified if they are also DCEd. // As we can't guarantee this here, don't simplify them.
if (Call->isMustTailCall()) return nullptr; @@ -6601,16 +6600,15 @@ Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) { // call undef -> poison // call null -> poison - Value *Callee = Call->getCalledOperand(); if (isa<UndefValue>(Callee) || isa<ConstantPointerNull>(Callee)) return PoisonValue::get(Call->getType()); - if (Value *V = tryConstantFoldCall(Call, Q)) + if (Value *V = tryConstantFoldCall(Call, Callee, Args, Q)) return V; auto *F = dyn_cast<Function>(Callee); if (F && F->isIntrinsic()) - if (Value *Ret = simplifyIntrinsic(Call, Q)) + if (Value *Ret = simplifyIntrinsic(Call, Callee, Args, Q)) return Ret; return nullptr; @@ -6618,9 +6616,10 @@ Value *llvm::simplifyCall(CallBase *Call, const SimplifyQuery &Q) { Value *llvm::simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q) { assert(isa<ConstrainedFPIntrinsic>(Call)); - if (Value *V = tryConstantFoldCall(Call, Q)) + SmallVector<Value *, 4> Args(Call->args()); + if (Value *V = tryConstantFoldCall(Call, Call->getCalledOperand(), Args, Q)) return V; - if (Value *Ret = simplifyIntrinsic(Call, Q)) + if (Value *Ret = simplifyIntrinsic(Call, Call->getCalledOperand(), Args, Q)) return Ret; return nullptr; } @@ -6775,8 +6774,9 @@ static Value *simplifyInstructionWithOperands(Instruction *I, case Instruction::PHI: return simplifyPHINode(cast<PHINode>(I), NewOps, Q); case Instruction::Call: - // TODO: Use NewOps - return simplifyCall(cast<CallBase>(I), Q); + return simplifyCall( + cast<CallBase>(I), NewOps.back(), + NewOps.drop_back(1 + cast<CallBase>(I)->getNumTotalBundleOperands()), Q); case Instruction::Freeze: return llvm::simplifyFreezeInst(NewOps[0], Q); #define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 2b61b58dbc36a..0fbd62e8a41c0 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1288,9 +1288,15 @@ foldShuffledIntrinsicOperands(IntrinsicInst *II, Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { // Don't try to simplify calls without uses. It will not do anything useful, // but will result in the following folds being skipped. - if (!CI.use_empty()) - if (Value *V = simplifyCall(&CI, SQ.getWithInstruction(&CI))) + if (!CI.use_empty()) { + SmallVector<Value *, 4> Args; + Args.reserve(CI.arg_size()); + for (Value *Op : CI.args()) + Args.push_back(Op); + if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args, + SQ.getWithInstruction(&CI))) return replaceInstUsesWith(CI, V); + } if (Value *FreedOp = getFreedOperand(&CI, &TLI)) return visitFree(CI, FreedOp); diff --git a/llvm/unittests/Transforms/Utils/LocalTest.cpp b/llvm/unittests/Transforms/Utils/LocalTest.cpp index d6b09b35f2caf..443f1f09915fd 100644 --- a/llvm/unittests/Transforms/Utils/LocalTest.cpp +++ b/llvm/unittests/Transforms/Utils/LocalTest.cpp @@ -598,7 +598,8 @@ TEST(Local, SimplifyVScaleWithRange) { // Test that simplifyCall won't try to query it's parent function for // vscale_range attributes in order to simplify llvm.vscale -> constant. - EXPECT_EQ(simplifyCall(CI, SimplifyQuery(M.getDataLayout())), nullptr); + EXPECT_EQ(simplifyCall(CI, VScale, {}, SimplifyQuery(M.getDataLayout())), + nullptr); delete CI; } From d868135691bb0d5c924b8fd2ae26171fbf5d1387 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Tue, 21 Mar 2023 19:01:12 +0100 Subject: [PATCH 315/691] [libc++] Qualifies ptrdiff_t and max_align_t.
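The change itself is mechanical, as the hunks below show: an unqualified use such as `typedef ptrdiff_t T;` in a test becomes `typedef std::ptrdiff_t T;`, and uses of max_align_t gain the same std:: qualification.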
This has been done using the following commands find libcxx/test -type f -exec perl -pi -e 's|^([^/]+?)((?(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find.first.of/ranges.find_first_of.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find.first.of/ranges.find_first_of.pass.cpp index 5673c70c394b4..cea30420428cd 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.find.first.of/ranges.find_first_of.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.find.first.of/ranges.find_first_of.pass.cpp @@ -71,7 +71,7 @@ template struct Data { std::array input1; std::array input2; - ptrdiff_t expected; + std::ptrdiff_t expected; }; template diff --git a/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp b/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp index 84f49a76a810f..3121e7c57bafe 100644 --- a/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp +++ b/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp @@ -203,7 +203,7 @@ void f() { static_assert(std::is_same_v, ::atomic_intptr_t>); static_assert(std::is_same_v, ::atomic_uintptr_t>); static_assert(std::is_same_v, ::atomic_size_t>); - static_assert(std::is_same_v, ::atomic_ptrdiff_t>); + static_assert(std::is_same_v, ::atomic_ptrdiff_t>); static_assert(std::is_same_v, ::atomic_intmax_t>); static_assert(std::is_same_v, ::atomic_uintmax_t>); diff --git a/libcxx/test/std/atomics/types.pass.cpp b/libcxx/test/std/atomics/types.pass.cpp index e0b617071c04f..63ab0f30c4a75 100644 --- a/libcxx/test/std/atomics/types.pass.cpp +++ b/libcxx/test/std/atomics/types.pass.cpp @@ -61,7 +61,7 @@ struct test_atomic A a; (void)a; #if TEST_STD_VER >= 17 static_assert((std::is_same_v), ""); - static_assert((std::is_same_v), ""); + static_assert((std::is_same_v), ""); #endif } }; @@ -149,7 +149,7 @@ int main(int, char**) test (); test (); test (); - test (); + test (); test (); test (); diff --git a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp index 527c72c1e7c8d..f2ab6e5faa19d 100644 --- a/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp +++ b/libcxx/test/std/containers/sequences/forwardlist/forwardlist.ops/splice_after_range.pass.cpp @@ -19,21 +19,21 @@ #include "test_macros.h" #include "min_allocator.h" -typedef ptrdiff_t T; +typedef std::ptrdiff_t T; const T t1[] = {0, 1, 2, 3, 4, 5, 6, 7}; const T t2[] = {10, 11, 12, 13, 14, 15}; -const ptrdiff_t size_t1 = std::end(t1) - std::begin(t1); -const ptrdiff_t size_t2 = std::end(t2) - std::begin(t2); +const std::ptrdiff_t size_t1 = std::end(t1) - std::begin(t1); +const std::ptrdiff_t size_t2 = std::end(t2) - std::begin(t2); template void -testd(const C& c, ptrdiff_t p, ptrdiff_t f, ptrdiff_t l) +testd(const C& c, std::ptrdiff_t p, ptrdiff_t f, ptrdiff_t l) { typename C::const_iterator i = c.begin(); - ptrdiff_t n1 = 0; + std::ptrdiff_t n1 = 0; for (; n1 < p; ++n1, ++i) assert(*i == t1[n1]); - for (ptrdiff_t n2 = f; n2 < l-1; ++n2, ++i) + for (std::ptrdiff_t n2 = f; n2 < l-1; ++n2, ++i) assert(*i == t2[n2]); for (; n1 < size_t1; ++n1, ++i) assert(*i == t1[n1]); @@ -42,11 +42,11 @@ testd(const C& c, ptrdiff_t p, ptrdiff_t f, ptrdiff_t l) template void -tests(const C& c, ptrdiff_t p, ptrdiff_t f, ptrdiff_t l) +tests(const C& c, std::ptrdiff_t p, ptrdiff_t f, ptrdiff_t l) { typename C::const_iterator i = 
c.begin(); - ptrdiff_t n = 0; - ptrdiff_t d = l > f+1 ? l-1-f : 0; + std::ptrdiff_t n = 0; + std::ptrdiff_t d = l > f+1 ? l-1-f : 0; if (d == 0 || p == f) { for (n = 0; n < size_t1; ++n, ++i) @@ -82,11 +82,11 @@ int main(int, char**) { // splicing different containers typedef std::forward_list C; - for (ptrdiff_t f = 0; f <= size_t2+1; ++f) + for (std::ptrdiff_t f = 0; f <= size_t2+1; ++f) { - for (ptrdiff_t l = f; l <= size_t2+1; ++l) + for (std::ptrdiff_t l = f; l <= size_t2+1; ++l) { - for (ptrdiff_t p = 0; p <= size_t1; ++p) + for (std::ptrdiff_t p = 0; p <= size_t1; ++p) { C c1(std::begin(t1), std::end(t1)); C c2(std::begin(t2), std::end(t2)); @@ -99,11 +99,11 @@ int main(int, char**) } // splicing within same container - for (ptrdiff_t f = 0; f <= size_t1+1; ++f) + for (std::ptrdiff_t f = 0; f <= size_t1+1; ++f) { - for (ptrdiff_t l = f; l <= size_t1; ++l) + for (std::ptrdiff_t l = f; l <= size_t1; ++l) { - for (ptrdiff_t p = 0; p <= f; ++p) + for (std::ptrdiff_t p = 0; p <= f; ++p) { C c1(std::begin(t1), std::end(t1)); @@ -111,7 +111,7 @@ int main(int, char**) std::next(c1.cbefore_begin(), f), std::next(c1.cbefore_begin(), l)); tests(c1, p, f, l); } - for (ptrdiff_t p = l; p <= size_t1; ++p) + for (std::ptrdiff_t p = l; p <= size_t1; ++p) { C c1(std::begin(t1), std::end(t1)); @@ -126,11 +126,11 @@ int main(int, char**) { // splicing different containers typedef std::forward_list> C; - for (ptrdiff_t f = 0; f <= size_t2+1; ++f) + for (std::ptrdiff_t f = 0; f <= size_t2+1; ++f) { - for (ptrdiff_t l = f; l <= size_t2+1; ++l) + for (std::ptrdiff_t l = f; l <= size_t2+1; ++l) { - for (ptrdiff_t p = 0; p <= size_t1; ++p) + for (std::ptrdiff_t p = 0; p <= size_t1; ++p) { C c1(std::begin(t1), std::end(t1)); C c2(std::begin(t2), std::end(t2)); @@ -143,11 +143,11 @@ int main(int, char**) } // splicing within same container - for (ptrdiff_t f = 0; f <= size_t1+1; ++f) + for (std::ptrdiff_t f = 0; f <= size_t1+1; ++f) { - for (ptrdiff_t l = f; l <= size_t1; ++l) + for (std::ptrdiff_t l = f; l <= size_t1; ++l) { - for (ptrdiff_t p = 0; p <= f; ++p) + for (std::ptrdiff_t p = 0; p <= f; ++p) { C c1(std::begin(t1), std::end(t1)); @@ -155,7 +155,7 @@ int main(int, char**) std::next(c1.cbefore_begin(), f), std::next(c1.cbefore_begin(), l)); tests(c1, p, f, l); } - for (ptrdiff_t p = l; p <= size_t1; ++p) + for (std::ptrdiff_t p = l; p <= size_t1; ++p) { C c1(std::begin(t1), std::end(t1)); diff --git a/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp b/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp index 9be44094a099c..e7531aec12b24 100644 --- a/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp +++ b/libcxx/test/std/iterators/iterator.container/ssize.pass.cpp @@ -100,7 +100,7 @@ int main(int, char**) test_const_container ( sv ); static constexpr int arrA [] { 1, 2, 3 }; - ASSERT_SAME_TYPE(ptrdiff_t, decltype(std::ssize(arrA))); + ASSERT_SAME_TYPE(std::ptrdiff_t, decltype(std::ssize(arrA))); static_assert( std::is_signed_v, ""); test_const_array ( arrA ); diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp index a9e6d17720ab3..e0f63ac594367 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_move.pass.cpp @@ -28,7 
+28,7 @@ template struct MaybeNoexceptMove { int x; using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; constexpr friend value_type&& iter_move(MaybeNoexceptMove) noexcept(IsNoexcept) { return std::move(global); diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp index 075930dcb0a07..aa0815390391c 100644 --- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.nonmember/iter_swap.pass.cpp @@ -26,7 +26,7 @@ template struct MaybeNoexceptSwap { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; constexpr friend void iter_swap(MaybeNoexceptSwap, MaybeNoexceptSwap) noexcept(IsNoexcept) { } diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/sfinae.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/sfinae.compile.pass.cpp index 92e0e5cd8f9fa..8ead39231c0ba 100644 --- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/sfinae.compile.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/sfinae.compile.pass.cpp @@ -49,7 +49,7 @@ struct IterBase { using iterator_category = std::bidirectional_iterator_tag; using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; using pointer = int*; using reference = int&; diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp index 712425a0c44ff..5e35f5c8facc4 100644 --- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp +++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_move.pass.cpp @@ -55,7 +55,7 @@ constexpr bool test() { { struct ThrowingCopyNoexceptDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; ThrowingCopyNoexceptDecrement(); ThrowingCopyNoexceptDecrement(const ThrowingCopyNoexceptDecrement&); @@ -80,7 +80,7 @@ constexpr bool test() { { struct NoexceptCopyThrowingDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; NoexceptCopyThrowingDecrement(); NoexceptCopyThrowingDecrement(const NoexceptCopyThrowingDecrement&) noexcept; @@ -105,7 +105,7 @@ constexpr bool test() { { struct NoexceptCopyAndDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; NoexceptCopyAndDecrement(); NoexceptCopyAndDecrement(const NoexceptCopyAndDecrement&) noexcept; diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp index d01ee2a1b85a1..7b6fb43b0001f 100644 --- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp +++ 
b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.nonmember/iter_swap.pass.cpp @@ -61,7 +61,7 @@ constexpr bool test() { { struct ThrowingCopyNoexceptDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; ThrowingCopyNoexceptDecrement(); ThrowingCopyNoexceptDecrement(const ThrowingCopyNoexceptDecrement&); @@ -89,7 +89,7 @@ constexpr bool test() { { struct NoexceptCopyThrowingDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; NoexceptCopyThrowingDecrement(); NoexceptCopyThrowingDecrement(const NoexceptCopyThrowingDecrement&) noexcept; @@ -117,7 +117,7 @@ constexpr bool test() { { struct NoexceptCopyAndDecrement { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; NoexceptCopyAndDecrement(); NoexceptCopyAndDecrement(const NoexceptCopyAndDecrement&) noexcept; diff --git a/libcxx/test/std/language.support/support.types/max_align_t.compile.pass.cpp b/libcxx/test/std/language.support/support.types/max_align_t.compile.pass.cpp index aef00bbc0ddac..5e335841c2a3c 100644 --- a/libcxx/test/std/language.support/support.types/max_align_t.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.types/max_align_t.compile.pass.cpp @@ -26,5 +26,5 @@ static_assert(alignof(std::max_align_t) >= alignof(long double), ""); static_assert(alignof(std::max_align_t) >= alignof(void*), ""); #if TEST_STD_VER > 14 static_assert(alignof(std::max_align_t) <= __STDCPP_DEFAULT_NEW_ALIGNMENT__, - "max_align_t alignment should be no larger than operator new's alignment"); + "std::max_align_t alignment should be no larger than operator new's alignment"); #endif diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp index 0da878d0b1f17..cbaf5dfbec19c 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp @@ -90,7 +90,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp index 41d5ed1e06017..473238be5e92c 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp @@ -87,7 +87,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp index 53fdec6bd2076..8110048e13960 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp @@ -92,7 +92,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); 
static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp index 044a87fa41352..1c30f5cec5191 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp @@ -90,7 +90,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp index 06a8a6d6cc879..b236e37ee8791 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp @@ -87,7 +87,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp index 5a30e57c409a6..58c953f0b97a8 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp @@ -86,7 +86,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); diff --git a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp index 1698e31086ea4..208e694e0282a 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp @@ -91,7 +91,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); diff --git a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp index 91fbab8bbed84..0d14d9e71044b 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp @@ -88,7 +88,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); diff --git a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp index 00e60ff588176..383338a2f21bd 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp @@ -98,7 +98,7 @@ int main(int, char**) 
static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp index 5fe63e47403c4..6cc1410eb682f 100644 --- a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp @@ -86,7 +86,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp index 1b63540cd8c45..b218bb0397335 100644 --- a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp @@ -87,7 +87,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp index 03f3bdaf2d561..c506d0776a02c 100644 --- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp +++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp @@ -137,7 +137,7 @@ int main(int, char**) #endif // int_test(); - signed_test(); + signed_test(); unsigned_test(); return 0; diff --git a/libcxx/test/std/ranges/range.access/ssize.pass.cpp b/libcxx/test/std/ranges/range.access/ssize.pass.cpp index a15dc344512a1..ac2c5b7b6b764 100644 --- a/libcxx/test/std/ranges/range.access/ssize.pass.cpp +++ b/libcxx/test/std/ranges/range.access/ssize.pass.cpp @@ -71,7 +71,7 @@ constexpr bool test() { // This gets converted to ptrdiff_t because it's wider. 
ShortUnsignedReturnType c; assert(std::ranges::ssize(c) == 42); - ASSERT_SAME_TYPE(decltype(std::ranges::ssize(c)), ptrdiff_t); + ASSERT_SAME_TYPE(decltype(std::ranges::ssize(c)), std::ptrdiff_t); return true; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp index 122abe6315c11..a942f43904092 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/constraints.compile.pass.cpp @@ -66,7 +66,7 @@ namespace test3 { struct AlmostInputIterator { using value_type = char; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; using iterator_concept = int; constexpr const char& operator*() const; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp index 3e5671936191e..f9d2b3e7f8950 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_move.pass.cpp @@ -23,7 +23,7 @@ namespace adl { template struct MaybeNoexceptIterator { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; value_type* ptr_ = nullptr; int* iter_move_invocations_ = nullptr; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp index 7d0e8a78caedf..18fd3a31ce23c 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/range.lazy.split.inner/iter_swap.pass.cpp @@ -24,7 +24,7 @@ namespace adl { template struct MaybeNoexceptIterator { using value_type = int; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; value_type* ptr_ = nullptr; int* iter_swap_invocations_ = nullptr; diff --git a/libcxx/test/std/strings/string.view/types.pass.cpp b/libcxx/test/std/strings/string.view/types.pass.cpp index f952f20561230..25dc54d257409 100644 --- a/libcxx/test/std/strings/string.view/types.pass.cpp +++ b/libcxx/test/std/strings/string.view/types.pass.cpp @@ -47,7 +47,7 @@ test() static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); - static_assert((std::is_same::value), ""); + static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); diff --git a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp index 5a3026bc351e9..ca7ad0c92e60e 100644 --- a/libcxx/test/std/thread/thread.semaphore/max.pass.cpp +++ b/libcxx/test/std/thread/thread.semaphore/max.pass.cpp @@ -21,6 +21,6 @@ int main(int, char**) static_assert(std::counting_semaphore<>::max() >= 1, ""); static_assert(std::counting_semaphore<1>::max() >= 1, ""); static_assert(std::counting_semaphore::max()>::max() >= std::numeric_limits::max(), ""); - static_assert(std::counting_semaphore::max()>::max() == std::numeric_limits::max(), ""); + 
static_assert(std::counting_semaphore::max()>::max() == std::numeric_limits::max(), ""); return 0; } diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp index a3184e4cba4a0..5ceedf9f05c42 100644 --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp @@ -109,7 +109,7 @@ auto test_format_to_n = std::size_t n = expected.size(); std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected); } @@ -119,24 +119,24 @@ auto test_format_to_n = std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, std::locale(), fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected); } #endif // TEST_HAS_NO_LOCALIZATION { - ptrdiff_t n = 0; + std::ptrdiff_t n = 0; std::basic_string out; std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out.empty()); } { - ptrdiff_t n = expected.size() / 2; + std::ptrdiff_t n = expected.size() / 2; std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected.substr(0, n)); } diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp index 0cb0257b8ea57..6d002a10c1479 100644 --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp @@ -115,7 +115,7 @@ auto test_format_to_n = std::size_t n = expected.size(); std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected); } @@ -125,24 +125,24 @@ auto test_format_to_n = std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, std::locale(), fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected); } #endif // TEST_HAS_NO_LOCALIZATION { - ptrdiff_t n = 0; + std::ptrdiff_t n = 0; std::basic_string out; std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out.empty()); } { - ptrdiff_t n = expected.size() / 2; + 
std::ptrdiff_t n = expected.size() / 2; std::basic_string out(n, CharT(' ')); std::format_to_n_result result = std::format_to_n(out.begin(), n, fmt, std::forward(args)...); - assert(result.size == static_cast(expected.size())); + assert(result.size == static_cast(expected.size())); assert(result.out == out.end()); assert(out == expected.substr(0, n)); } diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp index d343ad1b1900b..7179a674a37ad 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp @@ -47,7 +47,7 @@ struct recursive_range { struct iterator { using iterator_concept = std::input_iterator_tag; using value_type = recursive_range; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; using reference = recursive_range; reference operator*() const; diff --git a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.ranges.pass.cpp b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.ranges.pass.cpp index 36f7745b7713f..716acbfdcebde 100644 --- a/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.ranges.pass.cpp +++ b/libcxx/test/std/utilities/tuple/tuple.tuple/tuple.helper/tuple.include.ranges.pass.cpp @@ -22,8 +22,8 @@ using Iterator = int*; class SizedSentinel { public: constexpr bool operator==(int*) const; - friend constexpr ptrdiff_t operator-(const SizedSentinel&, int*); - friend constexpr ptrdiff_t operator-(int*, const SizedSentinel&); + friend constexpr std::ptrdiff_t operator-(const SizedSentinel&, int*); + friend constexpr std::ptrdiff_t operator-(int*, const SizedSentinel&); }; static_assert(std::sized_sentinel_for); diff --git a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp index 0f5e2f0ae29ad..ee7e09ac1d655 100644 --- a/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp +++ b/libcxx/test/std/utilities/utility/mem.res/mem.poly.allocator.class/mem.poly.allocator.mem/allocate_deallocate_bytes.pass.cpp @@ -38,10 +38,10 @@ void test() { auto ptr = static_cast(allocation); std::fill(ptr, ptr + 13, '0'); assert(last_size == 13); - assert(last_alignment == alignof(max_align_t)); + assert(last_alignment == alignof(std::max_align_t)); allocator.deallocate_bytes(allocation, 13); assert(last_size == 13); - assert(last_alignment == alignof(max_align_t)); + assert(last_alignment == alignof(std::max_align_t)); } { void* allocation = allocator.allocate_bytes(13, 64); diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h index b03687447c14b..f0e19fc3d5e62 100644 --- a/libcxx/test/support/test_iterators.h +++ b/libcxx/test/support/test_iterators.h @@ -455,7 +455,7 @@ TEST_CONSTEXPR Iter base(Iter i) { return i; } template struct ThrowingIterator { typedef std::bidirectional_iterator_tag iterator_category; - typedef ptrdiff_t difference_type; + typedef std::ptrdiff_t difference_type; typedef const T value_type; typedef const T * pointer; typedef const T & reference; @@ -566,7 +566,7 @@ struct 
ThrowingIterator { template struct NonThrowingIterator { typedef std::bidirectional_iterator_tag iterator_category; - typedef ptrdiff_t difference_type; + typedef std::ptrdiff_t difference_type; typedef const T value_type; typedef const T * pointer; typedef const T & reference; @@ -916,7 +916,7 @@ class Iterator { public: using value_type = int; using reference = int&; - using difference_type = ptrdiff_t; + using difference_type = std::ptrdiff_t; private: value_type* ptr_ = nullptr; From dd0bbae5efa4d23322eda905b2f9e11dfd3c5d36 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 22 Mar 2023 09:28:47 -0700 Subject: [PATCH 316/691] [WebAssembly] Fix epilogue insertion for indirect tail calls Previously epilogues were incorrectly inserted after indirect tail calls because they did not have the `isTerminator` property. Add that property and test that they get correct epilogues. To be safe, also add other properties that were defined for direct tail calls. Differential Revision: https://reviews.llvm.org/D146569 --- .../WebAssembly/WebAssemblyInstrCall.td | 2 +- llvm/test/CodeGen/WebAssembly/tailcall.ll | 37 +++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td index 6a123f8f4030f..ca9a5ef9dda1c 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -73,7 +73,7 @@ defm RET_CALL : "return_call \t$callee", "return_call\t$callee", 0x12>, Requires<[HasTailCall]>; -let isReturn = 1 in +let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in defm RET_CALL_INDIRECT : I<(outs), (ins TypeIndex:$type, table32_op:$table, variable_ops), (outs), (ins TypeIndex:$type, table32_op:$table), [], diff --git a/llvm/test/CodeGen/WebAssembly/tailcall.ll b/llvm/test/CodeGen/WebAssembly/tailcall.ll index 34dd0a9a424b6..84bd142462e37 100644 --- a/llvm/test/CodeGen/WebAssembly/tailcall.ll +++ b/llvm/test/CodeGen/WebAssembly/tailcall.ll @@ -507,6 +507,43 @@ define i32 @stack_arg_cast(i32 %x) { ret i32 %v } +; Checks that epilogues are inserted after return calls. 
+define i32 @direct_epilogue() {
+; CHECK-LABEL: direct_epilogue:
+; CHECK: .functype direct_epilogue () -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: global.get $push0=, __stack_pointer
+; CHECK-NEXT: i32.const $push1=, 256
+; CHECK-NEXT: i32.sub $push5=, $pop0, $pop1
+; CHECK-NEXT: local.tee $push4=, $0=, $pop5
+; CHECK-NEXT: global.set __stack_pointer, $pop4
+; CHECK-NEXT: i32.const $push2=, 256
+; CHECK-NEXT: i32.add $push3=, $0, $pop2
+; CHECK-NEXT: global.set __stack_pointer, $pop3
+; CHECK-NEXT: return_call direct_epilogue
+  %a = alloca [64 x i32]
+  %v = musttail call i32 @direct_epilogue()
+  ret i32 %v
+}
+
+define i32 @indirect_epilogue(ptr %p) {
+; CHECK-LABEL: indirect_epilogue:
+; CHECK: .functype indirect_epilogue (i32) -> (i32)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: global.get $push0=, __stack_pointer
+; CHECK-NEXT: i32.const $push1=, 256
+; CHECK-NEXT: i32.sub $push5=, $pop0, $pop1
+; CHECK-NEXT: local.tee $push4=, $1=, $pop5
+; CHECK-NEXT: global.set __stack_pointer, $pop4
+; CHECK-NEXT: i32.const $push2=, 256
+; CHECK-NEXT: i32.add $push3=, $1, $pop2
+; CHECK-NEXT: global.set __stack_pointer, $pop3
+; CHECK-NEXT: return_call_indirect , $0, $0
+  %a = alloca [64 x i32]
+  %v = musttail call i32 %p(ptr %p)
+  ret i32 %v
+}
+
 ; Check that the signatures generated for external indirectly
 ; return-called functions include the proper return types

From 8eb464f5433ae65bec3536ddb1195e5ff5c46af0 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell
Date: Wed, 22 Mar 2023 14:48:28 +0000
Subject: [PATCH 317/691] [DebugInfo] Allow parsing line tables aligned to 4 or 8-byte boundaries

This allows the DWARFDebugLine::SectionParser to try parsing line tables
at 4 or 8-byte boundaries if the unaligned offset appears invalid. If
aligning the offset does not reduce errors, the offset is used unchanged.

This is needed for llvm-dwarfdump to be able to extract the line tables
(with --debug-line) from binaries produced by certain compilers that like
to align each line table in the .debug_line section. Note that this
alignment does not seem to be invalid since the units do point to the
correct line table offsets via the DW_AT_stmt_list attribute.
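For illustration, a rough C++ sketch of the heuristic (simplified from the
actual SectionParser change; the readVersionAt callback is a hypothetical
stand-in for the DWARFDataExtractor plumbing, and error handling is omitted):

  #include <cstdint>
  #include <functional>

  // Decide where the next line-table prologue starts: keep the unaligned
  // offset if its version field already looks valid, otherwise probe the
  // 4- and 8-byte aligned offsets before giving up.
  uint64_t nextTableOffset(uint64_t Offset,
                           std::function<uint16_t(uint64_t)> readVersionAt) {
    auto versionLooksValid = [&](uint64_t Off) {
      uint16_t V = readVersionAt(Off); // DWARF line-table version field
      return V >= 2 && V <= 5;         // versions DWARFDebugLine supports
    };
    if (versionLooksValid(Offset))
      return Offset;
    for (uint64_t Align : {4, 8}) {
      uint64_t Aligned = (Offset + Align - 1) / Align * Align;
      if (versionLooksValid(Aligned))
        return Aligned;
    }
    return Offset; // no improvement; parse (and diagnose) unaligned
  }

The real change additionally treats an aligned offset that runs past the end
of the section as padding rather than as another table, as the code below
shows.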
Differential Revision: https://reviews.llvm.org/D143513 --- .../llvm/DebugInfo/DWARF/DWARFDebugLine.h | 1 + llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp | 39 +++++ .../llvm-dwarfdump/ARM/aligned_line_tables.s | 152 ++++++++++++++++++ 3 files changed, 192 insertions(+) create mode 100644 llvm/test/tools/llvm-dwarfdump/ARM/aligned_line_tables.s diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h index de9902ae2ebcb..5c01dad848fd2 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h @@ -355,6 +355,7 @@ class DWARFDebugLine { private: DWARFUnit *prepareToParse(uint64_t Offset); void moveToNextTable(uint64_t OldOffset, const Prologue &P); + bool hasValidVersion(uint64_t Offset); LineToUnitMap LineToUnit; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp index 0725bd7744aea..dc46c76c06e86 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp @@ -1505,6 +1505,21 @@ DWARFUnit *DWARFDebugLine::SectionParser::prepareToParse(uint64_t Offset) { return U; } +bool DWARFDebugLine::SectionParser::hasValidVersion(uint64_t Offset) { + DataExtractor::Cursor Cursor(Offset); + auto [TotalLength, _] = DebugLineData.getInitialLength(Cursor); + DWARFDataExtractor HeaderData(DebugLineData, Cursor.tell() + TotalLength); + uint16_t Version = HeaderData.getU16(Cursor); + if (!Cursor) { + // Ignore any error here. + // If this is not the end of the section parseNext() will still be + // attempted, where this error will occur again (and can be handled). + consumeError(Cursor.takeError()); + return false; + } + return versionIsSupported(Version); +} + void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset, const Prologue &P) { // If the length field is not valid, we don't know where the next table is, so @@ -1518,5 +1533,29 @@ void DWARFDebugLine::SectionParser::moveToNextTable(uint64_t OldOffset, Offset = OldOffset + P.TotalLength + P.sizeofTotalLength(); if (!DebugLineData.isValidOffset(Offset)) { Done = true; + return; + } + + // Heuristic: If the version is valid, then this is probably a line table. + // Otherwise, the offset might need alignment (to a 4 or 8 byte boundary). + if (hasValidVersion(Offset)) + return; + + // ARM C/C++ Compiler aligns each line table to word boundaries and pads out + // the .debug_line section to a word multiple. Note that in the specification + // this does not seem forbidden since each unit has a DW_AT_stmt_list. + for (unsigned Align : {4, 8}) { + uint64_t AlignedOffset = alignTo(Offset, Align); + if (!DebugLineData.isValidOffset(AlignedOffset)) { + // This is almost certainly not another line table but some alignment + // padding. This assumes the alignments tested are ordered, and are + // smaller than the header size (which is true for 4 and 8). 
+ Done = true; + return; + } + if (hasValidVersion(AlignedOffset)) { + Offset = AlignedOffset; + break; + } } } diff --git a/llvm/test/tools/llvm-dwarfdump/ARM/aligned_line_tables.s b/llvm/test/tools/llvm-dwarfdump/ARM/aligned_line_tables.s new file mode 100644 index 0000000000000..f59ce7aa9f774 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/ARM/aligned_line_tables.s @@ -0,0 +1,152 @@ +// RUN: llvm-mc %s -defsym ALIGN_4=1 -save-temp-labels -filetype obj -triple arm-none-eabi -o %t.o +// RUN: llvm-nm %t.o | FileCheck %s --check-prefix=L4 +// RUN: llvm-dwarfdump -debug-line %t.o 2>&1 | FileCheck %s --implicit-check-not='warning:' --check-prefix=MULT4 + +// RUN: llvm-mc %s -defsym ALIGN_8=1 -save-temp-labels -filetype obj -triple arm-none-eabi -o %t.o +// RUN: llvm-nm %t.o | FileCheck %s --check-prefix=L8 +// RUN: llvm-dwarfdump -debug-line %t.o 2>&1 | FileCheck %s --implicit-check-not='warning:' --check-prefix=MULT8 + +// RUN: llvm-mc %s -defsym UNALIGNED_PADDING=1 -save-temp-labels -filetype obj -triple arm-none-eabi -o %t.o +// RUN: llvm-nm %t.o | FileCheck %s --check-prefix=LUNALIGN +// RUN: llvm-dwarfdump -debug-line %t.o 2>&1 | FileCheck %s --check-prefix=UNALIGN + +/// This test is based on a real example from ARM C/C++ Compiler. +/// It verifies llvm-dwarfdump is able to dump line tables even if they've been +/// placed at aligned offsets. + +// L4: 0000002b N .Ltable0_end +// MULT4: Address Line Column File ISA Discriminator Flags +// MULT4-NEXT: ------------------ ------ ------ ------ --- ------------- ------------- +// MULT4-NEXT: 0x0000000000000000 1 0 1 0 0 is_stmt end_sequence +// MULT4-EMPTY: +// MULT4-NEXT: debug_line[0x0000002c] +// MULT4-NEXT: Line table prologue: +// MULT4-NEXT: total_length: 0x0000003a{{$}} +// MULT4-NEXT: format: DWARF32 +// MULT4-NEXT: version: 2{{$}} +// MULT4-NEXT: prologue_length: 0x0000001a +// MULT4-NEXT: min_inst_length: 2 +// MULT4-NEXT: default_is_stmt: 1 + +// L8: 00000027 N .Ltable0_end +// MULT8: Address Line Column File ISA Discriminator Flags +// MULT8-NEXT: ------------------ ------ ------ ------ --- ------------- ------------- +// MULT8-NEXT: 0x0000000000000000 1 0 1 0 0 is_stmt end_sequence +// MULT8-EMPTY: +// MULT8-NEXT: debug_line[0x00000028] +// MULT8-NEXT: Line table prologue: +// MULT8-NEXT: total_length: 0x0000003a{{$}} +// MULT8-NEXT: format: DWARF32 +// MULT8-NEXT: version: 2{{$}} +// MULT8-NEXT: prologue_length: 0x0000001a +// MULT8-NEXT: min_inst_length: 2 +// MULT8-NEXT: default_is_stmt: 1 + +/// This should fail to dump: +// LUNALIGN: 00000027 N .Ltable0_end +// UNALIGN: warning: parsing line table prologue at offset 0x00000027: unsupported version + +.section .debug_line +/// First line table +/// Unit total length: +.long .Ltable0_end - .Ltable0_start +.Ltable0_start: +.short 2 /// Version +/// Header length: +.long .Ltable0_header_end - .Ltable0_header_start +.Ltable0_header_start: +.byte 4 /// Min instruction length +.byte 1 /// Max operations per instruction +.byte 0 /// Default is statement +.byte 6 /// Line range +.byte 10 /// Opcode base +.byte 0 /// standard_opcode_lengths[DW_LNS_copy] = 0 +.byte 1 /// standard_opcode_lengths[DW_LNS_advance_pc] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_advance_line] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_set_file] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_set_column] = 1 +.byte 0 /// standard_opcode_lengths[DW_LNS_negate_stmt] = 0 +.byte 0 /// standard_opcode_lengths[DW_LNS_set_basic_block] = 0 +.byte 0 /// standard_opcode_lengths[DW_LNS_const_add_pc] = 0 
+.byte 0 /// standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 0 +.byte 0 /// No include directories +/// File name: +.ifdef ALIGN_4 +/// Pad out filename so next 4 byte aligned offset is a multiple of 4 and not 8. +.asciz "foobar.cpp" +.else +.asciz "test.c" +.endif +.byte 0 /// Dir idx +.byte 0 /// Mod time +.byte 0 /// Length +.byte 0 /// End files +.Ltable0_header_end: +/// Line table operations +.byte 0 /// Extended opcode +.byte 1 /// Length 1 +.byte 1 /// DW_LNE_end_sequence +.Ltable0_end: +/// End first line table +/// Padding: +.ifdef UNALIGNED_PADDING +.short 0 +.else +.byte 0 +.endif +/// Second line table +/// Unit total length: +.long .Ltable1_end - .Ltable1_start +.Ltable1_start: +.short 2 /// Version +/// Header length: +.long .Ltable1_header_end - .Ltable1_header_start +.Ltable1_header_start: +.byte 2 /// Min instruction length +.byte 1 /// Max operations per instruction +.byte 0 /// Default is statement +.byte 6 /// Line range +.byte 10 /// Opcode base +.byte 0 /// standard_opcode_lengths[DW_LNS_copy] = 0 +.byte 1 /// standard_opcode_lengths[DW_LNS_advance_pc] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_advance_line] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_set_file] = 1 +.byte 1 /// standard_opcode_lengths[DW_LNS_set_column] = 1 +.byte 0 /// standard_opcode_lengths[DW_LNS_negate_stmt] = 0 +.byte 0 /// standard_opcode_lengths[DW_LNS_set_basic_block] = 0 +.byte 0 /// standard_opcode_lengths[DW_LNS_const_add_pc] = 0 +.byte 0 /// standard_opcode_lengths[DW_LNS_fixed_advance_pc] = 0 +.byte 0 /// No include directories +.asciz "test.c" /// File name +.byte 0 /// Dir idx +.byte 0 /// Mod time +.byte 0 /// Length +.byte 0 /// End files +.Ltable1_header_end: +/// Line table operations +.byte 4 /// DW_LNS_set_file +.byte 1 /// File 1 +.byte 5 /// DW_LNS_set_column +.byte 1 /// Column 1 +.byte 0 /// Extended opcode +.byte 5 /// Length 5 +.byte 2 /// DW_LNE_set_address +.long 32896 /// Address = 0x00008080 +.byte 3 /// DW_LNS_advance_line +.byte 6 /// Line += 6 +.byte 1 /// DW_LNS_copy +.byte 5 /// DW_LNS_set_column +.byte 2 /// Column 2 +.byte 12 /// Special opcode (address += 0, line += 2) +.byte 30 /// Special opcode (address += 6, line += 2) +.byte 5 /// DW_LNS_set_column +.byte 1 /// Column 1 +.byte 17 /// Special opcode (address += 2, line += 1) +.byte 2 /// DW_LNS_advance_pc +.byte 4 /// += (4 * min instruction length) +.byte 0 /// Extended opcode +.byte 1 /// Length 1 +.byte 1 /// DW_LNE_end_sequence +.Ltable1_end: +/// End second line table +.short 0 /// Padding (to make section a word multiple) From 83e420c65f4a6c0b693af82cfd81ae58fd033f97 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 22 Mar 2023 09:31:51 -0700 Subject: [PATCH 318/691] [Constant] Inline ConstantInt::getSigned ConstantInt::getSigned calls ConstantInt::get with the IsSigned flag set to true. That flag normally defaults to false. For always signed constants the code base is not consistent about whether it uses ConstantInt::getSigned or ConstantInt::get with IsSigned set to true. And it's not clear how to decide which way to use. By making getSigned inline, both ways should generate the same code in the end. 
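As a small usage sketch of that equivalence (a hypothetical helper, not code
from the patch):

  #include "llvm/IR/Constants.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Type.h"
  #include <cassert>

  using namespace llvm;

  // Both spellings now expand to ConstantInt::get(Ty, V, /*IsSigned=*/true).
  // ConstantInt values are uniqued per LLVMContext, so the two results are
  // the same object.
  Constant *minusOneBothWays(LLVMContext &Ctx) {
    Type *I32 = Type::getInt32Ty(Ctx);
    Constant *A = ConstantInt::getSigned(I32, -1); // inline forwarder
    Constant *B = ConstantInt::get(I32, -1, /*IsSigned=*/true);
    assert(A == B && "uniqued, so pointer-equal");
    return A;
  }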
Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D146598
---
 llvm/include/llvm/IR/Constants.h | 8 ++++++--
 llvm/lib/IR/Constants.cpp        | 8 --------
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index 9cc56ecf8e970..baa4bac8c8e14 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -111,8 +111,12 @@ class ConstantInt final : public ConstantData {
   /// either getSExtValue() or getZExtValue() will yield a correctly sized and
   /// signed value for the type Ty.
   /// Get a ConstantInt for a specific signed value.
-  static ConstantInt *getSigned(IntegerType *Ty, int64_t V);
-  static Constant *getSigned(Type *Ty, int64_t V);
+  static ConstantInt *getSigned(IntegerType *Ty, int64_t V) {
+    return get(Ty, V, true);
+  }
+  static Constant *getSigned(Type *Ty, int64_t V) {
+    return get(Ty, V, true);
+  }

   /// Return a ConstantInt with the specified value and an implied Type. The
   /// type is the integer type that corresponds to the bit width of the value.
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index ba68e6be05b52..a4b00d92ea89a 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -899,14 +899,6 @@ ConstantInt *ConstantInt::get(IntegerType *Ty, uint64_t V, bool isSigned) {
   return get(Ty->getContext(), APInt(Ty->getBitWidth(), V, isSigned));
 }

-ConstantInt *ConstantInt::getSigned(IntegerType *Ty, int64_t V) {
-  return get(Ty, V, true);
-}
-
-Constant *ConstantInt::getSigned(Type *Ty, int64_t V) {
-  return get(Ty, V, true);
-}
-
 Constant *ConstantInt::get(Type *Ty, const APInt& V) {
   ConstantInt *C = get(Ty->getContext(), V);
   assert(C->getType() == Ty->getScalarType() &&

From 9e3ca7987a4dc33cdf847b79a6304b117651d21f Mon Sep 17 00:00:00 2001
From: Luke Hutton
Date: Wed, 22 Mar 2023 00:54:15 +0000
Subject: [PATCH 319/691] [mlir][tosa] Canonicalize concatenate->slice sequence

Adds a canonicalizer for the concatenate->slice sequence where an output
of slice can be replaced with an input of concatenate. This is useful in
the context of operations with complex inputs and outputs that are
legalized from a framework such as TFL. For example, a TFL graph
(FFT->FFT) will be legalized to the following TOSA graph:

       /   \
   slice   slice
       \   /
        FFT
       /   \         -+
   concatenate        |
       /   \          | Redundant
   slice   slice      |
       \   /         -+
        FFT
       /   \
   concatenate
         |

Concatenate and slice operations at the boundaries of the graph are
useful as they maintain the correct correspondence of input/output
tensors to the original TFL graph. However, consecutive complex
operations will result in redundant concatenate->slice sequences which
should be removed from the final TOSA graph.

The canonicalization does not currently handle dynamic types.
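A hypothetical before/after in MLIR illustrating the rewrite (shapes chosen
for the example; the slice must lie entirely within one concat operand):

  // Before: the slice reads rows [1, 3) of the concatenated result, which
  // fall entirely inside the %arg0 operand.
  %0 = "tosa.concat"(%arg0, %arg1) {axis = 0 : i64}
         : (tensor<4x8xf32>, tensor<4x8xf32>) -> tensor<8x8xf32>
  %1 = "tosa.slice"(%0) {size = array<i64: 2, 8>, start = array<i64: 1, 0>}
         : (tensor<8x8xf32>) -> tensor<2x8xf32>

  // After: the slice reads directly from %arg0; if %0 has no other uses,
  // the concatenation becomes dead and can be removed.
  %1 = "tosa.slice"(%arg0) {size = array<i64: 2, 8>, start = array<i64: 1, 0>}
         : (tensor<4x8xf32>) -> tensor<2x8xf32>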
Signed-off-by: Luke Hutton Reviewed By: rsuderman Differential Revision: https://reviews.llvm.org/D144545 --- mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 1 + .../Dialect/Tosa/IR/TosaCanonicalizations.cpp | 59 +++++++++++++++++++ mlir/test/Dialect/Tosa/canonicalize.mlir | 53 +++++++++++++++++ 3 files changed, 113 insertions(+) diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index 7c8018ad64606..b6127f1ffa3cf 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -1556,6 +1556,7 @@ def Tosa_SliceOp: Tosa_Op<"slice", [ Tosa_Tensor1Dto6D:$output ); + let hasCanonicalizer = 1; let hasFolder = 1; } diff --git a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp index 1a8a5782e11f6..16f23e4798c02 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaCanonicalizations.cpp @@ -519,6 +519,65 @@ void ClampOp::getCanonicalizationPatterns(RewritePatternSet &results, results.add(context); } +struct ConcatSliceOptimization : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tosa::SliceOp sliceOp, + PatternRewriter &rewriter) const override { + Value sliceInput = sliceOp.getInput(); + auto concatOp = sliceInput.getDefiningOp(); + if (!concatOp) + return rewriter.notifyMatchFailure( + sliceOp, "slice input must be concat operation"); + + OperandRange inputs = concatOp.getInput1(); + auto concatType = dyn_cast(concatOp.getType()); + if (!concatType || !concatType.hasStaticShape()) + return rewriter.notifyMatchFailure( + sliceOp, "slice input must be a static ranked tensor"); + int32_t axis = concatOp.getAxis(); + + llvm::SmallVector sliceStart(sliceOp.getStart()); + llvm::ArrayRef sliceSize = sliceOp.getSize(); + + // Validate slice on the concatenated axis. Slicing along this + // axis should span only one of the inputs to the concatenate + // operation. + std::optional replaceWithSlice; + for (auto input : inputs) { + auto inputType = dyn_cast(input.getType()); + if (!inputType || !inputType.hasStaticShape()) + return rewriter.notifyMatchFailure( + sliceOp, "concat input must be a static ranked tensor"); + + if (sliceStart[axis] >= 0 && + (sliceStart[axis] + sliceSize[axis]) <= inputType.getDimSize(axis)) { + replaceWithSlice = + rewriter + .create( + sliceOp.getLoc(), sliceOp.getType(), input, + rewriter.getDenseI64ArrayAttr(sliceOp.getStart()), + rewriter.getDenseI64ArrayAttr(sliceSize)) + .getResult(); + break; + } + sliceStart[axis] -= inputType.getDimSize(axis); + } + + if (!replaceWithSlice) + return rewriter.notifyMatchFailure( + sliceOp, "corresponding concat input not found for slice"); + + rewriter.replaceOp(sliceOp, replaceWithSlice.value()); + return success(); + } +}; + +void SliceOp::getCanonicalizationPatterns(RewritePatternSet &results, + MLIRContext *context) { + results.add(context); +} + //===----------------------------------------------------------------------===// // Operator Folders. 
//===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir index e16a614c7cd01..77627d8c8ba62 100644 --- a/mlir/test/Dialect/Tosa/canonicalize.mlir +++ b/mlir/test/Dialect/Tosa/canonicalize.mlir @@ -434,3 +434,56 @@ func.func @fold_resize_bilinear(%arg0 : tensor<1x15x13x1xi8>) -> tensor<1x15x13x %resize = "tosa.resize"(%arg0) {mode = "BILINEAR", scale = array, offset = array, border = array} : (tensor<1x15x13x1xi8>) -> tensor<1x15x13x1xi8> return %resize : tensor<1x15x13x1xi8> } + +// ----- + +// CHECK-LABEL: @canonicalize_concat_slice_final_axis +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x12x12x1xf32>, %[[VAL_1:.*]]: tensor<1x12x12x1xf32> +// CHECK: return %[[VAL_0]], %[[VAL_1]] : tensor<1x12x12x1xf32>, tensor<1x12x12x1xf32> +func.func @canonicalize_concat_slice_final_axis(%arg0 : tensor<1x12x12x1xf32>, %arg1 : tensor<1x12x12x1xf32>) -> (tensor<1x12x12x1xf32>, tensor<1x12x12x1xf32>) { + %0 = "tosa.concat"(%arg0, %arg1) {axis = 3 : i64} : (tensor<1x12x12x1xf32>, tensor<1x12x12x1xf32>) -> tensor<1x12x12x2xf32> + %1 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x12x2xf32>) -> tensor<1x12x12x1xf32> + %2 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x12x2xf32>) -> tensor<1x12x12x1xf32> + return %1, %2 : tensor<1x12x12x1xf32>, tensor<1x12x12x1xf32> +} + +// ----- + +// CHECK-LABEL: @canonicalize_concat_slice_middle_axis +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x12x12xf32>, %[[VAL_1:.*]]: tensor<1x12x12xf32> +// CHECK: return %[[VAL_0]], %[[VAL_1]] : tensor<1x12x12xf32>, tensor<1x12x12xf32> +func.func @canonicalize_concat_slice_middle_axis(%arg0 : tensor<1x12x12xf32>, %arg1 : tensor<1x12x12xf32>) -> (tensor<1x12x12xf32>, tensor<1x12x12xf32>) { + %0 = "tosa.concat"(%arg0, %arg1) {axis = 1 : i64} : (tensor<1x12x12xf32>, tensor<1x12x12xf32>) -> tensor<1x24x12xf32> + %1 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x24x12xf32>) -> tensor<1x12x12xf32> + %2 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x24x12xf32>) -> tensor<1x12x12xf32> + return %1, %2 : tensor<1x12x12xf32>, tensor<1x12x12xf32> +} + +// ----- + +// CHECK-LABEL: @canonicalize_cross_concat_inputs +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x12x12xf32>, %[[VAL_1:.*]]: tensor<1x12x12xf32> +// CHECK: %[[VAL_2:.*]] = "tosa.concat"(%[[VAL_0]], %[[VAL_1]]) {axis = 2 : i64} : (tensor<1x12x12xf32>, tensor<1x12x12xf32>) -> tensor<1x12x24xf32> +// CHECK: %[[VAL_3:.*]] = "tosa.slice"(%[[VAL_2]]) {size = array, start = array} : (tensor<1x12x24xf32>) -> tensor<1x12x15xf32> +// CHECK: %[[VAL_4:.*]] = "tosa.slice"(%[[VAL_2]]) {size = array, start = array} : (tensor<1x12x24xf32>) -> tensor<1x12x20xf32> +// CHECK: return %[[VAL_3]], %[[VAL_4]] : tensor<1x12x15xf32>, tensor<1x12x20xf32> +func.func @canonicalize_cross_concat_inputs(%arg0 : tensor<1x12x12xf32>, %arg1 : tensor<1x12x12xf32>) -> (tensor<1x12x15xf32>, tensor<1x12x20xf32>) { + %0 = "tosa.concat"(%arg0, %arg1) {axis = 2 : i64} : (tensor<1x12x12xf32>, tensor<1x12x12xf32>) -> tensor<1x12x24xf32> + %1 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x24xf32>) -> tensor<1x12x15xf32> + %2 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x24xf32>) -> tensor<1x12x20xf32> + return %1, %2 : tensor<1x12x15xf32>, tensor<1x12x20xf32> +} + +// ----- + +// CHECK-LABEL: @canonicalize_concat_slice_on_non_concat_axis +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x12x12xf32>, %[[VAL_1:.*]]: tensor<1x12x12xf32> +// CHECK: 
%[[VAL_2:.*]] = "tosa.slice"(%[[VAL_0]]) {size = array, start = array} : (tensor<1x12x12xf32>) -> tensor<1x6x12xf32>
+// CHECK: %[[VAL_3:.*]] = "tosa.slice"(%[[VAL_1]]) {size = array, start = array} : (tensor<1x12x12xf32>) -> tensor<1x3x12xf32>
+// CHECK: return %[[VAL_2]], %[[VAL_3]] : tensor<1x6x12xf32>, tensor<1x3x12xf32>
+func.func @canonicalize_concat_slice_on_non_concat_axis(%arg0 : tensor<1x12x12xf32>, %arg1 : tensor<1x12x12xf32>) -> (tensor<1x6x12xf32>, tensor<1x3x12xf32>) {
+  %0 = "tosa.concat"(%arg0, %arg1) {axis = 2 : i64} : (tensor<1x12x12xf32>, tensor<1x12x12xf32>) -> tensor<1x12x24xf32>
+  %1 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x24xf32>) -> tensor<1x6x12xf32>
+  %2 = "tosa.slice"(%0) {size = array, start = array} : (tensor<1x12x24xf32>) -> tensor<1x3x12xf32>
+  return %1, %2 : tensor<1x6x12xf32>, tensor<1x3x12xf32>
+}

From 164b046ebfa8d7ad36ce567e2214c97e4e7b1657 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 22 Mar 2023 10:01:17 -0700
Subject: [PATCH 320/691] [RISCV] Convert segment registers to VR registers in RISCVMCInstLower.

Similar to what we do for the LMUL>1 register classes.

The printing is only working today because the segment registers have
"ABI" names set to their base register name.
---
 llvm/lib/Target/RISCV/RISCVMCInstLower.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
index 281918259cdb3..6b658539a319b 100644
--- a/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMCInstLower.cpp
@@ -193,6 +193,19 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
   } else if (RISCV::FPR64RegClass.contains(Reg)) {
     Reg = TRI->getSubReg(Reg, RISCV::sub_32);
     assert(Reg && "Superregister does not exist");
+  } else if (RISCV::VRN2M1RegClass.contains(Reg) ||
+             RISCV::VRN2M2RegClass.contains(Reg) ||
+             RISCV::VRN2M4RegClass.contains(Reg) ||
+             RISCV::VRN3M1RegClass.contains(Reg) ||
+             RISCV::VRN3M2RegClass.contains(Reg) ||
+             RISCV::VRN4M1RegClass.contains(Reg) ||
+             RISCV::VRN4M2RegClass.contains(Reg) ||
+             RISCV::VRN5M1RegClass.contains(Reg) ||
+             RISCV::VRN6M1RegClass.contains(Reg) ||
+             RISCV::VRN7M1RegClass.contains(Reg) ||
+             RISCV::VRN8M1RegClass.contains(Reg)) {
+    Reg = TRI->getSubReg(Reg, RISCV::sub_vrm1_0);
+    assert(Reg && "Subregister does not exist");
   }

   MCOp = MCOperand::createReg(Reg);

From a67e989cd2a730ea778102f2a0d965daed0182bd Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 22 Mar 2023 10:07:18 -0700
Subject: [PATCH 321/691] [RISCV] Add FallbackRegAltNameIndex to ABIRegAltName.

Remove now redundant fake ABI names from vector registers.

This also fixes a crash that occurs if you use fflags as an instruction
operand in the assembly and use -debug. It's not a valid register for
any instruction, so this wouldn't be common. It doesn't have an ABI name,
so it crashes the register printing in the debug output.
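As a hedged TableGen sketch of the mechanism (illustrative register
definitions, much simpler than the real RISCVRegisterInfo.td classes): once
FallbackRegAltNameIndex is set, a register that provides no alternate name
for the index is simply printed with its primary name.

  // Alternate-name index that falls back to the register's primary name.
  let FallbackRegAltNameIndex = NoRegAltName in
  def ABIRegAltName : RegAltNameIndex;

  // Has a real ABI name: prints as "zero" when ABI names are requested.
  let RegAltNameIndices = [ABIRegAltName] in
  def X0 : Register<"x0", ["zero"]>;

  // No ABI name: previously this needed a fake ["v0"] alternate-name entry;
  // with the fallback in place it just prints as "v0".
  def V0 : Register<"v0">;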
--- llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 70 ++++++++++------------ 1 file changed, 33 insertions(+), 37 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 301f2ad77d00c..7e91441e91f47 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -45,6 +45,7 @@ class RISCVReg64 let SubRegIndices = [sub_32]; } +let FallbackRegAltNameIndex = NoRegAltName in def ABIRegAltName : RegAltNameIndex; def sub_vrm4_0 : SubRegIndex<256>; @@ -415,51 +416,46 @@ class VRegList LIn, int start, int nf, int lmul, bit isV0> { } // Vector registers -let RegAltNameIndices = [ABIRegAltName] in { - foreach Index = 0-31 in { - def V#Index : RISCVReg, DwarfRegNum<[!add(Index, 96)]>; - } +foreach Index = 0-31 in { + def V#Index : RISCVReg, DwarfRegNum<[!add(Index, 96)]>; +} - foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, - 24, 26, 28, 30] in { - def V#Index#M2 : RISCVRegWithSubRegs("V"#Index), - !cast("V"#!add(Index, 1))], - ["v"#Index]>, - DwarfRegAlias("V"#Index)> { - let SubRegIndices = [sub_vrm1_0, sub_vrm1_1]; - } +foreach Index = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, + 24, 26, 28, 30] in { + def V#Index#M2 : RISCVRegWithSubRegs("V"#Index), + !cast("V"#!add(Index, 1))]>, + DwarfRegAlias("V"#Index)> { + let SubRegIndices = [sub_vrm1_0, sub_vrm1_1]; } +} - foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in { - def V#Index#M4 : RISCVRegWithSubRegs("V"#Index#"M2"), - !cast("V"#!add(Index, 2)#"M2")], - ["v"#Index]>, - DwarfRegAlias("V"#Index)> { - let SubRegIndices = [sub_vrm2_0, sub_vrm2_1]; - } +foreach Index = [0, 4, 8, 12, 16, 20, 24, 28] in { + def V#Index#M4 : RISCVRegWithSubRegs("V"#Index#"M2"), + !cast("V"#!add(Index, 2)#"M2")]>, + DwarfRegAlias("V"#Index)> { + let SubRegIndices = [sub_vrm2_0, sub_vrm2_1]; } +} - foreach Index = [0, 8, 16, 24] in { - def V#Index#M8 : RISCVRegWithSubRegs("V"#Index#"M4"), - !cast("V"#!add(Index, 4)#"M4")], - ["v"#Index]>, - DwarfRegAlias("V"#Index)> { - let SubRegIndices = [sub_vrm4_0, sub_vrm4_1]; - } +foreach Index = [0, 8, 16, 24] in { + def V#Index#M8 : RISCVRegWithSubRegs("V"#Index#"M4"), + !cast("V"#!add(Index, 4)#"M4")]>, + DwarfRegAlias("V"#Index)> { + let SubRegIndices = [sub_vrm4_0, sub_vrm4_1]; } - - def VTYPE : RISCVReg<0, "vtype", ["vtype"]>; - def VL : RISCVReg<0, "vl", ["vl"]>; - def VXSAT : RISCVReg<0, "vxsat", ["vxsat"]>; - def VXRM : RISCVReg<0, "vxrm", ["vxrm"]>; - let isConstant = true in - def VLENB : RISCVReg<0, "vlenb", ["vlenb"]>, - DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>; } +def VTYPE : RISCVReg<0, "vtype">; +def VL : RISCVReg<0, "vl">; +def VXSAT : RISCVReg<0, "vxsat">; +def VXRM : RISCVReg<0, "vxrm">; +let isConstant = true in +def VLENB : RISCVReg<0, "vlenb">, + DwarfRegNum<[!add(4096, SysRegVLENB.Encoding)]>; + def VCSR : RegisterClass<"RISCV", [XLenVT], 32, (add VTYPE, VL, VLENB)> { let RegInfos = XLenRI; From 700cd99061edeeba7b657e32acca940225fa25ae Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Wed, 22 Mar 2023 10:01:45 -0700 Subject: [PATCH 322/691] Restore "[MemProf] Context disambiguation cloning pass [patch 1a/3]" This restores commit d6ad4f01c3dafcab335bca66dac6e36d9eac8421, which was reverted in commit 883dbb9c86be87593a58ef10b070b3a0564c7fee, along with a fix for gcc 12.2 build errors in the original commit. Support for building, printing, and displaying CallsiteContextGraph which represents the MemProf metadata contexts. 
Uses CRTP to enable support for both IR (regular LTO) and summary (ThinLTO). This patch includes the support for building it in regular LTO mode (from memprof and callsite metadata), and the next patch will add the handling for building it from ThinLTO summaries. Also includes support for dumping the graph to text and to dot files. Follow-on patches will contain the support for cloning on the graph and in the IR. The graph represents the call contexts in all memprof metadata on allocation calls, with nodes for the allocations themselves, as well as for the calls in each context. The graph is initially built from the allocation memprof metadata (or summary) MIBs. It is then updated to match calls with callsite metadata onto the nodes, updating it to reflect any inlining performed on those calls. Each MIB (representing an allocation's call context with allocation behavior) is assigned a unique context id during the graph build. The edges and nodes in the graph are decorated with the context ids they carry. This is used to correctly update the graph when cloning is performed so that we can uniquify the context for a single (possibly cloned) allocation. Differential Revision: https://reviews.llvm.org/D140908 --- .../IPO/MemProfContextDisambiguation.h | 38 + llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassBuilderPipelines.cpp | 11 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/lib/Transforms/IPO/CMakeLists.txt | 1 + .../IPO/MemProfContextDisambiguation.cpp | 1583 +++++++++++++++++ llvm/test/ThinLTO/X86/memprof-summary.ll | 184 -- .../MemProfContextDisambiguation/basic.ll | 158 ++ .../duplicate-context-ids.ll | 232 +++ .../duplicate-context-ids2.ll | 386 ++++ .../indirectcall.ll | 261 +++ .../MemProfContextDisambiguation/inlined.ll | 189 ++ .../MemProfContextDisambiguation/inlined2.ll | 135 ++ .../pass-pipeline.ll | 41 + 14 files changed, 3037 insertions(+), 184 deletions(-) create mode 100644 llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h create mode 100644 llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp delete mode 100644 llvm/test/ThinLTO/X86/memprof-summary.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/basic.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll create mode 100644 llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h new file mode 100644 index 0000000000000..56e56ed67f7df --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h @@ -0,0 +1,38 @@ +//==- MemProfContextDisambiguation.h - Context Disambiguation ----*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Implements support for context disambiguation of allocation calls for profile +// guided heap optimization using memprof metadata. 
See implementation file for +// details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H +#define LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { +class Module; + +class MemProfContextDisambiguation + : public PassInfoMixin { + /// Run the context disambiguator on \p M, returns true if any changes made. + bool processModule(Module &M); + +public: + MemProfContextDisambiguation() {} + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 89d2e6a4b2d1a..a04f8bbaa5dc0 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -117,6 +117,7 @@ #include "llvm/Transforms/IPO/Internalize.h" #include "llvm/Transforms/IPO/LoopExtractor.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" +#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" #include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/Transforms/IPO/ModuleInliner.h" #include "llvm/Transforms/IPO/OpenMPOpt.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 1d386139d9e6c..aaabe23049288 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -57,6 +57,7 @@ #include "llvm/Transforms/IPO/InferFunctionAttrs.h" #include "llvm/Transforms/IPO/Inliner.h" #include "llvm/Transforms/IPO/LowerTypeTests.h" +#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" #include "llvm/Transforms/IPO/MergeFunctions.h" #include "llvm/Transforms/IPO/ModuleInliner.h" #include "llvm/Transforms/IPO/OpenMPOpt.h" @@ -271,6 +272,10 @@ static cl::opt AttributorRun( clEnumValN(AttributorRunOption::NONE, "none", "disable attributor runs"))); +cl::opt EnableMemProfContextDisambiguation( + "enable-memprof-context-disambiguation", cl::init(false), cl::Hidden, + cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation")); + PipelineTuningOptions::PipelineTuningOptions() { LoopInterleaving = true; LoopVectorization = true; @@ -1709,6 +1714,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, InlineContext{ThinOrFullLTOPhase::FullLTOPostLink, InlinePass::CGSCCInliner})); + // Perform context disambiguation after inlining, since that would reduce the + // amount of additional cloning required to distinguish the allocation + // contexts. + if (EnableMemProfContextDisambiguation) + MPM.addPass(MemProfContextDisambiguation()); + // Optimize globals again after we ran the inliner. 
  MPM.addPass(GlobalOptPass());
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 04d648580a8c5..82592a1ee9b55 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -87,6 +87,7 @@ MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
 MODULE_PASS("no-op-module", NoOpModulePass())
 MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass())
 MODULE_PASS("partial-inliner", PartialInlinerPass())
+MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
 MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion())
 MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen())
 MODULE_PASS("pgo-instr-use", PGOInstrumentationUse())
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 063a9a60d0cb5..e03aff0f65d7a 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -27,6 +27,7 @@ add_llvm_component_library(LLVMipo
   Internalize.cpp
   LoopExtractor.cpp
   LowerTypeTests.cpp
+  MemProfContextDisambiguation.cpp
   MergeFunctions.cpp
   ModuleInliner.cpp
   OpenMPOpt.cpp
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
new file mode 100644
index 0000000000000..5a6625743eecf
--- /dev/null
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -0,0 +1,1583 @@
+//==-- MemProfContextDisambiguation.cpp - Disambiguate contexts -------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements support for context disambiguation of allocation
+// calls for profile guided heap optimization. Specifically, it uses Memprof
+// profiles which indicate context specific allocation behavior (currently
+// distinguishing cold vs hot memory allocations). Cloning is performed to
+// expose the cold allocation call contexts, and the allocation calls are
+// subsequently annotated with an attribute for later transformation.
+//
+// The transformations can be performed either directly on IR (regular LTO), or
+// (eventually) on a ThinLTO index (later applied to the IR during the ThinLTO
+// backend). Both types of LTO operate on the same base graph representation,
+// which uses CRTP to support either IR or Index formats.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/MemoryProfileInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/IPO.h" +#include +#include +using namespace llvm; +using namespace llvm::memprof; + +#define DEBUG_TYPE "memprof-context-disambiguation" + +static cl::opt DotFilePathPrefix( + "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden, + cl::value_desc("filename"), + cl::desc("Specify the path prefix of the MemProf dot files.")); + +static cl::opt ExportToDot("memprof-export-to-dot", cl::init(false), + cl::Hidden, + cl::desc("Export graph to dot files.")); + +static cl::opt + DumpCCG("memprof-dump-ccg", cl::init(false), cl::Hidden, + cl::desc("Dump CallingContextGraph to stdout after each stage.")); + +static cl::opt + VerifyCCG("memprof-verify-ccg", cl::init(false), cl::Hidden, + cl::desc("Perform verification checks on CallingContextGraph.")); + +static cl::opt + VerifyNodes("memprof-verify-nodes", cl::init(false), cl::Hidden, + cl::desc("Perform frequent verification checks on nodes.")); + +inline bool hasSingleAllocType(uint8_t AllocTypes) { + switch (AllocTypes) { + case (uint8_t)AllocationType::Cold: + case (uint8_t)AllocationType::NotCold: + return true; + break; + case (uint8_t)AllocationType::None: + assert(false); + break; + default: + return false; + break; + } + llvm_unreachable("invalid alloc type"); +} + +/// CRTP base for graphs built from either IR or ThinLTO summary index. +/// +/// The graph represents the call contexts in all memprof metadata on allocation +/// calls, with nodes for the allocations themselves, as well as for the calls +/// in each context. The graph is initially built from the allocation memprof +/// metadata (or summary) MIBs. It is then updated to match calls with callsite +/// metadata onto the nodes, updating it to reflect any inlining performed on +/// those calls. +/// +/// Each MIB (representing an allocation's call context with allocation +/// behavior) is assigned a unique context id during the graph build. The edges +/// and nodes in the graph are decorated with the context ids they carry. This +/// is used to correctly update the graph when cloning is performed so that we +/// can uniquify the context for a single (possibly cloned) allocation. +template +class CallsiteContextGraph { +public: + CallsiteContextGraph() = default; + CallsiteContextGraph(const CallsiteContextGraph &) = default; + CallsiteContextGraph(CallsiteContextGraph &&) = default; + + /// Main entry point to perform analysis and transformations on graph. 
+  bool process();
+
+  void dump() const;
+  void print(raw_ostream &OS) const;
+
+  friend raw_ostream &operator<<(raw_ostream &OS,
+                                 const CallsiteContextGraph &CCG) {
+    CCG.print(OS);
+    return OS;
+  }
+
+  friend struct GraphTraits<
+      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
+  friend struct DOTGraphTraits<
+      const CallsiteContextGraph<DerivedCCG, FuncTy, CallTy> *>;
+
+  void exportToDot(std::string Label) const;
+
+  /// Represents a function clone via FuncTy pointer and clone number pair.
+  struct FuncInfo final : public std::pair<FuncTy *, unsigned> {
+    using Base = std::pair<FuncTy *, unsigned>;
+    FuncInfo(const Base &B) : Base(B) {}
+    FuncInfo(FuncTy *F = nullptr, unsigned CloneNo = 0) : Base(F, CloneNo) {}
+    explicit operator bool() const { return this->first != nullptr; }
+    FuncTy *func() const { return this->first; }
+    unsigned cloneNo() const { return this->second; }
+  };
+
+  /// Represents a callsite clone via CallTy and clone number pair.
+  struct CallInfo final : public std::pair<CallTy, unsigned> {
+    using Base = std::pair<CallTy, unsigned>;
+    CallInfo(const Base &B) : Base(B) {}
+    CallInfo(CallTy Call = nullptr, unsigned CloneNo = 0)
+        : Base(Call, CloneNo) {}
+    explicit operator bool() const { return (bool)this->first; }
+    CallTy call() const { return this->first; }
+    unsigned cloneNo() const { return this->second; }
+    void setCloneNo(unsigned N) { this->second = N; }
+    void print(raw_ostream &OS) const {
+      if (!operator bool()) {
+        assert(!cloneNo());
+        OS << "null Call";
+        return;
+      }
+      call()->print(OS);
+      OS << "\t(clone " << cloneNo() << ")";
+    }
+    void dump() const {
+      print(dbgs());
+      dbgs() << "\n";
+    }
+    friend raw_ostream &operator<<(raw_ostream &OS, const CallInfo &Call) {
+      Call.print(OS);
+      return OS;
+    }
+  };
+
+  struct ContextEdge;
+
+  /// Node in the Callsite Context Graph.
+  struct ContextNode {
+    // Keep this for now since in the IR case where we have an Instruction* it
+    // is not as immediately discoverable. Used for printing richer
+    // information when dumping the graph.
+    bool IsAllocation;
+
+    // Keeps track of when the Call was reset to null because there was
+    // recursion.
+    bool Recursive = false;
+
+    // The corresponding allocation or interior call.
+    CallInfo Call;
+
+    // For alloc nodes this is a unique id assigned when constructed, and for
+    // callsite stack nodes it is the original stack id when the node is
+    // constructed from the memprof MIB metadata on the alloc nodes. Note that
+    // this is only used when matching callsite metadata onto the stack nodes
+    // created when processing the allocation memprof MIBs, and for labeling
+    // nodes in the dot graph. Therefore we don't bother to assign a value for
+    // clones.
+    uint64_t OrigStackOrAllocId = 0;
+
+    // This will be formed by ORing together the AllocationType enum values
+    // for contexts including this node.
+    uint8_t AllocTypes = 0;
+
+    // Edges to all callees in the profiled call stacks.
+    // TODO: Should this be a map (from Callee node) for more efficient lookup?
+    std::vector<std::shared_ptr<ContextEdge>> CalleeEdges;
+
+    // Edges to all callers in the profiled call stacks.
+    // TODO: Should this be a map (from Caller node) for more efficient lookup?
+    std::vector<std::shared_ptr<ContextEdge>> CallerEdges;
+
+    // The set of IDs for contexts including this node.
+    DenseSet<uint32_t> ContextIds;
+
+    // List of clones of this ContextNode, initially empty.
+    std::vector<ContextNode *> Clones;
+
+    // If a clone, points to the original uncloned node.
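+    // (Original nodes have a null CloneOf; their clones are collected in the
+    // Clones list above. See the clone() helper below.)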
+ ContextNode *CloneOf = nullptr; + + ContextNode(bool IsAllocation) : IsAllocation(IsAllocation), Call() {} + + ContextNode(bool IsAllocation, CallInfo C) + : IsAllocation(IsAllocation), Call(C) {} + + std::unique_ptr clone() { + auto Clone = std::make_unique(IsAllocation, Call); + if (CloneOf) { + CloneOf->Clones.push_back(Clone.get()); + Clone->CloneOf = CloneOf; + } else { + Clones.push_back(Clone.get()); + Clone->CloneOf = this; + } + return Clone; + } + + ContextNode *getOrigNode() { + if (!CloneOf) + return this; + return CloneOf; + } + + void addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, + unsigned int ContextId); + + ContextEdge *findEdgeFromCallee(const ContextNode *Callee); + ContextEdge *findEdgeFromCaller(const ContextNode *Caller); + void eraseCalleeEdge(const ContextEdge *Edge); + void eraseCallerEdge(const ContextEdge *Edge); + + void setCall(CallInfo C) { Call = C; } + + bool hasCall() const { return (bool)Call.call(); } + + void printCall(raw_ostream &OS) const { Call.print(OS); } + + // True if this node was effectively removed from the graph, in which case + // its context id set, caller edges, and callee edges should all be empty. + bool isRemoved() const { + assert(ContextIds.empty() == + (CalleeEdges.empty() && CallerEdges.empty())); + return ContextIds.empty(); + } + + void dump() const; + void print(raw_ostream &OS) const; + + friend raw_ostream &operator<<(raw_ostream &OS, const ContextNode &Node) { + Node.print(OS); + return OS; + } + }; + + /// Edge in the Callsite Context Graph from a ContextNode N to a caller or + /// callee. + struct ContextEdge { + ContextNode *Callee; + ContextNode *Caller; + + // This will be formed by ORing together the AllocationType enum values + // for contexts including this edge. + uint8_t AllocTypes = 0; + + // The set of IDs for contexts including this edge. + DenseSet ContextIds; + + ContextEdge(ContextNode *Callee, ContextNode *Caller, uint8_t AllocType, + DenseSet ContextIds) + : Callee(Callee), Caller(Caller), AllocTypes(AllocType), + ContextIds(ContextIds) {} + + DenseSet &getContextIds() { return ContextIds; } + + void dump() const; + void print(raw_ostream &OS) const; + + friend raw_ostream &operator<<(raw_ostream &OS, const ContextEdge &Edge) { + Edge.print(OS); + return OS; + } + }; + +protected: + /// Get a list of nodes corresponding to the stack ids in the given callsite + /// context. + template + std::vector + getStackIdsWithContextNodes(CallStack &CallsiteContext); + + /// Adds nodes for the given allocation and any stack ids on its memprof MIB + /// metadata (or summary). + ContextNode *addAllocNode(CallInfo Call, const FuncTy *F); + + /// Adds nodes for the given MIB stack ids. + template + void addStackNodesForMIB(ContextNode *AllocNode, + CallStack &StackContext, + CallStack &CallsiteContext, + AllocationType AllocType); + + /// Matches all callsite metadata (or summary) to the nodes created for + /// allocation memprof MIB metadata, synthesizing new nodes to reflect any + /// inlining performed on those callsite instructions. + void updateStackNodes(); + + /// Update graph to conservatively handle any callsite stack nodes that target + /// multiple different callee target functions. + void handleCallsitesWithMultipleTargets(); + + /// Save lists of calls with MemProf metadata in each function, for faster + /// iteration. + std::vector>> + FuncToCallsWithMetadata; + + /// Map from callsite node to the enclosing caller function. 
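+  /// Used, for example, to check whether a callsite's target matches the
+  /// function containing a callee node, and to label nodes in the dot graph.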
+ std::map NodeToCallingFunc; + +private: + using EdgeIter = typename std::vector>::iterator; + + using CallContextInfo = std::tuple, + const FuncTy *, DenseSet>; + + /// Assigns the given Node to calls at or inlined into the location with + /// the Node's stack id, after post order traversing and processing its + /// caller nodes. Uses the call information recorded in the given + /// StackIdToMatchingCalls map, and creates new nodes for inlined sequences + /// as needed. Called by updateStackNodes which sets up the given + /// StackIdToMatchingCalls map. + void assignStackNodesPostOrder( + ContextNode *Node, DenseSet &Visited, + DenseMap> &StackIdToMatchingCalls); + + /// Duplicates the given set of context ids, updating the provided + /// map from each original id with the newly generated context ids, + /// and returning the new duplicated id set. + DenseSet duplicateContextIds( + const DenseSet &StackSequenceContextIds, + DenseMap> &OldToNewContextIds); + + /// Propagates all duplicated context ids across the graph. + void propagateDuplicateContextIds( + const DenseMap> &OldToNewContextIds); + + /// Connect the NewNode to OrigNode's callees if TowardsCallee is true, + /// else to its callers. Also updates OrigNode's edges to remove any context + /// ids moved to the newly created edge. + void connectNewNode(ContextNode *NewNode, ContextNode *OrigNode, + bool TowardsCallee); + + /// Get the stack id corresponding to the given Id or Index (for IR this will + /// return itself, for a summary index this will return the id recorded in the + /// index for that stack id index value). + uint64_t getStackId(uint64_t IdOrIndex) const { + return static_cast(this)->getStackId(IdOrIndex); + } + + /// Returns true if the given call targets the given function. + bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) { + return static_cast(this)->calleeMatchesFunc(Call, Func); + } + + /// Get a list of nodes corresponding to the stack ids in the given + /// callsite's context. + std::vector getStackIdsWithContextNodesForCall(CallTy Call) { + return static_cast(this)->getStackIdsWithContextNodesForCall( + Call); + } + + /// Get the last stack id in the context for callsite. + uint64_t getLastStackId(CallTy Call) { + return static_cast(this)->getLastStackId(Call); + } + + /// Gets a label to use in the dot graph for the given call clone in the given + /// function. + std::string getLabel(const FuncTy *Func, const CallTy Call, + unsigned CloneNo) const { + return static_cast(this)->getLabel(Func, Call, CloneNo); + } + + /// Helpers to find the node corresponding to the given call or stackid. + ContextNode *getNodeForInst(const CallInfo &C); + ContextNode *getNodeForAlloc(const CallInfo &C); + ContextNode *getNodeForStackId(uint64_t StackId); + + /// Removes the node information recorded for the given call. + void unsetNodeForInst(const CallInfo &C); + + /// Computes the alloc type corresponding to the given context ids, by + /// unioning their recorded alloc types. + uint8_t computeAllocType(DenseSet &ContextIds); + + /// Map from each context ID to the AllocationType assigned to that context. + std::map ContextIdToAllocationType; + + /// Identifies the context node created for a stack id when adding the MIB + /// contexts to the graph. This is used to locate the context nodes when + /// trying to assign the corresponding callsites with those stack ids to these + /// nodes. + std::map StackEntryIdToContextNodeMap; + + /// Maps to track the calls to their corresponding nodes in the graph. 
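+  /// Allocation calls and interior (non-allocation) callsites are kept in
+  /// separate maps; getNodeForInst consults both.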
+ std::map AllocationCallToContextNodeMap; + std::map NonAllocationCallToContextNodeMap; + + /// Owner of all ContextNode unique_ptrs. + std::vector> NodeOwner; + + /// Perform sanity checks on graph when requested. + void check() const; + + /// Keeps track of the last unique context id assigned. + unsigned int LastContextId = 0; +}; + +template +using ContextNode = + typename CallsiteContextGraph::ContextNode; +template +using ContextEdge = + typename CallsiteContextGraph::ContextEdge; +template +using FuncInfo = + typename CallsiteContextGraph::FuncInfo; +template +using CallInfo = + typename CallsiteContextGraph::CallInfo; + +/// CRTP derived class for graphs built from IR (regular LTO). +class ModuleCallsiteContextGraph + : public CallsiteContextGraph { +public: + ModuleCallsiteContextGraph(Module &M); + +private: + friend CallsiteContextGraph; + + uint64_t getStackId(uint64_t IdOrIndex) const; + bool calleeMatchesFunc(Instruction *Call, const Function *Func); + uint64_t getLastStackId(Instruction *Call); + std::vector getStackIdsWithContextNodesForCall(Instruction *Call); + std::string getLabel(const Function *Func, const Instruction *Call, + unsigned CloneNo) const; + + const Module &Mod; +}; + +namespace { + +struct FieldSeparator { + bool Skip = true; + const char *Sep; + + FieldSeparator(const char *Sep = ", ") : Sep(Sep) {} +}; + +raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) { + if (FS.Skip) { + FS.Skip = false; + return OS; + } + return OS << FS.Sep; +} + +} // end anonymous namespace + +template +typename CallsiteContextGraph::ContextNode * +CallsiteContextGraph::getNodeForInst( + const CallInfo &C) { + ContextNode *Node = getNodeForAlloc(C); + if (Node) + return Node; + + auto NonAllocCallNode = NonAllocationCallToContextNodeMap.find(C); + if (NonAllocCallNode != NonAllocationCallToContextNodeMap.end()) { + return NonAllocCallNode->second; + } + return nullptr; +} + +template +typename CallsiteContextGraph::ContextNode * +CallsiteContextGraph::getNodeForAlloc( + const CallInfo &C) { + auto AllocCallNode = AllocationCallToContextNodeMap.find(C); + if (AllocCallNode != AllocationCallToContextNodeMap.end()) { + return AllocCallNode->second; + } + return nullptr; +} + +template +typename CallsiteContextGraph::ContextNode * +CallsiteContextGraph::getNodeForStackId( + uint64_t StackId) { + auto StackEntryNode = StackEntryIdToContextNodeMap.find(StackId); + if (StackEntryNode != StackEntryIdToContextNodeMap.end()) + return StackEntryNode->second; + return nullptr; +} + +template +void CallsiteContextGraph::unsetNodeForInst( + const CallInfo &C) { + AllocationCallToContextNodeMap.erase(C) || + NonAllocationCallToContextNodeMap.erase(C); + assert(!AllocationCallToContextNodeMap.count(C) && + !NonAllocationCallToContextNodeMap.count(C)); +} + +template +void CallsiteContextGraph::ContextNode:: + addOrUpdateCallerEdge(ContextNode *Caller, AllocationType AllocType, + unsigned int ContextId) { + for (auto &Edge : CallerEdges) { + if (Edge->Caller == Caller) { + Edge->AllocTypes |= (uint8_t)AllocType; + Edge->getContextIds().insert(ContextId); + return; + } + } + std::shared_ptr Edge = std::make_shared( + this, Caller, (uint8_t)AllocType, DenseSet({ContextId})); + CallerEdges.push_back(Edge); + Caller->CalleeEdges.push_back(Edge); +} + +template +typename CallsiteContextGraph::ContextEdge * +CallsiteContextGraph::ContextNode:: + findEdgeFromCallee(const ContextNode *Callee) { + for (const auto &Edge : CalleeEdges) + if (Edge->Callee == Callee) + return Edge.get(); + return 
nullptr; +} + +template +typename CallsiteContextGraph::ContextEdge * +CallsiteContextGraph::ContextNode:: + findEdgeFromCaller(const ContextNode *Caller) { + for (const auto &Edge : CallerEdges) + if (Edge->Caller == Caller) + return Edge.get(); + return nullptr; +} + +template +void CallsiteContextGraph::ContextNode:: + eraseCalleeEdge(const ContextEdge *Edge) { + auto EI = + std::find_if(CalleeEdges.begin(), CalleeEdges.end(), + [Edge](const std::shared_ptr &CalleeEdge) { + return CalleeEdge.get() == Edge; + }); + assert(EI != CalleeEdges.end()); + CalleeEdges.erase(EI); +} + +template +void CallsiteContextGraph::ContextNode:: + eraseCallerEdge(const ContextEdge *Edge) { + auto EI = + std::find_if(CallerEdges.begin(), CallerEdges.end(), + [Edge](const std::shared_ptr &CallerEdge) { + return CallerEdge.get() == Edge; + }); + assert(EI != CallerEdges.end()); + CallerEdges.erase(EI); +} + +template +uint8_t CallsiteContextGraph::computeAllocType( + DenseSet &ContextIds) { + uint8_t BothTypes = + (uint8_t)AllocationType::Cold | (uint8_t)AllocationType::NotCold; + uint8_t AllocType = (uint8_t)AllocationType::None; + for (auto Id : ContextIds) { + AllocType |= (uint8_t)ContextIdToAllocationType[Id]; + // Bail early if alloc type reached both, no further refinement. + if (AllocType == BothTypes) + return AllocType; + } + return AllocType; +} + +template +typename CallsiteContextGraph::ContextNode * +CallsiteContextGraph::addAllocNode( + CallInfo Call, const FuncTy *F) { + assert(!getNodeForAlloc(Call)); + NodeOwner.push_back( + std::make_unique(/*IsAllocation=*/true, Call)); + ContextNode *AllocNode = NodeOwner.back().get(); + AllocationCallToContextNodeMap[Call] = AllocNode; + NodeToCallingFunc[AllocNode] = F; + // Use LastContextId as a uniq id for MIB allocation nodes. + AllocNode->OrigStackOrAllocId = LastContextId; + // Alloc type should be updated as we add in the MIBs. We should assert + // afterwards that it is not still None. + AllocNode->AllocTypes = (uint8_t)AllocationType::None; + + return AllocNode; +} + +template +template +void CallsiteContextGraph::addStackNodesForMIB( + ContextNode *AllocNode, CallStack &StackContext, + CallStack &CallsiteContext, AllocationType AllocType) { + ContextIdToAllocationType[++LastContextId] = AllocType; + + // Update alloc type and context ids for this MIB. + AllocNode->AllocTypes |= (uint8_t)AllocType; + AllocNode->ContextIds.insert(LastContextId); + + // Now add or update nodes for each stack id in alloc's context. + // Later when processing the stack ids on non-alloc callsites we will adjust + // for any inlining in the context. + ContextNode *PrevNode = AllocNode; + // Look for recursion (direct recursion should have been collapsed by + // module summary analysis, here we should just be detecting mutual + // recursion). Mark these nodes so we don't try to clone. + SmallSet StackIdSet; + // Skip any on the allocation call (inlining). 
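+  // For example, if the allocation call itself was inlined, its !callsite
+  // metadata already covers those frames, and beginAfterSharedPrefix starts
+  // the walk at the first MIB context frame beyond that shared prefix.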
+ for (auto ContextIter = StackContext.beginAfterSharedPrefix(CallsiteContext); + ContextIter != StackContext.end(); ++ContextIter) { + auto StackId = getStackId(*ContextIter); + ContextNode *StackNode = getNodeForStackId(StackId); + if (!StackNode) { + NodeOwner.push_back( + std::make_unique(/*IsAllocation=*/false)); + StackNode = NodeOwner.back().get(); + StackEntryIdToContextNodeMap[StackId] = StackNode; + StackNode->OrigStackOrAllocId = StackId; + } + auto Ins = StackIdSet.insert(StackId); + if (!Ins.second) + StackNode->Recursive = true; + StackNode->ContextIds.insert(LastContextId); + StackNode->AllocTypes |= (uint8_t)AllocType; + PrevNode->addOrUpdateCallerEdge(StackNode, AllocType, LastContextId); + PrevNode = StackNode; + } +} + +template +DenseSet +CallsiteContextGraph::duplicateContextIds( + const DenseSet &StackSequenceContextIds, + DenseMap> &OldToNewContextIds) { + DenseSet NewContextIds; + for (auto OldId : StackSequenceContextIds) { + NewContextIds.insert(++LastContextId); + OldToNewContextIds[OldId].insert(LastContextId); + assert(ContextIdToAllocationType.count(OldId)); + // The new context has the same allocation type as original. + ContextIdToAllocationType[LastContextId] = ContextIdToAllocationType[OldId]; + } + return NewContextIds; +} + +template +void CallsiteContextGraph:: + propagateDuplicateContextIds( + const DenseMap> &OldToNewContextIds) { + // Build a set of duplicated context ids corresponding to the input id set. + auto GetNewIds = [&OldToNewContextIds](const DenseSet &ContextIds) { + DenseSet NewIds; + for (auto Id : ContextIds) + if (auto NewId = OldToNewContextIds.find(Id); + NewId != OldToNewContextIds.end()) + NewIds.insert(NewId->second.begin(), NewId->second.end()); + return NewIds; + }; + + // Recursively update context ids sets along caller edges. + auto UpdateCallers = [&](ContextNode *Node, + DenseSet &Visited, + auto &&UpdateCallers) -> void { + for (auto Edge : Node->CallerEdges) { + auto Inserted = Visited.insert(Edge.get()); + if (!Inserted.second) + continue; + ContextNode *NextNode = Edge->Caller; + DenseSet NewIdsToAdd = GetNewIds(Edge->getContextIds()); + // Only need to recursively iterate to NextNode via this caller edge if + // it resulted in any added ids to NextNode. + if (!NewIdsToAdd.empty()) { + Edge->getContextIds().insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); + NextNode->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); + UpdateCallers(NextNode, Visited, UpdateCallers); + } + } + }; + + DenseSet Visited; + for (auto &Entry : AllocationCallToContextNodeMap) { + auto *Node = Entry.second; + // Update ids on the allocation nodes before calling the recursive + // update along caller edges, since this simplifies the logic during + // that traversal. + DenseSet NewIdsToAdd = GetNewIds(Node->ContextIds); + Node->ContextIds.insert(NewIdsToAdd.begin(), NewIdsToAdd.end()); + UpdateCallers(Node, Visited, UpdateCallers); + } +} + +template +void CallsiteContextGraph::connectNewNode( + ContextNode *NewNode, ContextNode *OrigNode, bool TowardsCallee) { + // Make a copy of the context ids, since this will be adjusted below as they + // are moved. + DenseSet RemainingContextIds = NewNode->ContextIds; + auto &OrigEdges = + TowardsCallee ? OrigNode->CalleeEdges : OrigNode->CallerEdges; + // Increment iterator in loop so that we can remove edges as needed. 
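+  // (A range-based for loop would not work here since the erase calls below
+  // would invalidate its iterators.)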
+ for (auto EI = OrigEdges.begin(); EI != OrigEdges.end();) { + auto Edge = *EI; + // Remove any matching context ids from Edge, return set that were found and + // removed, these are the new edge's context ids. Also update the remaining + // (not found ids). + DenseSet NewEdgeContextIds, NotFoundContextIds; + set_subtract(Edge->getContextIds(), RemainingContextIds, NewEdgeContextIds, + NotFoundContextIds); + RemainingContextIds.swap(NotFoundContextIds); + // If no matching context ids for this edge, skip it. + if (NewEdgeContextIds.empty()) { + ++EI; + continue; + } + if (TowardsCallee) { + auto NewEdge = std::make_shared( + Edge->Callee, NewNode, computeAllocType(NewEdgeContextIds), + NewEdgeContextIds); + NewNode->CalleeEdges.push_back(NewEdge); + NewEdge->Callee->CallerEdges.push_back(NewEdge); + } else { + auto NewEdge = std::make_shared( + NewNode, Edge->Caller, computeAllocType(NewEdgeContextIds), + NewEdgeContextIds); + NewNode->CallerEdges.push_back(NewEdge); + NewEdge->Caller->CalleeEdges.push_back(NewEdge); + } + // Remove old edge if context ids empty. + if (Edge->getContextIds().empty()) { + if (TowardsCallee) { + Edge->Callee->eraseCallerEdge(Edge.get()); + EI = OrigNode->CalleeEdges.erase(EI); + } else { + Edge->Caller->eraseCalleeEdge(Edge.get()); + EI = OrigNode->CallerEdges.erase(EI); + } + continue; + } + ++EI; + } +} + +template +void CallsiteContextGraph:: + assignStackNodesPostOrder(ContextNode *Node, + DenseSet &Visited, + DenseMap> + &StackIdToMatchingCalls) { + auto Inserted = Visited.insert(Node); + if (!Inserted.second) + return; + // Post order traversal. Iterate over a copy since we may add nodes and + // therefore new callers during the recursive call, invalidating any + // iterator over the original edge vector. We don't need to process these + // new nodes as they were already processed on creation. + auto CallerEdges = Node->CallerEdges; + for (auto &Edge : CallerEdges) { + // Skip any that have been removed during the recursion. + if (!Edge) + continue; + assignStackNodesPostOrder(Edge->Caller, Visited, StackIdToMatchingCalls); + } + + // If this node's stack id is in the map, update the graph to contain new + // nodes representing any inlining at interior callsites. Note we move the + // associated context ids over to the new nodes. + + // Ignore this node if it is for an allocation or we didn't record any + // stack id lists ending at it. + if (Node->IsAllocation || + !StackIdToMatchingCalls.count(Node->OrigStackOrAllocId)) + return; + + auto &Calls = StackIdToMatchingCalls[Node->OrigStackOrAllocId]; + // Handle the simple case first. A single call with a single stack id. + // In this case there is no need to create any new context nodes, simply + // assign the context node for stack id to this Call. + if (Calls.size() == 1) { + auto &[Call, Ids, Func, SavedContextIds] = Calls[0]; + if (Ids.size() == 1) { + assert(SavedContextIds.empty()); + // It should be this Node + assert(Node == getNodeForStackId(Ids[0])); + if (Node->Recursive) + return; + Node->setCall(Call); + NonAllocationCallToContextNodeMap[Call] = Node; + NodeToCallingFunc[Node] = Func; + return; + } + } + + // Find the node for the last stack id, which should be the same + // across all calls recorded for this id, and is this node's id. + uint64_t LastId = Node->OrigStackOrAllocId; + ContextNode *LastNode = getNodeForStackId(LastId); + // We should only have kept stack ids that had nodes. 
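+  // (updateStackNodes only records calls under an outermost stack id that
+  // has a node, so LastNode must exist.)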
+  assert(LastNode);
+
+  for (unsigned I = 0; I < Calls.size(); I++) {
+    auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
+    // Skip any for which we didn't assign any ids; these don't get a node in
+    // the graph.
+    if (SavedContextIds.empty())
+      continue;
+
+    assert(LastId == Ids.back());
+
+    ContextNode *FirstNode = getNodeForStackId(Ids[0]);
+    assert(FirstNode);
+
+    // Recompute the context ids for this stack id sequence (the
+    // intersection of the context ids of the corresponding nodes).
+    // Start with the ids we saved in the map for this call, which could be
+    // duplicated context ids. We have to recompute because the saved context
+    // ids for different last nodes might overlap, and some may already have
+    // been removed during the post order traversal.
+    set_intersect(SavedContextIds, FirstNode->ContextIds);
+    ContextNode *PrevNode = nullptr;
+    for (auto Id : Ids) {
+      ContextNode *CurNode = getNodeForStackId(Id);
+      // We should only have kept stack ids that had nodes and weren't
+      // recursive.
+      assert(CurNode);
+      assert(!CurNode->Recursive);
+      if (!PrevNode) {
+        PrevNode = CurNode;
+        continue;
+      }
+      auto *Edge = CurNode->findEdgeFromCallee(PrevNode);
+      if (!Edge) {
+        SavedContextIds.clear();
+        break;
+      }
+      PrevNode = CurNode;
+      set_intersect(SavedContextIds, Edge->getContextIds());
+
+      // If we now have no context ids for clone, skip this call.
+      if (SavedContextIds.empty())
+        break;
+    }
+    if (SavedContextIds.empty())
+      continue;
+
+    // Create new context node.
+    NodeOwner.push_back(
+        std::make_unique<ContextNode>(/*IsAllocation=*/false, Call));
+    ContextNode *NewNode = NodeOwner.back().get();
+    NodeToCallingFunc[NewNode] = Func;
+    NonAllocationCallToContextNodeMap[Call] = NewNode;
+    NewNode->ContextIds = SavedContextIds;
+    NewNode->AllocTypes = computeAllocType(NewNode->ContextIds);
+
+    // Connect to callees of innermost stack frame in inlined call chain.
+    // This updates context ids for FirstNode's callees to reflect those
+    // moved to NewNode.
+    connectNewNode(NewNode, FirstNode, /*TowardsCallee=*/true);
+
+    // Connect to callers of outermost stack frame in inlined call chain.
+    // This updates context ids for LastNode's callers to reflect those
+    // moved to NewNode.
+    connectNewNode(NewNode, LastNode, /*TowardsCallee=*/false);
+
+    // Now we need to remove context ids from edges/nodes between First and
+    // Last Node.
+    PrevNode = nullptr;
+    for (auto Id : Ids) {
+      ContextNode *CurNode = getNodeForStackId(Id);
+      // We should only have kept stack ids that had nodes.
+      assert(CurNode);
+
+      // Remove the context ids moved to NewNode from CurNode, and the
+      // edge from the prior node.
+      set_subtract(CurNode->ContextIds, NewNode->ContextIds);
+      if (PrevNode) {
+        auto *PrevEdge = CurNode->findEdgeFromCallee(PrevNode);
+        assert(PrevEdge);
+        set_subtract(PrevEdge->getContextIds(), NewNode->ContextIds);
+        if (PrevEdge->getContextIds().empty()) {
+          PrevNode->eraseCallerEdge(PrevEdge);
+          CurNode->eraseCalleeEdge(PrevEdge);
+        }
+      }
+      PrevNode = CurNode;
+    }
+  }
+}
+
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+void CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::updateStackNodes() {
+  // Map of stack id to all calls with that as the last (outermost caller)
+  // callsite id that has a context node (some might not due to pruning
+  // performed during matching of the allocation profile contexts).
+  // The CallContextInfo contains the Call and a list of its stack ids with
+  // ContextNodes, the function containing Call, and the set of context ids
+  // the analysis will eventually identify for use in any new node created
+  // for that callsite.
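+  // For example (hypothetical): if a call to foo was inlined into its caller
+  // bar, the resulting call carries stack ids for both frames and is
+  // recorded here under the id of the outermost (bar) frame.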
+  DenseMap<uint64_t, std::vector<CallContextInfo>> StackIdToMatchingCalls;
+  for (auto &[Func, CallsWithMetadata] : FuncToCallsWithMetadata) {
+    for (auto &Call : CallsWithMetadata) {
+      // Ignore allocations, already handled.
+      if (AllocationCallToContextNodeMap.count(Call))
+        continue;
+      auto StackIdsWithContextNodes =
+          getStackIdsWithContextNodesForCall(Call.call());
+      // If there were no nodes created for MIBs on allocs (maybe this was in
+      // the unambiguous part of the MIB stack that was pruned), ignore.
+      if (StackIdsWithContextNodes.empty())
+        continue;
+      // Otherwise, record this Call along with the list of ids for the last
+      // (outermost caller) stack id with a node.
+      StackIdToMatchingCalls[StackIdsWithContextNodes.back()].push_back(
+          {Call.call(), StackIdsWithContextNodes, Func, {}});
+    }
+  }
+
+  // First make a pass through all stack ids that correspond to a call,
+  // as identified in the above loop. Compute the context ids corresponding
+  // to each of these calls when they correspond to multiple stack ids due
+  // to inlining. Perform any duplication of context ids required when
+  // there is more than one call with the same stack ids. Their (possibly
+  // newly duplicated) context ids are saved in the StackIdToMatchingCalls
+  // map.
+  DenseMap<uint32_t, DenseSet<uint32_t>> OldToNewContextIds;
+  for (auto &It : StackIdToMatchingCalls) {
+    auto &Calls = It.getSecond();
+    // Skip single calls with a single stack id. These don't need a new node.
+    if (Calls.size() == 1) {
+      auto &Ids = std::get<1>(Calls[0]);
+      if (Ids.size() == 1)
+        continue;
+    }
+    // In order to do the best and maximal matching of inlined calls to
+    // context node sequences we will sort the vectors of stack ids in
+    // descending order of length, and within each length, lexicographically
+    // by stack id. The latter is so that we can specially handle calls that
+    // have identical stack id sequences (either due to cloning or
+    // artificially because of the MIB context pruning).
+    std::sort(Calls.begin(), Calls.end(),
+              [](const CallContextInfo &A, const CallContextInfo &B) {
+                auto &IdsA = std::get<1>(A);
+                auto &IdsB = std::get<1>(B);
+                return IdsA.size() > IdsB.size() ||
+                       (IdsA.size() == IdsB.size() && IdsA < IdsB);
+              });
+
+    // Find the node for the last stack id, which should be the same
+    // across all calls recorded for this id, and is the id for this
+    // entry in the StackIdToMatchingCalls map.
+    uint64_t LastId = It.getFirst();
+    ContextNode *LastNode = getNodeForStackId(LastId);
+    // We should only have kept stack ids that had nodes.
+    assert(LastNode);
+
+    if (LastNode->Recursive)
+      continue;
+
+    // Initialize the context ids with the last node's. We will subsequently
+    // refine the context ids by computing the intersection along all edges.
+    DenseSet<uint32_t> LastNodeContextIds = LastNode->ContextIds;
+    assert(!LastNodeContextIds.empty());
+
+    for (unsigned I = 0; I < Calls.size(); I++) {
+      auto &[Call, Ids, Func, SavedContextIds] = Calls[I];
+      assert(SavedContextIds.empty());
+      assert(LastId == Ids.back());
+
+      // First compute the context ids for this stack id sequence (the
+      // intersection of the context ids of the corresponding nodes).
+      // Start with the remaining saved ids for the last node.
+      assert(!LastNodeContextIds.empty());
+      DenseSet<uint32_t> StackSequenceContextIds = LastNodeContextIds;
+
+      ContextNode *PrevNode = LastNode;
+      ContextNode *CurNode = LastNode;
+      bool Skip = false;
+
+      // Iterate backwards through the stack Ids, starting after the last Id
+      // in the list, which was handled once outside for all Calls.
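+      // Ids is ordered innermost (leaf) frame first, so this walks from the
+      // second-outermost frame inward, at each step looking up the edge from
+      // the current frame's node to its caller (the previously visited node).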
+ for (auto IdIter = Ids.rbegin() + 1; IdIter != Ids.rend(); IdIter++) { + auto Id = *IdIter; + CurNode = getNodeForStackId(Id); + // We should only have kept stack ids that had nodes. + assert(CurNode); + + if (CurNode->Recursive) { + Skip = true; + break; + } + + auto *Edge = CurNode->findEdgeFromCaller(PrevNode); + // If there is no edge then the nodes belong to different MIB contexts, + // and we should skip this inlined context sequence. For example, this + // particular inlined context may include stack ids A->B, and we may + // indeed have nodes for both A and B, but it is possible that they were + // never profiled in sequence in a single MIB for any allocation (i.e. + // we might have profiled an allocation that involves the callsite A, + // but through a different one of its callee callsites, and we might + // have profiled an allocation that involves callsite B, but reached + // from a different caller callsite). + if (!Edge) { + Skip = true; + break; + } + PrevNode = CurNode; + + // Update the context ids, which is the intersection of the ids along + // all edges in the sequence. + set_intersect(StackSequenceContextIds, Edge->getContextIds()); + + // If we now have no context ids for clone, skip this call. + if (StackSequenceContextIds.empty()) { + Skip = true; + break; + } + } + if (Skip) + continue; + + // If some of this call's stack ids did not have corresponding nodes (due + // to pruning), don't include any context ids for contexts that extend + // beyond these nodes. Otherwise we would be matching part of unrelated / + // not fully matching stack contexts. To do this, subtract any context ids + // found in caller nodes of the last node found above. + if (Ids.back() != getLastStackId(Call)) { + for (auto PE : LastNode->CallerEdges) { + set_subtract(StackSequenceContextIds, PE->getContextIds()); + if (StackSequenceContextIds.empty()) + break; + } + // If we now have no context ids for clone, skip this call. + if (StackSequenceContextIds.empty()) + continue; + } + + // Check if the next set of stack ids is the same (since the Calls vector + // of tuples is sorted by the stack ids we can just look at the next one). + bool DuplicateContextIds = false; + if (I + 1 < Calls.size()) { + auto NextIds = std::get<1>(Calls[I + 1]); + DuplicateContextIds = Ids == NextIds; + } + + // If we don't have duplicate context ids, then we can assign all the + // context ids computed for the original node sequence to this call. + // If there are duplicate calls with the same stack ids then we synthesize + // new context ids that are duplicates of the originals. These are + // assigned to SavedContextIds, which is a reference into the map entry + // for this call, allowing us to access these ids later on. + OldToNewContextIds.reserve(OldToNewContextIds.size() + + StackSequenceContextIds.size()); + SavedContextIds = + DuplicateContextIds + ? duplicateContextIds(StackSequenceContextIds, OldToNewContextIds) + : StackSequenceContextIds; + assert(!SavedContextIds.empty()); + + if (!DuplicateContextIds) { + // Update saved last node's context ids to remove those that are + // assigned to other calls, so that it is ready for the next call at + // this stack id. + set_subtract(LastNodeContextIds, StackSequenceContextIds); + if (LastNodeContextIds.empty()) + break; + } + } + } + + // Propagate the duplicate context ids over the graph. 
+ propagateDuplicateContextIds(OldToNewContextIds); + + if (VerifyCCG) + check(); + + // Now perform a post-order traversal over the graph, starting with the + // allocation nodes, essentially processing nodes from callers to callees. + // For any that contains an id in the map, update the graph to contain new + // nodes representing any inlining at interior callsites. Note we move the + // associated context ids over to the new nodes. + DenseSet Visited; + for (auto &Entry : AllocationCallToContextNodeMap) + assignStackNodesPostOrder(Entry.second, Visited, StackIdToMatchingCalls); +} + +uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) { + CallStack CallsiteContext( + Call->getMetadata(LLVMContext::MD_callsite)); + return CallsiteContext.back(); +} + +std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, + const Instruction *Call, + unsigned CloneNo) const { + return (Twine(Call->getFunction()->getName()) + " -> " + + cast(Call)->getCalledFunction()->getName()) + .str(); +} + +std::vector +ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( + Instruction *Call) { + CallStack CallsiteContext( + Call->getMetadata(LLVMContext::MD_callsite)); + return getStackIdsWithContextNodes( + CallsiteContext); +} + +template +template +std::vector +CallsiteContextGraph::getStackIdsWithContextNodes( + CallStack &CallsiteContext) { + std::vector StackIds; + for (auto IdOrIndex : CallsiteContext) { + auto StackId = getStackId(IdOrIndex); + ContextNode *Node = getNodeForStackId(StackId); + if (!Node) + break; + StackIds.push_back(StackId); + } + return StackIds; +} + +ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(Module &M) : Mod(M) { + for (auto &F : M) { + std::vector CallsWithMetadata; + for (auto &BB : F) { + for (auto &I : BB) { + if (!isa(I)) + continue; + if (auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof)) { + CallsWithMetadata.push_back(&I); + auto *AllocNode = addAllocNode(&I, &F); + auto *CallsiteMD = I.getMetadata(LLVMContext::MD_callsite); + assert(CallsiteMD); + CallStack CallsiteContext(CallsiteMD); + // Add all of the MIBs and their stack nodes. + for (auto &MDOp : MemProfMD->operands()) { + auto *MIBMD = cast(MDOp); + MDNode *StackNode = getMIBStackNode(MIBMD); + assert(StackNode); + CallStack StackContext(StackNode); + addStackNodesForMIB( + AllocNode, StackContext, CallsiteContext, + getMIBAllocType(MIBMD)); + } + assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); + // Memprof and callsite metadata on memory allocations no longer + // needed. + I.setMetadata(LLVMContext::MD_memprof, nullptr); + I.setMetadata(LLVMContext::MD_callsite, nullptr); + } + // For callsite metadata, add to list for this function for later use. + else if (I.getMetadata(LLVMContext::MD_callsite)) + CallsWithMetadata.push_back(&I); + } + } + if (!CallsWithMetadata.empty()) + FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata}); + } + + if (DumpCCG) { + dbgs() << "CCG before updating call stack chains:\n"; + dbgs() << *this; + } + + if (ExportToDot) + exportToDot("prestackupdate"); + + updateStackNodes(); + + handleCallsitesWithMultipleTargets(); + + // Strip off remaining callsite metadata, no longer needed. + for (auto &FuncEntry : FuncToCallsWithMetadata) + for (auto &Call : FuncEntry.second) + Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr); +} + +template +void CallsiteContextGraph::handleCallsitesWithMultipleTargets() { + // Look for and workaround callsites that call multiple functions. 
+ // This can happen for indirect calls, which needs better handling, and in + // more rare cases (e.g. macro expansion). + // TODO: To fix this for indirect calls we will want to perform speculative + // devirtualization using either the normal PGO info with ICP, or using the + // information in the profiled MemProf contexts. We can do this prior to + // this transformation for regular LTO, and for ThinLTO we can simulate that + // effect in the summary and perform the actual speculative devirtualization + // while cloning in the ThinLTO backend. + for (auto Entry = NonAllocationCallToContextNodeMap.begin(); + Entry != NonAllocationCallToContextNodeMap.end();) { + auto *Node = Entry->second; + assert(Node->Clones.empty()); + // Check all node callees and see if in the same function. + bool Removed = false; + auto Call = Node->Call.call(); + for (auto &Edge : Node->CalleeEdges) { + if (!Edge->Callee->hasCall()) + continue; + assert(NodeToCallingFunc.count(Edge->Callee)); + // Check if the called function matches that of the callee node. + if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee])) + continue; + // Work around by setting Node to have a null call, so it gets + // skipped during cloning. Otherwise assignFunctions will assert + // because its data structures are not designed to handle this case. + Entry = NonAllocationCallToContextNodeMap.erase(Entry); + Node->setCall(CallInfo()); + Removed = true; + break; + } + if (!Removed) + Entry++; + } +} + +uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { + // In the Module (IR) case this is already the Id. + return IdOrIndex; +} + +bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, + const Function *Func) { + auto *CB = dyn_cast(Call); + if (!CB->getCalledOperand()) + return false; + auto *CalleeVal = CB->getCalledOperand()->stripPointerCasts(); + auto *CalleeFunc = dyn_cast(CalleeVal); + if (CalleeFunc == Func) + return true; + auto *Alias = dyn_cast(CalleeVal); + return Alias && Alias->getAliasee() == Func; +} + +static std::string getAllocTypeString(uint8_t AllocTypes) { + if (!AllocTypes) + return "None"; + std::string Str; + if (AllocTypes & (uint8_t)AllocationType::NotCold) + Str += "NotCold"; + if (AllocTypes & (uint8_t)AllocationType::Cold) + Str += "Cold"; + return Str; +} + +template +void CallsiteContextGraph::ContextNode::dump() + const { + print(dbgs()); + dbgs() << "\n"; +} + +template +void CallsiteContextGraph::ContextNode::print( + raw_ostream &OS) const { + OS << "Node " << this << "\n"; + OS << "\t"; + printCall(OS); + if (Recursive) + OS << " (recursive)"; + OS << "\n"; + OS << "\tAllocTypes: " << getAllocTypeString(AllocTypes) << "\n"; + OS << "\tContextIds:"; + std::vector SortedIds(ContextIds.begin(), ContextIds.end()); + std::sort(SortedIds.begin(), SortedIds.end()); + for (auto Id : SortedIds) + OS << " " << Id; + OS << "\n"; + OS << "\tCalleeEdges:\n"; + for (auto &Edge : CalleeEdges) + OS << "\t\t" << *Edge << "\n"; + OS << "\tCallerEdges:\n"; + for (auto &Edge : CallerEdges) + OS << "\t\t" << *Edge << "\n"; + if (!Clones.empty()) { + OS << "\tClones: "; + FieldSeparator FS; + for (auto *Clone : Clones) + OS << FS << Clone; + OS << "\n"; + } else if (CloneOf) { + OS << "\tClone of " << CloneOf << "\n"; + } +} + +template +void CallsiteContextGraph::ContextEdge::dump() + const { + print(dbgs()); + dbgs() << "\n"; +} + +template +void CallsiteContextGraph::ContextEdge::print( + raw_ostream &OS) const { + OS << "Edge from Callee " << Callee << " to Caller: " << 
Caller + << " AllocTypes: " << getAllocTypeString(AllocTypes); + OS << " ContextIds:"; + std::vector SortedIds(ContextIds.begin(), ContextIds.end()); + std::sort(SortedIds.begin(), SortedIds.end()); + for (auto Id : SortedIds) + OS << " " << Id; +} + +template +void CallsiteContextGraph::dump() const { + print(dbgs()); +} + +template +void CallsiteContextGraph::print( + raw_ostream &OS) const { + OS << "Callsite Context Graph:\n"; + using GraphType = const CallsiteContextGraph *; + for (const auto Node : nodes(this)) { + if (Node->isRemoved()) + continue; + Node->print(OS); + OS << "\n"; + } +} + +template +static void checkEdge( + const std::shared_ptr> &Edge) { + // Confirm that alloc type is not None and that we have at least one context + // id. + assert(Edge->AllocTypes != (uint8_t)AllocationType::None); + assert(!Edge->ContextIds.empty()); +} + +template +static void checkNode(const ContextNode *Node) { + if (Node->isRemoved()) + return; + // Node's context ids should be the union of both its callee and caller edge + // context ids. + if (Node->CallerEdges.size()) { + auto EI = Node->CallerEdges.begin(); + auto &FirstEdge = *EI; + EI++; + DenseSet CallerEdgeContextIds(FirstEdge->ContextIds); + for (; EI != Node->CallerEdges.end(); EI++) { + const auto &Edge = *EI; + set_union(CallerEdgeContextIds, Edge->ContextIds); + } + // Node can have more context ids than callers if some contexts terminate at + // node and some are longer. + assert(Node->ContextIds == CallerEdgeContextIds || + set_is_subset(CallerEdgeContextIds, Node->ContextIds)); + } + if (Node->CalleeEdges.size()) { + auto EI = Node->CalleeEdges.begin(); + auto &FirstEdge = *EI; + EI++; + DenseSet CalleeEdgeContextIds(FirstEdge->ContextIds); + for (; EI != Node->CalleeEdges.end(); EI++) { + const auto &Edge = *EI; + set_union(CalleeEdgeContextIds, Edge->ContextIds); + } + assert(Node->ContextIds == CalleeEdgeContextIds); + } +} + +template +void CallsiteContextGraph::check() const { + using GraphType = const CallsiteContextGraph *; + for (const auto Node : nodes(this)) { + checkNode(Node); + for (auto &Edge : Node->CallerEdges) + checkEdge(Edge); + } +} + +template +struct GraphTraits *> { + using GraphType = const CallsiteContextGraph *; + using NodeRef = const ContextNode *; + + using NodePtrTy = std::unique_ptr>; + static NodeRef getNode(const NodePtrTy &P) { return P.get(); } + + using nodes_iterator = + mapped_iterator::const_iterator, + decltype(&getNode)>; + + static nodes_iterator nodes_begin(GraphType G) { + return nodes_iterator(G->NodeOwner.begin(), &getNode); + } + + static nodes_iterator nodes_end(GraphType G) { + return nodes_iterator(G->NodeOwner.end(), &getNode); + } + + static NodeRef getEntryNode(GraphType G) { + return G->NodeOwner.begin()->get(); + } + + using EdgePtrTy = std::shared_ptr>; + static const ContextNode * + GetCallee(const EdgePtrTy &P) { + return P->Callee; + } + + using ChildIteratorType = + mapped_iterator>>::const_iterator, + decltype(&GetCallee)>; + + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N->CalleeEdges.begin(), &GetCallee); + } + + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(N->CalleeEdges.end(), &GetCallee); + } +}; + +template +struct DOTGraphTraits *> + : public DefaultDOTGraphTraits { + DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {} + + using GraphType = const CallsiteContextGraph *; + using GTraits = GraphTraits; + using NodeRef = typename GTraits::NodeRef; + using ChildIteratorType = 
typename GTraits::ChildIteratorType; + + static std::string getNodeLabel(NodeRef Node, GraphType G) { + std::string LabelString = + (Twine("OrigId: ") + (Node->IsAllocation ? "Alloc" : "") + + Twine(Node->OrigStackOrAllocId)) + .str(); + LabelString += "\n"; + if (Node->hasCall()) { + auto Func = G->NodeToCallingFunc.find(Node); + assert(Func != G->NodeToCallingFunc.end()); + LabelString += + G->getLabel(Func->second, Node->Call.call(), Node->Call.cloneNo()); + } else { + LabelString += "null call"; + if (Node->Recursive) + LabelString += " (recursive)"; + else + LabelString += " (external)"; + } + return LabelString; + } + + static std::string getNodeAttributes(NodeRef Node, GraphType) { + std::string AttributeString = (Twine("tooltip=\"") + getNodeId(Node) + " " + + getContextIds(Node->ContextIds) + "\"") + .str(); + AttributeString += + (Twine(",fillcolor=\"") + getColor(Node->AllocTypes) + "\"").str(); + AttributeString += ",style=\"filled\""; + if (Node->CloneOf) { + AttributeString += ",color=\"blue\""; + AttributeString += ",style=\"filled,bold,dashed\""; + } else + AttributeString += ",style=\"filled\""; + return AttributeString; + } + + static std::string getEdgeAttributes(NodeRef, ChildIteratorType ChildIter, + GraphType) { + auto &Edge = *(ChildIter.getCurrent()); + return (Twine("tooltip=\"") + getContextIds(Edge->ContextIds) + "\"" + + Twine(",fillcolor=\"") + getColor(Edge->AllocTypes) + "\"") + .str(); + } + + // Since the NodeOwners list includes nodes that are no longer connected to + // the graph, skip them here. + static bool isNodeHidden(NodeRef Node, GraphType) { + return Node->isRemoved(); + } + +private: + static std::string getContextIds(const DenseSet &ContextIds) { + std::string IdString = "ContextIds:"; + if (ContextIds.size() < 100) { + std::vector SortedIds(ContextIds.begin(), ContextIds.end()); + std::sort(SortedIds.begin(), SortedIds.end()); + for (auto Id : SortedIds) + IdString += (" " + Twine(Id)).str(); + } else { + IdString += (" (" + Twine(ContextIds.size()) + " ids)").str(); + } + return IdString; + } + + static std::string getColor(uint8_t AllocTypes) { + if (AllocTypes == (uint8_t)AllocationType::NotCold) + // Color "brown1" actually looks like a lighter red. + return "brown1"; + if (AllocTypes == (uint8_t)AllocationType::Cold) + return "cyan"; + if (AllocTypes == + ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold)) + // Lighter purple. + return "mediumorchid1"; + return "gray"; + } + + static std::string getNodeId(NodeRef Node) { + std::stringstream SStream; + SStream << std::hex << "N0x" << (unsigned long long)Node; + std::string Result = SStream.str(); + return Result; + } +}; + +template +void CallsiteContextGraph::exportToDot( + std::string Label) const { + WriteGraph(this, "", false, Label, + DotFilePathPrefix + "ccg." 
+ Label + ".dot"); +} + +template +bool CallsiteContextGraph::process() { + if (DumpCCG) { + dbgs() << "CCG before cloning:\n"; + dbgs() << *this; + } + if (ExportToDot) + exportToDot("postbuild"); + + if (VerifyCCG) { + check(); + } + + return false; +} + +bool MemProfContextDisambiguation::processModule(Module &M) { + bool Changed = false; + + ModuleCallsiteContextGraph CCG(M); + Changed = CCG.process(); + + return Changed; +} + +PreservedAnalyses MemProfContextDisambiguation::run(Module &M, + ModuleAnalysisManager &AM) { + if (!processModule(M)) + return PreservedAnalyses::all(); + return PreservedAnalyses::none(); +} diff --git a/llvm/test/ThinLTO/X86/memprof-summary.ll b/llvm/test/ThinLTO/X86/memprof-summary.ll deleted file mode 100644 index 597cd44c030e7..0000000000000 --- a/llvm/test/ThinLTO/X86/memprof-summary.ll +++ /dev/null @@ -1,184 +0,0 @@ -;; Check memprof summaries (per module, combined index, and distributed indexes) - -; RUN: split-file %s %t -; RUN: opt -module-summary %t/a.ll -o %ta.bc -; RUN: opt -module-summary %t/b.ll -o %tb.bc - -; RUN: llvm-dis -o - %ta.bc | FileCheck %s --check-prefix=PRELINKDISA -; PRELINKDISA: gv: (name: "main", {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) ; guid = 15822663052811949562 - -; RUN: llvm-dis -o - %tb.bc | FileCheck %s --check-prefix=PRELINKDISB -; PRELINKDISB: ^[[PLBAR:[0-9]+]] = gv: (name: "_Z3barv", {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) ; guid = 4555904644815367798 -; PRELINKDISB: ^[[PLFOO:[0-9]+]] = gv: (name: "_Z3foov", {{.*}} callsites: ((callee: ^[[PLBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) ; guid = 9191153033785521275 -; PRELINKDISB: ^[[PLBAZ]] = gv: (name: "_Z3bazv", {{.*}} callsites: ((callee: ^[[PLBAR]], clones: (0), stackIds: (12481870273128938184)))))) ; guid = 15176620447596392000 - -; RUN: llvm-bcanalyzer -dump %ta.bc | FileCheck %s --check-prefix=PRELINKBCANA -; PRELINKBCANA: - -; RUN: llvm-bcanalyzer -dump %tb.bc | FileCheck %s --check-prefix=PRELINKBCANB -; PRELINKBCANB: - -; RUN: llvm-lto2 run %ta.bc %tb.bc -o %t -save-temps \ -; RUN: -thinlto-distributed-indexes \ -; RUN: -r=%ta.bc,main,plx \ -; RUN: -r=%ta.bc,_Z3foov, \ -; RUN: -r=%ta.bc,free, \ -; RUN: -r=%ta.bc,sleep, \ -; RUN: -r=%tb.bc,_Z3foov,pl \ -; RUN: -r=%tb.bc,_Znam, \ -; RUN: -r=%tb.bc,_Z3bazv,pl - -; RUN: llvm-dis -o - %t.index.bc | FileCheck %s --check-prefix=COMBINEDDIS -; COMBINEDDIS: ^[[COMBBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) -; COMBINEDDIS: ^[[COMBFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[COMBBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) -; COMBINEDDIS: ^[[COMBBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[COMBBAR]], clones: (0), stackIds: (12481870273128938184)))))) -; COMBINEDDIS: ^[[COMBMAIN:[0-9]+]] = gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^[[COMBFOO]], clones: (0), stackIds: (8632435727821051414)), (callee: ^[[COMBFOO]], clones: (0), stackIds: (15025054523792398438)))))) - -; RUN: 
llvm-bcanalyzer -dump %t.index.bc | FileCheck %s --check-prefix=COMBINEDBCAN -; COMBINEDBCAN: - -; RUN: llvm-dis -o - %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISA -; DISTRIBUTEDDISA: gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: null, clones: (0), stackIds: (2732490490862098848)))))) -; DISTRIBUTEDDISA: gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) - -; RUN: llvm-dis -o - %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISB -; DISTRIBUTEDDISB: ^[[DISTRBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) -; DISTRIBUTEDDISB: ^[[DISTRFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[DISTRBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) -; DISTRIBUTEDDISB: ^[[DISTRBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[DISTRBAR]], clones: (0), stackIds: (12481870273128938184)))))) - -; RUN: llvm-bcanalyzer -dump %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANA -; DISTRIBUTEDBCANA: - -; RUN: llvm-bcanalyzer -dump %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANB -; DISTRIBUTEDBCANB: - -;--- a.ll -; ModuleID = 'a.cc' -source_filename = "a.cc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: mustprogress norecurse uwtable -define dso_local noundef i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #0 !dbg !39 { -entry: - %call = call noundef ptr @_Z3foov(), !dbg !42, !callsite !43 - %call1 = call noundef ptr @_Z3foov(), !dbg !44, !callsite !45 - call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call, i8 0, i64 10, i1 false), !dbg !46 - call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call1, i8 0, i64 10, i1 false), !dbg !47 - call void @free(ptr noundef %call) #4, !dbg !48 - %call2 = call i32 @sleep(i32 noundef 10), !dbg !49 - call void @free(ptr noundef %call1) #4, !dbg !50 - ret i32 0, !dbg !51 -} - -declare !dbg !52 noundef ptr @_Z3foov() local_unnamed_addr #1 - -; Function Attrs: argmemonly mustprogress nocallback nofree nounwind willreturn writeonly -declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 - -; Function Attrs: inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") -declare void @free(ptr allocptr nocapture noundef) local_unnamed_addr #3 - -declare !dbg !53 i32 @sleep(i32 noundef) local_unnamed_addr #1 - -attributes #0 = { mustprogress norecurse uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #2 = { argmemonly mustprogress nocallback nofree nounwind willreturn writeonly } -attributes #3 = { 
inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="malloc" "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #4 = { nounwind } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) -!1 = !DIFile(filename: "a.cc", directory: ".", checksumkind: CSK_MD5, checksum: "ebabd56909271a1d4a7cac81c10624d5") -!2 = !{i32 7, !"Dwarf Version", i32 5} -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{i32 8, !"PIC Level", i32 2} -!6 = !{i32 7, !"PIE Level", i32 2} -!7 = !{i32 7, !"uwtable", i32 2} -!8 = !{i32 7, !"frame-pointer", i32 2} -!39 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) -!40 = !DISubroutineType(types: !41) -!41 = !{} -!42 = !DILocation(line: 6, column: 13, scope: !39) -!43 = !{i64 8632435727821051414} -!44 = !DILocation(line: 7, column: 13, scope: !39) -!45 = !{i64 -3421689549917153178} -!46 = !DILocation(line: 8, column: 3, scope: !39) -!47 = !DILocation(line: 9, column: 3, scope: !39) -!48 = !DILocation(line: 10, column: 3, scope: !39) -!49 = !DILocation(line: 11, column: 3, scope: !39) -!50 = !DILocation(line: 12, column: 3, scope: !39) -!51 = !DILocation(line: 13, column: 3, scope: !39) -!52 = !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41) -!53 = !DISubprogram(name: "sleep", scope: !54, file: !54, line: 453, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41) -!54 = !DIFile(filename: "include/unistd.h", directory: "/usr", checksumkind: CSK_MD5, checksum: "ee8f41a17f563f029d0e930ad871815a") - -;--- b.ll -; ModuleID = 'b.cc' -source_filename = "b.cc" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -; Function Attrs: mustprogress noinline uwtable -define internal noalias noundef nonnull ptr @_Z3barv() local_unnamed_addr #0 !dbg !39 { -entry: - %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #2, !dbg !42, !memprof !43, !callsite !48 - ret ptr %call, !dbg !49 -} - -; Function Attrs: nobuiltin allocsize(0) -declare noundef nonnull ptr @_Znam(i64 noundef) local_unnamed_addr #1 - -; Function Attrs: mustprogress noinline uwtable -define dso_local noalias noundef nonnull ptr @_Z3bazv() local_unnamed_addr #0 !dbg !50 { -entry: - %call = call noundef ptr @_Z3barv(), !dbg !51, !callsite !52 - ret ptr %call, !dbg !53 -} - -; Function Attrs: mustprogress uwtable -define dso_local noalias noundef nonnull ptr @_Z3foov() local_unnamed_addr #3 !dbg !54 { -entry: - %call = call noundef ptr @_Z3bazv(), !dbg !55, !callsite !56 - ret ptr %call, !dbg !57 -} - -attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" 
"min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } -attributes #2 = { builtin allocsize(0) } -attributes #3 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) -!1 = !DIFile(filename: "b.cc", directory: ".", checksumkind: CSK_MD5, checksum: "335f81d275af57725cfc9ffc7be49bc2") -!2 = !{i32 7, !"Dwarf Version", i32 5} -!3 = !{i32 2, !"Debug Info Version", i32 3} -!4 = !{i32 1, !"wchar_size", i32 4} -!5 = !{i32 8, !"PIC Level", i32 2} -!6 = !{i32 7, !"PIE Level", i32 2} -!7 = !{i32 7, !"uwtable", i32 2} -!8 = !{i32 7, !"frame-pointer", i32 2} -!39 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, type: !40, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) -!40 = !DISubroutineType(types: !41) -!41 = !{} -!42 = !DILocation(line: 2, column: 10, scope: !39) -!43 = !{!44, !46} -!44 = !{!45, !"notcold"} -!45 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} -!46 = !{!47, !"cold"} -!47 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} -!48 = !{i64 9086428284934609951} -!49 = !DILocation(line: 2, column: 3, scope: !39) -!50 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) -!51 = !DILocation(line: 6, column: 10, scope: !50) -!52 = !{i64 -5964873800580613432} -!53 = !DILocation(line: 6, column: 3, scope: !50) -!54 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 9, type: !40, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41) -!55 = !DILocation(line: 10, column: 10, scope: !54) -!56 = !{i64 2732490490862098848} -!57 = !DILocation(line: 10, column: 3, scope: !54) diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll new file mode 100644 index 0000000000000..539d88a815ed1 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll @@ -0,0 +1,158 @@ +;; Test callsite context graph generation for simple call graph with +;; two memprof contexts and no inlining. 
+;; +;; Original code looks like: +;; +;; char *bar() { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() #0 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !0 + %call1 = call noundef ptr @_Z3foov(), !callsite !1 + ret i32 0 +} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1 + +; Function Attrs: nobuiltin +declare void @_ZdaPv() #2 + +define internal ptr @_Z3barv() #3 { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z3bazv() #4 { +entry: + %call = call noundef ptr @_Z3barv(), !callsite !8 + ret ptr null +} + +; Function Attrs: noinline +define internal ptr @_Z3foov() #5 { +entry: + %call = call noundef ptr @_Z3bazv(), !callsite !9 + ret ptr null +} + +; uselistorder directives +uselistorder ptr @_Z3foov, { 1, 0 } + +attributes #0 = { "tune-cpu"="generic" } +attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #2 = { nobuiltin } +attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } +attributes #4 = { "stack-protector-buffer-size"="8" } +attributes #5 = { noinline } +attributes #6 = { builtin } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, !"notcold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!5 = !{!6, !"cold"} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 -5964873800580613432} +!9 = !{i64 2732490490862098848} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[BAZ]] +; DUMP: %call = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[FOO]] +; DUMP: %call 
= call noundef ptr @_Z3bazv() (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: CallerEdges:
+; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
+
+; DUMP: Node [[MAIN1]]
+; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
+; DUMP: AllocTypes: NotCold
+; DUMP: ContextIds: 1
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
+; DUMP: CallerEdges:
+
+; DUMP: Node [[MAIN2]]
+; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0)
+; DUMP: AllocTypes: Cold
+; DUMP: ContextIds: 2
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: CallerEdges:
+
+
+; DOT: digraph "postbuild" {
+; DOT: label="postbuild";
+; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
+; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOT: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOT: }
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
new file mode 100644
index 0000000000000..c5ed97f182a98
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
@@ -0,0 +1,232 @@
+;; Test callsite context graph generation for call graph with MIBs
+;; that have pruned contexts that partially match multiple inlined
+;; callsite contexts, requiring duplication of context ids and nodes
+;; while matching callsite nodes onto the graph.
+;; +;; Original code looks like: +;; +;; char *D() { +;; return new char[10]; +;; } +;; +;; char *F() { +;; return D(); +;; } +;; +;; char *C() { +;; return D(); +;; } +;; +;; char *B() { +;; return C(); +;; } +;; +;; char *E() { +;; return C(); +;; } +;; int main(int argc, char **argv) { +;; char *x = B(); // cold +;; char *y = E(); // cold +;; char *z = F(); // default +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; memset(z, 0, 10); +;; delete[] z; +;; sleep(10); +;; delete[] x; +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The code below was created by forcing inlining of C into both B and E. +;; Since both allocation contexts via C are cold, the matched memprof +;; metadata has the context pruned above C's callsite. This requires +;; matching the stack node for C to callsites where it was inlined (i.e. +;; the callsites in B and E that have callsite metadata that includes C's). +;; It also requires duplication of that node in the graph as well as the +;; duplication of the context ids along that path through the graph, +;; so that we can represent the duplicated (via inlining) C callsite. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define internal ptr @_Z1Dv() { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !5 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z1Fv() #0 { +entry: + %call = call noundef ptr @_Z1Dv(), !callsite !6 + ret ptr null +} + +; Function Attrs: mustprogress noinline optnone uwtable +define internal ptr @_Z1Cv() #1 { +entry: + %call = call noundef ptr @_Z1Dv(), !callsite !7 + ret ptr null +} + +; Function Attrs: mustprogress noinline optnone uwtable +define internal ptr @_Z1Bv() #1 { +entry: + %call.i = call noundef ptr @_Z1Dv(), !callsite !8 + ret ptr null +} + +; Function Attrs: mustprogress noinline optnone uwtable +define internal ptr @_Z1Ev() #1 { +entry: + %call.i = call noundef ptr @_Z1Dv(), !callsite !9 + ret ptr null +} + +; Function Attrs: noinline +declare i32 @main() #2 + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: nounwind +declare void @_ZdaPv() #4 + +declare i32 @sleep() #5 + +attributes #0 = { "disable-tail-calls"="true" } +attributes #1 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { noinline } +attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #4 = { nounwind } +attributes #5 = { "no-trapping-math"="true" } +attributes #6 = { builtin } + +!0 = 
!{!1, !3} +!1 = !{!2, !"cold"} +!2 = !{i64 6541423618768552252, i64 -6270142974039008131} +!3 = !{!4, !"notcold"} +!4 = !{i64 6541423618768552252, i64 -4903163940066524832} +!5 = !{i64 6541423618768552252} +!6 = !{i64 -4903163940066524832} +!7 = !{i64 -6270142974039008131} +!8 = !{i64 -6270142974039008131, i64 -184525619819294889} +!9 = !{i64 -6270142974039008131, i64 1905834578520680781} + + +;; After adding only the alloc node memprof metadata, we only have 2 contexts. + +; DUMP: CCG before updating call stack chains: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 +; DUMP: Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2 + +; DUMP: Node [[C]] +; DUMP: null Call +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[F]] +; DUMP: null Call +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: CallerEdges: + +;; After updating for callsite metadata, we should have generated context ids 3 and 4, +;; along with 2 new nodes for those callsites. All have the same allocation type +;; behavior as the original C node. + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3 +; DUMP: Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 + +; DUMP: Node [[F]] +; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[C2]] +; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[B]] +; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[E]] +; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1 +; DUMP: CallerEdges: + + +; DOTPRE: digraph "prestackupdate" { +; DOTPRE: label="prestackupdate"; +; DOTPRE: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"]; +; DOTPRE: Node[[C:0x[a-z0-9]+]] 
[shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
+; DOTPRE: Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
+; DOTPRE: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
+; DOTPRE: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
+; DOTPRE: }
+
+
+; DOTPOST:digraph "postbuild" {
+; DOTPOST: label="postbuild";
+; DOTPOST: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
+; DOTPOST: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
+; DOTPOST: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
+; DOTPOST: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
+; DOTPOST: Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
+; DOTPOST: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
+; DOTPOST: Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
+; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
+; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
+; DOTPOST:}
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
new file mode 100644
index 0000000000000..da0fd3f44b45e
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
@@ -0,0 +1,386 @@
+;; Test callsite context graph generation for call graph with MIBs
+;; that have pruned contexts that partially match multiple inlined
+;; callsite contexts, requiring duplication of context ids and nodes
+;; while matching callsite nodes onto the graph. This test requires more
+;; complex duplication due to multiple contexts for different allocations
+;; that share some of the same callsite nodes.
+;;
+;; Original code looks like:
+;;
+;; char *D(bool Call1) {
+;; if (Call1)
+;; return new char[10];
+;; else
+;; return new char[10];
+;; }
+;;
+;; char *C(bool Call1) {
+;; return D(Call1);
+;; }
+;;
+;; char *B(bool Call1) {
+;; if (Call1)
+;; return C(true);
+;; else
+;; return C(false);
+;; }
+;;
+;; char *A(bool Call1) {
+;; return B(Call1);
+;; }
+;;
+;; char *A1() {
+;; return A(true);
+;; }
+;;
+;; char *A2() {
+;; return A(true);
+;; }
+;;
+;; char *A3() {
+;; return A(false);
+;; }
+;;
+;; char *A4() {
+;; return A(false);
+;; }
+;;
+;; char *E() {
+;; return B(true);
+;; }
+;;
+;; char *F() {
+;; return B(false);
+;; }
+;;
+;; int main(int argc, char **argv) {
+;; char *a1 = A1(); // cold
+;; char *a2 = A2(); // cold
+;; char *e = E(); // default
+;; char *a3 = A3(); // default
+;; char *a4 = A4(); // default
+;; char *f = F(); // cold
+;; memset(a1, 0, 10);
+;; memset(a2, 0, 10);
+;; memset(e, 0, 10);
+;; memset(a3, 0, 10);
+;; memset(a4, 0, 10);
+;; memset(f, 0, 10);
+;; delete[] a3;
+;; delete[] a4;
+;; delete[] e;
+;; sleep(10);
+;; delete[] a1;
+;; delete[] a2;
+;; delete[] f;
+;; return 0;
+;; }
+;;
+;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; memory freed after sleep(10) results in cold lifetimes.
+;;
+;; The code below was created by forcing inlining of A into its callers,
+;; without any other inlining or optimizations. Since both allocation contexts
+;; via A for each allocation in D have the same allocation type (cold via
+;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second
+;; new in D), the contexts for those respective allocations are pruned above A.
+;; The allocations via E and F are to ensure we don't prune above B.
+;;
+;; The matching onto the inlined A[1234]->A sequences will require duplication
+;; of the context id assigned to the context from A for each allocation in D.
+;; This test ensures that we do this correctly in the presence of callsites
+;; shared by the different duplicated context ids (i.e. callsite in C).
+;;
+;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
+
+; RUN: opt -passes=memprof-context-disambiguation \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
+; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
+; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress noinline uwtable
+define ptr @_Z1Db(i1 %Call1) #0 {
+entry:
+ %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
+ br label %return
+
+if.else: ; No predecessors!
+ %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !6, !callsite !11
+ br label %return
+
+return: ; preds = %if.else, %entry
+ ret ptr null
+}
+
+; Function Attrs: nobuiltin
+declare ptr @_Znam(i64) #1
+
+define ptr @_Z1Cb(i1 %Call1) {
+entry:
+ %tobool = trunc i8 0 to i1
+ %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool), !callsite !12
+ ret ptr null
+}
+
+; Function Attrs: mustprogress noinline uwtable
+define ptr @_Z1Bb(i1 %Call1) #0 {
+entry:
+ %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true), !callsite !13
+ br label %return
+
+if.else: ; No predecessors!
+ %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false), !callsite !14 + br label %return + +return: ; preds = %if.else, %entry + ret ptr null +} + +define ptr @_Z1Ab(i1 %tobool) #2 { +entry: + %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool), !callsite !15 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z2A1v(i1 %tobool.i) #0 { +entry: + %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !16 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z2A2v(i1 %tobool.i) #0 { +entry: + %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !17 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z2A3v(i1 %tobool.i) #0 { +entry: + %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !18 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z2A4v(i1 %tobool.i) #0 { +entry: + %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !19 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z1Ev() #0 { +entry: + %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true), !callsite !20 + ret ptr null +} + +; Function Attrs: mustprogress noinline uwtable +define ptr @_Z1Fv() #0 { +entry: + %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false), !callsite !21 + ret ptr null +} + +; Function Attrs: noinline +declare i32 @main() #3 + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 + +declare void @_ZdaPv() #5 + +declare i32 @sleep() #6 + +; uselistorder directives +uselistorder ptr @_Znam, { 1, 0 } + +attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nobuiltin } +attributes #2 = { "tune-cpu"="generic" } +attributes #3 = { noinline } +attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } +attributes #6 = { "disable-tail-calls"="true" } +attributes #7 = { builtin allocsize(0) } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781} +!3 = !{!4, !"cold"} +!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978} +!5 = !{i64 4854880825882961848} +!6 = !{!7, !9} +!7 = !{!8, !"notcold"} +!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978} +!9 = !{!10, !"cold"} +!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832} +!11 = !{i64 -8775068539491628272} +!12 = !{i64 -904694911315397047} +!13 = !{i64 6532298921261778285} +!14 = !{i64 7859682663773658275} +!15 = !{i64 -6528110295079665978} +!16 = !{i64 -6528110295079665978, i64 5747919905719679568} +!17 = !{i64 -6528110295079665978, i64 -5753238080028016843} +!18 = !{i64 -6528110295079665978, i64 1794685869326395337} +!19 = !{i64 -6528110295079665978, i64 5462047985461644151} +!20 = !{i64 1905834578520680781} +!21 = !{i64 -4903163940066524832} + + +;; After adding only the alloc node memprof metadata, we only 
have 4 contexts (we only
+;; match the interesting parts of the pre-update graph here).
+
+; DUMP: CCG before updating call stack chains:
+; DUMP: Callsite Context Graph:
+
+; DUMP: Node [[D1:0x[a-z0-9]+]]
+; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2
+
+; DUMP: Node [[C:0x[a-z0-9]+]]
+; DUMP: null Call
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2 3 4
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2
+; DUMP: Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4
+
+; DUMP: Node [[D2]]
+; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 3 4
+
+
+;; After updating for callsite metadata, we should have duplicated the context
+;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A,
+;; and used those on new nodes for those callers. Note that while in reality
+;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4,
+;; due to the pruning we have lost this information and thus end up duplicating
+;; both of A's contexts to all of the new nodes (which could result in some
+;; unnecessary cloning).
+
+; DUMP: CCG before cloning:
+; DUMP: Callsite Context Graph:
+; DUMP: Node [[D1]]
+; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2 5 7 9 11
+; DUMP: CalleeEdges:
+; DUMP: CallerEdges:
+; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
+
+; DUMP: Node [[C]]
+; DUMP: %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool) (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
+; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
+; DUMP: CallerEdges:
+; DUMP: Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
+; DUMP: Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
+
+; DUMP: Node [[B1]]
+; DUMP: %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true) (clone 0)
+; DUMP: AllocTypes: NotColdCold
+; DUMP: ContextIds: 1 2 5 7 9 11
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
+; DUMP: CallerEdges:
+; DUMP: Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
+; DUMP: Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5
+; DUMP: Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7
+; DUMP: Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9
+; DUMP: Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11
+; DUMP: Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
+
+; DUMP: Node [[E]]
+; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true) (clone 0)
+; DUMP: AllocTypes: NotCold
+; DUMP: ContextIds: 1
+; DUMP: CalleeEdges:
+; DUMP: Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1
+; DUMP: CallerEdges:
+
+; DUMP: Node [[D2]]
+; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 6 8 10 12 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 + +; DUMP: Node [[B2]] +; DUMP: %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 6 8 10 12 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 +; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 +; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 +; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 +; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 + +; DUMP: Node [[F]] +; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false) (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[A2]] +; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 5 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5 +; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 +; DUMP: CallerEdges: + +; DUMP: Node [[A3]] +; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 7 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7 +; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 +; DUMP: CallerEdges: + +; DUMP: Node [[A1]] +; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 9 10 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9 +; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 +; DUMP: CallerEdges: + +; DUMP: Node [[A4]] +; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 11 12 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11 +; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 +; DUMP: CallerEdges: + +; DUMP: Node [[A]] +; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll new file mode 100644 index 0000000000000..9ebf219dd37a0 --- /dev/null +++ 
b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll @@ -0,0 +1,261 @@ +;; Tests callsite context graph generation for call graph containing indirect +;; calls. Currently this should result in conservative behavior, such that the +;; indirect call receives a null call in its graph node, to prevent subsequent +;; cloning. +;; +;; Original code looks like: +;; +;; char *foo() { +;; return new char[10]; +;; } +;; class A { +;; public: +;; virtual char *x() { return foo(); } +;; }; +;; class B : public A { +;; public: +;; char *x() final { return foo(); } +;; }; +;; char *bar(A *a) { +;; return a->x(); +;; } +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; B b; +;; char *z = bar(&b); +;; char *w = bar(&b); +;; A a; +;; char *r = bar(&a); +;; char *s = bar(&a); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; memset(z, 0, 10); +;; memset(w, 0, 10); +;; memset(r, 0, 10); +;; memset(s, 0, 10); +;; delete[] x; +;; delete[] w; +;; delete[] r; +;; sleep(10); +;; delete[] y; +;; delete[] z; +;; delete[] s; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; Compiled without optimization to prevent inlining and devirtualization. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare ptr @_Z3barP1A(ptr) + +define i32 @main(ptr %b, ptr %a) #0 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !0 + %call1 = call noundef ptr @_Z3foov(), !callsite !1 + %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !2 + %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !3 + %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !4 + %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !5 + ret i32 0 +} + +; Function Attrs: noinline +declare void @_ZN1BC2Ev() #1 + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 + +; Function Attrs: nobuiltin +declare void @_ZdaPv() #3 + +define internal ptr @_ZN1A1xEv() #4 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !6 + ret ptr null +} + +; Function Attrs: mustprogress uwtable +define internal ptr @_ZN1B1xEv() #5 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !7 + ret ptr null +} + +; Function Attrs: mustprogress uwtable +define internal ptr @_Z3foov() #5 { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !8, !callsite !21 + ret ptr null +} + +declare ptr @_Znam(i64) #6 + +; uselistorder directives +uselistorder ptr @_Z3foov, { 3, 2, 1, 0 } + +attributes #0 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } +attributes #1 = { noinline } +attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #3 = { nobuiltin } +attributes #4 = { "tune-cpu"="generic" } +attributes #5 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" 
"min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #6 = { "disable-tail-calls"="true" } +attributes #7 = { builtin } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{i64 6792096022461663180} +!3 = !{i64 -2709642582978494015} +!4 = !{i64 748269490701775343} +!5 = !{i64 -5747251260480066785} +!6 = !{i64 8256774051149711748} +!7 = !{i64 -4831879094954754638} +!8 = !{!9, !11, !13, !15, !17, !19} +!9 = !{!10, !"notcold"} +!10 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 748269490701775343} +!11 = !{!12, !"cold"} +!12 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 -5747251260480066785} +!13 = !{!14, !"notcold"} +!14 = !{i64 2732490490862098848, i64 8632435727821051414} +!15 = !{!16, !"cold"} +!16 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 6792096022461663180} +!17 = !{!18, !"notcold"} +!18 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 -2709642582978494015} +!19 = !{!20, !"cold"} +!20 = !{i64 2732490490862098848, i64 -3421689549917153178} +!21 = !{i64 2732490490862098848} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[FOO:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 5 6 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 6 + +; DUMP: Node [[AX]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +;; Bar contains an indirect call, with multiple targets. It's call should be null. 
+; DUMP: Node [[BAR]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 4 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 5 + +; DUMP: Node [[MAIN3]] +; DUMP: %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN4]] +; DUMP: %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[BX]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 4 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 + +; DUMP: Node [[MAIN5]] +; DUMP: %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN6]] +; DUMP: %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 +; DUMP: CallerEdges: + + +; DOT: digraph "postbuild" { +; DOT: label="postbuild"; +; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5 6",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"]; +; DOT: Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"]; +; DOT: Node[[AX]] -> Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"]; +; DOT: Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 
2",fillcolor="mediumorchid1"]; +; DOT: Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOT: Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOT: Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"]; +; DOT: Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOT: Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"]; +; DOT: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"]; +; DOT: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll new file mode 100644 index 0000000000000..59f135ca06627 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll @@ -0,0 +1,189 @@ +;; Test callsite context graph generation for call graph with two memprof +;; contexts and partial inlining, requiring generation of a new fused node to +;; represent the inlined sequence while matching callsite nodes onto the graph. +;; +;; Original code looks like: +;; +;; char *bar() { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The code below was created by forcing inlining of baz into foo, and +;; bar into baz. Due to the inlining of bar we will initially have two +;; allocation nodes in the graph. This tests that we correctly match +;; foo (with baz inlined) onto the graph nodes first, and generate a new +;; fused node for it. 
We should then not match baz (with bar inlined) as that +;; is not reached by the MIB contexts (since all calls from main will look +;; like main -> foo(+baz) -> bar after the inlining reflected in this IR). +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define internal ptr @_Z3barv() { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5 + ret ptr null +} + +; Function Attrs: nobuiltin +declare ptr @_Znam(i64) #0 + +; Function Attrs: mustprogress +define internal ptr @_Z3bazv() #1 { +entry: + %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !6 + ret ptr null +} + +; Function Attrs: noinline +define internal ptr @_Z3foov() #2 { +entry: + %call.i = call noundef ptr @_Z3barv(), !callsite !7 + ret ptr null +} + +define i32 @main() #3 { +entry: + %call = call noundef ptr @_Z3foov(), !callsite !8 + %call1 = call noundef ptr @_Z3foov(), !callsite !9 + ret i32 0 +} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 + +; Function Attrs: nounwind +declare void @_ZdaPv() #5 + +declare i32 @sleep() #6 + +attributes #0 = { nobuiltin } +attributes #1 = { mustprogress } +attributes #2 = { noinline } +attributes #3 = { "tune-cpu"="generic" } +attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #5 = { nounwind } +attributes #6 = { "disable-tail-calls"="true" } +attributes #7 = { builtin } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 9086428284934609951} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432} +!7 = !{i64 -5964873800580613432, i64 2732490490862098848} +!8 = !{i64 8632435727821051414} +!9 = !{i64 -3421689549917153178} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +;; This is leftover from the MIB on the alloc inlined into baz. It is not +;; matched with any call, since there is no such node in the IR. Due to the +;; null call it will not participate in any context transformations. 
+; DUMP: Node [[FOO2:0x[a-z0-9]+]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ:0x[a-z0-9]+]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 + +; DUMP: Node [[MAIN1]] +; DUMP: %call = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[BAZ]] +; DUMP: %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4 + +;; This is the node synthesized for the call to bar in foo that was created +;; by inlining baz into foo. +; DUMP: Node [[FOO]] +; DUMP: %call.i = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 + + +; DOT: digraph "postbuild" { +; DOT: label="postbuild"; +; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"]; +; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"]; +; DOT: Node[[FOO]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOT: Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOT: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOT: Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"]; +; DOT: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 
2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; +; DOT: Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: } diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll new file mode 100644 index 0000000000000..a3a056ade8c49 --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll @@ -0,0 +1,135 @@ +;; Test callsite context graph generation for call graph with two memprof +;; contexts and multiple levels of inlining, requiring generation of new +;; fused nodes to represent the inlined sequence while matching callsite +;; nodes onto the graph. In particular this tests the case where a function +;; has inlined a callee containing an inlined callee. +;; +;; Original code looks like: +;; +;; char *bar() __attribute__((noinline)) { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; Both foo and baz are inlined into main, at both foo callsites. +;; We should update the graph for new fused nodes for both of those inlined +;; callsites to bar. +;; +;; Note that baz and bar are both dead due to the inlining, but have been left +;; in the input IR to ensure that the MIB call chain is matched to the longer +;; inline sequences from main. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
+ +; RUN: opt -passes=memprof-context-disambiguation \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define ptr @_Z3barv() #0 { +entry: + %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !memprof !7, !callsite !12, !heapallocsite !13 + ret ptr null +} + +; Function Attrs: nobuiltin +declare ptr @_Znam(i64) #1 + +; Function Attrs: mustprogress +declare ptr @_Z3bazv() #2 + +define i32 @main() #3 { +delete.end5: + %call.i.i = call noundef ptr @_Z3barv(), !callsite !14 + %call.i.i8 = call noundef ptr @_Z3barv(), !callsite !15 + ret i32 0 +} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4 + +declare void @_ZdaPv() #5 + +declare i32 @sleep() #6 + +attributes #0 = { "stack-protector-buffer-size"="8" } +attributes #1 = { nobuiltin } +attributes #2 = { mustprogress } +attributes #3 = { "tune-cpu"="generic" } +attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" } +attributes #6 = { "disable-tail-calls"="true" } +attributes #7 = { builtin } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6} + +!0 = !{i32 7, !"Dwarf Version", i32 5} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = !{i32 1, !"wchar_size", i32 4} +!3 = !{i32 8, !"PIC Level", i32 2} +!4 = !{i32 7, !"PIE Level", i32 2} +!5 = !{i32 7, !"uwtable", i32 2} +!6 = !{i32 7, !"frame-pointer", i32 2} +!7 = !{!8, !10} +!8 = !{!9, !"notcold"} +!9 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!10 = !{!11, !"cold"} +!11 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!12 = !{i64 9086428284934609951} +!13 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!14 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!15 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !heapallocsite !7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +;; This is the node synthesized for the first inlined call chain of main->foo->baz +; DUMP: Node [[MAIN1]] +; DUMP: %call.i.i = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +;; This is the node synthesized for the second inlined call chain of main->foo->baz +; DUMP: Node [[MAIN2]] +; DUMP: %call.i.i8 = call noundef ptr @_Z3barv() (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: diff --git 
a/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll b/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll new file mode 100644 index 0000000000000..fede5fe96eccd --- /dev/null +++ b/llvm/test/Transforms/MemProfContextDisambiguation/pass-pipeline.ll @@ -0,0 +1,41 @@ +;; Test that MemProfContextDisambiguation is enabled under the expected conditions +;; and in the expected position. + +;; Pass is not currently enabled by default at any opt level. +; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \ +; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" +; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \ +; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" +; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \ +; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" +; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \ +; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" + +;; Pass should not run even under option at O0/O1. +; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \ +; RUN: -enable-memprof-context-disambiguation \ +; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" +; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \ +; RUN: -enable-memprof-context-disambiguation \ +; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation" + +;; Pass should be enabled under option at O2/O3. +; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \ +; RUN: -enable-memprof-context-disambiguation \ +; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED +; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \ +; RUN: -enable-memprof-context-disambiguation \ +; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED + +;; When enabled, MemProfContextDisambiguation runs just after inlining. +; ENABLED: Running pass: InlinerPass +; ENABLED: Invalidating analysis: InlineAdvisorAnalysis +; ENABLED: Running pass: MemProfContextDisambiguation + +define noundef ptr @_Z3barv() { +entry: + %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) + ret ptr %call +} + +declare noundef nonnull ptr @_Znam(i64 noundef) From 96449fa5b639449b720a9d84da5d8c29084b4328 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 22 Mar 2023 17:21:25 +0000 Subject: [PATCH 323/691] [gn build] Port 700cd99061ed --- llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn index 644d30f10854e..0dbeb793e40eb 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn @@ -48,6 +48,7 @@ static_library("IPO") { "Internalize.cpp", "LoopExtractor.cpp", "LowerTypeTests.cpp", + "MemProfContextDisambiguation.cpp", "MergeFunctions.cpp", "ModuleInliner.cpp", "OpenMPOpt.cpp", From 77044a47b4dec308e02c796e7951ab1745a7f53c Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Wed, 22 Mar 2023 04:57:08 +0000 Subject: [PATCH 324/691] [CMake] Build runtimes for riscv64-unknown-fuchsia This is necessary to have a complete RISC-V toolchain for Fuchsia.
Differential Revision: https://reviews.llvm.org/D146608 --- clang/cmake/caches/Fuchsia-stage2.cmake | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index c874d8cacd197..037cb67e82189 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -204,7 +204,7 @@ if(FUCHSIA_SDK) set(BUILTINS_${target}_CMAKE_SYSROOT ${FUCHSIA_${target}_SYSROOT} CACHE PATH "") endforeach() - foreach(target x86_64-unknown-fuchsia;aarch64-unknown-fuchsia) + foreach(target x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia) # Set the per-target runtimes options. list(APPEND RUNTIME_TARGETS "${target}") set(RUNTIMES_${target}_CMAKE_SYSTEM_NAME Fuchsia CACHE STRING "") @@ -276,12 +276,12 @@ if(FUCHSIA_SDK) set(LLVM_RUNTIME_MULTILIBS "asan;noexcept;compat;asan+noexcept;hwasan;hwasan+noexcept" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_asan_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_compat_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_asan+noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_hwasan_TARGETS "aarch64-unknown-fuchsia" CACHE STRING "") - set(LLVM_RUNTIME_MULTILIB_hwasan+noexcept_TARGETS "aarch64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_asan_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_compat_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_asan+noexcept_TARGETS "x86_64-unknown-fuchsia;aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_hwasan_TARGETS "aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") + set(LLVM_RUNTIME_MULTILIB_hwasan+noexcept_TARGETS "aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") endif() set(LLVM_BUILTIN_TARGETS "${BUILTIN_TARGETS}" CACHE STRING "") From 84de01908b58f3aa25cc3dc700a8a1b01b5263f0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 22 Mar 2023 10:24:57 -0700 Subject: [PATCH 325/691] [RISCV] Remove AnyReg RegisterClass used by .insn instructions. Use custom operand instead. The fake register class interferes too much with the autogenerated register class tables. Especially the fake spill size. I'm working on .insn support for compressed instructions and adding AnyRegC broke CodeGen. 
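For illustration, a `.insn` directive can name registers from several register classes in a single instruction, so the parser needs an operand kind that accepts any of them. A hypothetical encoding (assembly sketch only, not from this patch):

```
# rd/rs1/rs2 drawn from GPR, FPR, and vector registers; the custom
# AnyReg operand's isAnyReg() predicate accepts all three classes.
.insn r 0x33, 0, 0, a0, f10, v8
```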
--- llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 6 ++++++ llvm/lib/Target/RISCV/RISCVInstrInfo.td | 11 +++++++++++ llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 12 ------------ 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 9c6d54e62b16c..d984f39321a6e 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -368,6 +368,12 @@ struct RISCVOperand final : public MCParsedAsmOperand { bool isV0Reg() const { return Kind == KindTy::Register && Reg.RegNum == RISCV::V0; } + bool isAnyReg() const { + return Kind == KindTy::Register && + (RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(Reg.RegNum) || + RISCVMCRegisterClasses[RISCV::FPR64RegClassID].contains(Reg.RegNum) || + RISCVMCRegisterClasses[RISCV::VRRegClassID].contains(Reg.RegNum)); + } bool isImm() const override { return Kind == KindTy::Immediate; } bool isMem() const override { return false; } bool isSystemRegister() const { return Kind == KindTy::SystemRegister; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index ab8a8a4cc9935..85c3082dce64f 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1090,6 +1090,17 @@ def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>; // .insn directive instructions //===----------------------------------------------------------------------===// +def AnyRegOperand : AsmOperandClass { + let Name = "AnyRegOperand"; + let RenderMethod = "addRegOperands"; + let PredicateMethod = "isAnyReg"; +} + +def AnyReg : Operand<untyped> { + let OperandType = "OPERAND_REGISTER"; + let ParserMatchClass = AnyRegOperand; +} + // isCodeGenOnly = 1 to hide them from the tablegened assembly parser. let isCodeGenOnly = 1, hasSideEffects = 1, mayLoad = 1, mayStore = 1, hasNoSchedulingInfo = 1 in { diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index 7e91441e91f47..d06453c82739e 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -578,15 +578,3 @@ foreach m = LMULList.m in { // Special registers def FFLAGS : RISCVReg<0, "fflags">; def FRM : RISCVReg<0, "frm">; - -// Any type register. Used for .insn directives when we don't know what the -// register types could be. -// NOTE: The alignment and size are bogus values. The Size needs to be non-zero -// or tablegen will use "untyped" to determine the size which will assert. -let isAllocatable = 0 in -def AnyReg : RegisterClass<"RISCV", [untyped], 32, - (add (sequence "X%u", 0, 31), - (sequence "F%u_D", 0, 31), - (sequence "V%u", 0, 31))> { - let Size = 32; -} From c4125a37806aa2f663018f4f8dc5bbd5159c51c1 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Thu, 2 Mar 2023 12:04:27 -0800 Subject: [PATCH 326/691] Revert "Remove the LINK_COMPONENTS entry from lldb-instr CMakery" This reverts commit e12a950d90f88aeddaa97d6e7c8fd0bfedc42f73. D142241 broke `-DBUILD_SHARED_LIBS=ON` build. After investigations in https://github.com/llvm/llvm-project/issues/60314, the issue that prompted D142441 now seems gone. Fixes https://github.com/llvm/llvm-project/issues/60314.
Reviewed By: JDevlieghere Differential Revision: https://reviews.llvm.org/D145181 --- lldb/tools/lldb-instr/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lldb/tools/lldb-instr/CMakeLists.txt b/lldb/tools/lldb-instr/CMakeLists.txt index a1bbd7e2b7c93..8da453b2894fd 100644 --- a/lldb/tools/lldb-instr/CMakeLists.txt +++ b/lldb/tools/lldb-instr/CMakeLists.txt @@ -11,4 +11,6 @@ add_lldb_tool(lldb-instr clangSerialization clangTooling + LINK_COMPONENTS + Support ) From 24e3102edb4a48b8754efeaecad3e474a626f35c Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Tue, 21 Mar 2023 16:20:22 -0700 Subject: [PATCH 327/691] [TSan] Avoid deadlock between ReportRace() and dlopen() interceptor This change prevents rare deadlocks observed for specific macOS/iOS GUI applications which issue many `dlopen()` calls from multiple different threads at startup and where TSan finds and reports a race during startup. Providing a reliable test for this has been deemed infeasible. Although I've only observed this deadlock on Apple platforms, conceptually the cause is not confined to Apple code so the fix lives in platform-independent code. Deadlock scenario: ``` Thread 2 | Thread 4 ReportRace() | Lock internal TSan mutexes | &ctx->slot_mtx | | dlopen() interceptor | OnLibraryLoaded() | MemoryMappingLayout::DumpListOfModules() | calls dyld API, which takes internal lock | lock() interceptor | TSan tries to take internal mutexes again | &ctx->slot_mtx call into symbolizer | MemoryMappingLayout::DumpListOfModules() calls dyld API, which hangs on trying to take lock ``` Resulting in: * Thread 2 has internal TSan mutex, blocked on dyld lock * Thread 4 has dyld lock, blocked on internal TSan mutex The fix prevents this situation by not intercepting any of the calls originating from `MemoryMappingLayout::DumpListOfModules()`. 
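Reduced to its essence, this is a classic ABBA lock-order inversion. The sketch below uses plain std::mutex stand-ins rather than the real TSan and dyld locks:

```cpp
#include <mutex>

std::mutex tsan_mtx; // stands in for TSan's internal ctx->slot_mtx
std::mutex dyld_mtx; // stands in for dyld's loader lock

void report_race_path() {                  // Thread 2
  std::lock_guard<std::mutex> a(tsan_mtx); // TSan mutex first...
  std::lock_guard<std::mutex> b(dyld_mtx); // ...then dyld, via the symbolizer
}

void dlopen_path() {                       // Thread 4
  std::lock_guard<std::mutex> b(dyld_mtx); // dyld lock first...
  std::lock_guard<std::mutex> a(tsan_mtx); // ...then TSan, via an interceptor
}
```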
Stack traces for deadlock between ReportRace() and dlopen() interceptor: ``` thread #2, queue = 'com.apple.root.default-qos' frame #0: libsystem_kernel.dylib frame #1: libclang_rt.tsan_osx_dynamic.dylib`::wrap_os_unfair_lock_lock_with_options(lock=<unavailable>, options=<unavailable>) at tsan_interceptors_mac.cpp:306:3 frame #2: dyld`dyld4::RuntimeLocks::withLoadersReadLock(this=0x000000016f21b1e0, work=0x00000001814523c0) block_pointer) at DyldRuntimeState.cpp:227:28 frame #3: dyld`dyld4::APIs::_dyld_get_image_header(this=0x0000000101012a20, imageIndex=614) at DyldAPIs.cpp:240:11 frame #4: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::MemoryMappingLayout::CurrentImageHeader(this=<unavailable>) at sanitizer_procmaps_mac.cpp:391:35 frame #5: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::MemoryMappingLayout::Next(this=0x000000016f2a2800, segment=0x000000016f2a2738) at sanitizer_procmaps_mac.cpp:397:51 frame #6: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::MemoryMappingLayout::DumpListOfModules(this=0x000000016f2a2800, modules=0x00000001011000a0) at sanitizer_procmaps_mac.cpp:460:10 frame #7: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::ListOfModules::init(this=0x00000001011000a0) at sanitizer_mac.cpp:610:18 frame #8: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::Symbolizer::FindModuleForAddress(unsigned long) [inlined] __sanitizer::Symbolizer::RefreshModules(this=0x0000000101100078) at sanitizer_symbolizer_libcdep.cpp:185:12 frame #9: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::Symbolizer::FindModuleForAddress(this=0x0000000101100078, address=6465454512) at sanitizer_symbolizer_libcdep.cpp:204:5 frame #10: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::Symbolizer::SymbolizePC(this=0x0000000101100078, addr=6465454512) at sanitizer_symbolizer_libcdep.cpp:88:15 frame #11: libclang_rt.tsan_osx_dynamic.dylib`__tsan::SymbolizeCode(addr=6465454512) at tsan_symbolize.cpp:106:35 frame #12: libclang_rt.tsan_osx_dynamic.dylib`__tsan::SymbolizeStack(trace=StackTrace @ 0x0000600002d66d00) at tsan_rtl_report.cpp:112:28 frame #13: libclang_rt.tsan_osx_dynamic.dylib`__tsan::ScopedReportBase::AddMemoryAccess(this=0x000000016f2a2a90, addr=4381057136, external_tag=<unavailable>, s=<unavailable>, tid=<unavailable>, stack=<unavailable>, mset=0x00000001012fc310) at tsan_rtl_report.cpp:190:16 frame #14: libclang_rt.tsan_osx_dynamic.dylib`__tsan::ReportRace(thr=0x00000001012fc000, shadow_mem=0x000008020a4340e0, cur=<unavailable>, old=<unavailable>, typ0=1) at tsan_rtl_report.cpp:795:9 frame #15: libclang_rt.tsan_osx_dynamic.dylib`__tsan::DoReportRace(thr=0x00000001012fc000, shadow_mem=0x000008020a4340e0, cur=Shadow @ x22, old=Shadow @ 0x0000600002d6b4f0, typ=1) at tsan_rtl_access.cpp:166:3 frame #16: libclang_rt.tsan_osx_dynamic.dylib`::__tsan_read8(void *) at tsan_rtl_access.cpp:220:5 frame #17: libclang_rt.tsan_osx_dynamic.dylib`::__tsan_read8(void *) [inlined] __tsan::MemoryAccess(thr=0x00000001012fc000, pc=<unavailable>, addr=<unavailable>, size=8, typ=1) at tsan_rtl_access.cpp:442:3 frame #18: libclang_rt.tsan_osx_dynamic.dylib`::__tsan_read8(addr=<unavailable>) at tsan_interface.inc:34:3 thread #4, queue = 'com.apple.dock.fullscreen' frame #0: libsystem_kernel.dylib frame #1: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::FutexWait(p=<unavailable>, cmp=<unavailable>) at sanitizer_mac.cpp:540:3 frame #2: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::Semaphore::Wait(this=<unavailable>) at sanitizer_mutex.cpp:35:7 frame #3: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::Mutex::Lock(this=0x0000000102992a80) at sanitizer_mutex.h:196:18 frame #4: libclang_rt.tsan_osx_dynamic.dylib`__tsan::ScopedInterceptor::~ScopedInterceptor() [inlined]
__sanitizer::GenericScopedLock<__sanitizer::Mutex>::GenericScopedLock(this=<unavailable>, mu=0x0000000102992a80) at sanitizer_mutex.h:383:10 frame #5: libclang_rt.tsan_osx_dynamic.dylib`__tsan::ScopedInterceptor::~ScopedInterceptor() [inlined] __sanitizer::GenericScopedLock<__sanitizer::Mutex>::GenericScopedLock(this=<unavailable>, mu=0x0000000102992a80) at sanitizer_mutex.h:382:77 frame #6: libclang_rt.tsan_osx_dynamic.dylib`__tsan::ScopedInterceptor::~ScopedInterceptor() at tsan_rtl.h:708:10 frame #7: libclang_rt.tsan_osx_dynamic.dylib`__tsan::ScopedInterceptor::~ScopedInterceptor() [inlined] __tsan::TryTraceFunc(thr=0x000000010f084000, pc=0) at tsan_rtl.h:751:7 frame #8: libclang_rt.tsan_osx_dynamic.dylib`__tsan::ScopedInterceptor::~ScopedInterceptor() [inlined] __tsan::FuncExit(thr=0x000000010f084000) at tsan_rtl.h:798:7 frame #9: libclang_rt.tsan_osx_dynamic.dylib`__tsan::ScopedInterceptor::~ScopedInterceptor(this=0x000000016f3ba280) at tsan_interceptors_posix.cpp:300:5 frame #10: libclang_rt.tsan_osx_dynamic.dylib`__tsan::ScopedInterceptor::~ScopedInterceptor(this=<unavailable>) at tsan_interceptors_posix.cpp:293:41 frame #11: libclang_rt.tsan_osx_dynamic.dylib`::wrap_os_unfair_lock_lock_with_options(lock=0x000000016f21b1e8, options=OS_UNFAIR_LOCK_NONE) at tsan_interceptors_mac.cpp:310:1 frame #12: dyld`dyld4::RuntimeLocks::withLoadersReadLock(this=0x000000016f21b1e0, work=0x00000001814525d4) block_pointer) at DyldRuntimeState.cpp:227:28 frame #13: dyld`dyld4::APIs::_dyld_get_image_vmaddr_slide(this=0x0000000101012a20, imageIndex=412) at DyldAPIs.cpp:273:11 frame #14: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::MemoryMappingLayout::Next(__sanitizer::MemoryMappedSegment*) at sanitizer_procmaps_mac.cpp:286:17 frame #15: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::MemoryMappingLayout::Next(this=0x000000016f3ba560, segment=0x000000016f3ba498) at sanitizer_procmaps_mac.cpp:432:15 frame #16: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::MemoryMappingLayout::DumpListOfModules(this=0x000000016f3ba560, modules=0x000000016f3ba618) at sanitizer_procmaps_mac.cpp:460:10 frame #17: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::ListOfModules::init(this=0x000000016f3ba618) at sanitizer_mac.cpp:610:18 frame #18: libclang_rt.tsan_osx_dynamic.dylib`__sanitizer::LibIgnore::OnLibraryLoaded(this=0x0000000101f3aa40, name="") at sanitizer_libignore.cpp:54:11 frame #19: libclang_rt.tsan_osx_dynamic.dylib`::wrap_dlopen(filename="", flag=<unavailable>) at sanitizer_common_interceptors.inc:6466:3 ``` rdar://106766395 Differential Revision: https://reviews.llvm.org/D146593 --- .../lib/tsan/rtl/tsan_interceptors_posix.cpp | 16 +++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp index 97aa4b77311f1..6ac6ac6a7fb4c 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp @@ -2497,11 +2497,21 @@ static void HandleRecvmsg(ThreadState *thr, uptr pc, res; \ }) +// Ignore interceptors in OnLibraryLoaded()/Unloaded(). These hooks use code +// (ListOfModules::init, MemoryMappingLayout::DumpListOfModules) that make +// intercepted calls, which can cause deadlocks with ReportRace() which also +// uses this code.
#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) \ - libignore()->OnLibraryLoaded(filename) + ({ \ + ScopedIgnoreInterceptors ignore_interceptors; \ + libignore()->OnLibraryLoaded(filename); \ + }) -#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() \ - libignore()->OnLibraryUnloaded() +#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() \ + ({ \ + ScopedIgnoreInterceptors ignore_interceptors; \ + libignore()->OnLibraryUnloaded(); \ + }) #define COMMON_INTERCEPTOR_ACQUIRE(ctx, u) \ Acquire(((TsanInterceptorContext *) ctx)->thr, pc, u) From 16b7cf245ec0ff5428daee4f71af62e1938bfc73 Mon Sep 17 00:00:00 2001 From: Saleem Abdulrasool Date: Tue, 21 Mar 2023 12:03:54 -0400 Subject: [PATCH 328/691] SymbolFile: ensure that we have a value before invoking `getBitWidth` Ensure that the variant returned by `member->getValue()` has a value and is not `Empty`. Failure to do so will trigger an assertion failure in `llvm::pdb::Variant::getBitWidth()`. This can occur when the `static` member is a forward declaration. Differential Revision: https://reviews.llvm.org/D146536 Reviewed By: sgraenitz --- lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp index da57338ffb58a..b1a882465c404 100644 --- a/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp +++ b/lldb/source/Plugins/SymbolFile/PDB/PDBASTParser.cpp @@ -1299,6 +1299,15 @@ void PDBASTParser::AddRecordMembers( // Query the symbol's value as the variable initializer if valid. if (member_comp_type.IsConst()) { auto value = member->getValue(); + if (value.Type == llvm::pdb::Empty) { + LLDB_LOG(GetLog(LLDBLog::AST), + "Class '{0}' has member '{1}' of type '{2}' with an unknown " + "constant size.", + record_type.GetTypeName(), member_name, + member_comp_type.GetTypeName()); + continue; + } + clang::QualType qual_type = decl->getType(); unsigned type_width = m_ast.getASTContext().getIntWidth(qual_type); unsigned constant_width = value.getBitWidth(); From ead9644684e85e0611f3b0ff72926820f1277e09 Mon Sep 17 00:00:00 2001 From: Emilia Dreamer Date: Wed, 22 Mar 2023 20:26:38 +0200 Subject: [PATCH 329/691] [clang-format] Annotate noexcept, explicit specifiers as containing expressions The noexcept specifier and explicit specifier can optionally include a boolean expression to make these specifiers apply conditionally, however, clang-format didn't set the context for the parenthesized content of these specifiers, meaning they inherited the parent context, which usually isn't an expressions, leading to misannotated binary operators. This patch applies expression context to the content of these specifiers, making them similar to the static_assert keyword. 
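The effect on formatting looks roughly like this (illustrative output; exact spacing depends on the configured style):

```cpp
// Before: && inside the specifier was annotated as an rvalue reference.
template <typename T> void swap() noexcept(Bar<T> &&Foo<T>);
// After: && is annotated as a binary operator, as in static_assert.
template <typename T> void swap() noexcept(Bar<T> && Foo<T>);
```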
Fixes https://github.com/llvm/llvm-project/issues/44543 Reviewed By: owenpan, MyDeveloperDay Differential Revision: https://reviews.llvm.org/D146284 --- clang/lib/Format/TokenAnnotator.cpp | 7 ++++--- clang/unittests/Format/FormatTest.cpp | 4 ++++ clang/unittests/Format/TokenAnnotatorTest.cpp | 11 +++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index c5644c5bfea16..55be50aec203e 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -318,9 +318,10 @@ class AnnotatingParser { // export type X = (...); Contexts.back().IsExpression = false; } else if (OpeningParen.Previous && - (OpeningParen.Previous->isOneOf(tok::kw_static_assert, - tok::kw_while, tok::l_paren, - tok::comma, TT_BinaryOperator) || + (OpeningParen.Previous->isOneOf( + tok::kw_static_assert, tok::kw_noexcept, tok::kw_explicit, + tok::kw_while, tok::l_paren, tok::comma, + TT_BinaryOperator) || OpeningParen.Previous->isIf())) { // static_assert, if and while usually contain expressions. Contexts.back().IsExpression = true; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 1beb6a75c5225..eeb1234999a10 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -11592,6 +11592,10 @@ TEST_F(FormatTest, UnderstandsRvalueReferences) { verifyFormat("template <bool B, bool C> class A {\n" " static_assert(B && C, \"Something is wrong\");\n" "};"); + verifyFormat("template <typename T> void swap() noexcept(Bar<T> && Foo<T>);"); + verifyFormat("template <typename T> struct S {\n" + " explicit(Bar<T> && Foo<T>) S(const S &);\n" + "};"); verifyGoogleFormat("#define IF(a, b, c) if (a && (b == c))"); verifyGoogleFormat("#define WHILE(a, b, c) while (a && (b == c))"); verifyFormat("#define A(a, b) (a && b)"); diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index bc8f7f36372d2..3a6fb0e9e4b3f 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -242,6 +242,17 @@ TEST_F(TokenAnnotatorTest, UnderstandsUsesOfStarAndAmp) { "}"); ASSERT_EQ(Tokens.size(), 12u) << Tokens; EXPECT_TOKEN(Tokens[7], tok::amp, TT_BinaryOperator); + + Tokens = + annotate("template <typename T> void swap() noexcept(Bar<T> && Foo<T>);"); + ASSERT_EQ(Tokens.size(), 23u) << Tokens; + EXPECT_TOKEN(Tokens[15], tok::ampamp, TT_BinaryOperator); + + Tokens = annotate("template <typename T> struct S {\n" + " explicit(Bar<T> && Foo<T>) S(const S &);\n" + "};"); + ASSERT_EQ(Tokens.size(), 30u) << Tokens; + EXPECT_TOKEN(Tokens[14], tok::ampamp, TT_BinaryOperator); } TEST_F(TokenAnnotatorTest, UnderstandsUsesOfPlusAndMinus) { From 984354fbbe4e207798f6d83c6f46b7603952dd36 Mon Sep 17 00:00:00 2001 From: Jorge Gorbe Moya Date: Tue, 21 Mar 2023 15:30:32 -0700 Subject: [PATCH 330/691] [lldb] Update some uses of Python2 API in typemaps. Python 3 doesn't have a distinction between PyInt and PyLong, it's all PyLong now. This also fixes a bug in SetNumberFromObject. This used to crash LLDB: ``` lldb -o "script data=lldb.SBData(); data.SetDataFromUInt64Array([2**63])" ``` The problem happened in the PyInt path: ``` if (PyInt_Check(obj)) number = static_cast<T>(PyInt_AsLong(obj)); ``` when obj doesn't fit in a signed long, `PyInt_AsLong` would fail with "OverflowError: Python int too large to convert to C long". The existing long path does the right thing, as it will call `PyLong_AsUnsignedLongLong` for uint64_t.
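A minimal sketch of the Python 3 pattern the typemaps now follow (an illustrative helper, not the actual SWIG-generated code):

```cpp
#include <Python.h>
#include <cstdint>

// In Python 3 every integer is a PyLong, so a single check suffices, and
// PyLong_AsUnsignedLongLong() can represent values such as 2**63.
static bool SetUInt64(uint64_t &number, PyObject *obj) {
  if (!PyLong_Check(obj))
    return false;
  number = PyLong_AsUnsignedLongLong(obj);
  return !PyErr_Occurred();
}
```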
Differential Revision: https://reviews.llvm.org/D146590 --- lldb/bindings/python/python-typemaps.swig | 18 +++++++----------- lldb/test/API/python_api/sbdata/TestSBData.py | 3 ++- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/lldb/bindings/python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig index b3e7dd4ee265a..3e9675c8c00f1 100644 --- a/lldb/bindings/python/python-typemaps.swig +++ b/lldb/bindings/python/python-typemaps.swig @@ -103,11 +103,11 @@ // typemap for a char buffer %typemap(in) (char *dst, size_t dst_len) { - if (!PyInt_Check($input)) { + if (!PyLong_Check($input)) { PyErr_SetString(PyExc_ValueError, "Expecting an integer"); SWIG_fail; } - $2 = PyInt_AsLong($input); + $2 = PyLong_AsLong($input); if ($2 <= 0) { PyErr_SetString(PyExc_ValueError, "Positive integer expected"); SWIG_fail; @@ -139,11 +139,11 @@ // typemap for handling an snprintf-like API like SBThread::GetStopDescription. %typemap(in) (char *dst_or_null, size_t dst_len) { - if (!PyInt_Check($input)) { + if (!PyLong_Check($input)) { PyErr_SetString(PyExc_ValueError, "Expecting an integer"); SWIG_fail; } - $2 = PyInt_AsLong($input); + $2 = PyLong_AsLong($input); if ($2 <= 0) { PyErr_SetString(PyExc_ValueError, "Positive integer expected"); SWIG_fail; @@ -205,9 +205,7 @@ // typemap for an incoming buffer // See also SBProcess::ReadMemory. %typemap(in) (void *buf, size_t size) { - if (PyInt_Check($input)) { - $2 = PyInt_AsLong($input); - } else if (PyLong_Check($input)) { + if (PyLong_Check($input)) { $2 = PyLong_AsLong($input); } else { PyErr_SetString(PyExc_ValueError, "Expecting an integer or long object"); @@ -258,9 +256,7 @@ template <> int32_t PyLongAsT<int32_t>(PyObject *obj) { } template <typename T> bool SetNumberFromPyObject(T &number, PyObject *obj) { - if (PyInt_Check(obj)) - number = static_cast<T>(PyInt_AsLong(obj)); - else if (PyLong_Check(obj)) + if (PyLong_Check(obj)) number = PyLongAsT<T>(obj); else return false; @@ -345,7 +341,7 @@ template <> bool SetNumberFromPyObject<double>(double &number, PyObject *obj) { count = $2; PyObject *list = PyList_New(count); for (uint32_t j = 0; j < count; j++) { - PyObject *item = PyInt_FromLong($1[j]); + PyObject *item = PyLong_FromLong($1[j]); int ok = PyList_SetItem(list, j, item); if (ok != 0) { $result = Py_None; diff --git a/lldb/test/API/python_api/sbdata/TestSBData.py b/lldb/test/API/python_api/sbdata/TestSBData.py index 932781b9b1b0f..ba839590c1a36 100644 --- a/lldb/test/API/python_api/sbdata/TestSBData.py +++ b/lldb/test/API/python_api/sbdata/TestSBData.py @@ -387,12 +387,13 @@ def test_with_run_command(self): self.assert_data(data2.GetUnsignedInt8, 4, 111) self.assert_data(data2.GetUnsignedInt8, 5, 33) - data2.SetDataFromUInt64Array([1, 2, 3, 4, 5]) + data2.SetDataFromUInt64Array([1, 2, 3, 4, 5, 2**63]) self.assert_data(data2.GetUnsignedInt64, 0, 1) self.assert_data(data2.GetUnsignedInt64, 8, 2) self.assert_data(data2.GetUnsignedInt64, 16, 3) self.assert_data(data2.GetUnsignedInt64, 24, 4) self.assert_data(data2.GetUnsignedInt64, 32, 5) + self.assert_data(data2.GetUnsignedInt64, 40, 2**63) self.assertEqual( data2.uint64[0], 1, From 38d69df5c2dad0d4ceb08d840840ab083dd673fe Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Mar 2023 11:36:02 -0700 Subject: [PATCH 331/691] [Driver][test] Fix avr-ld.c for -DCLANG_DEFAULT_LINKER=lld after D145646 --- clang/test/Driver/avr-ld.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clang/test/Driver/avr-ld.c b/clang/test/Driver/avr-ld.c index 3088bc00446f4..4042ecb89adf5 100644 ---
a/clang/test/Driver/avr-ld.c +++ b/clang/test/Driver/avr-ld.c @@ -44,16 +44,16 @@ // RUN: %clang -### --target=avr -mmcu=atxmega128a1 --rtlib=libgcc --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKO %s // LINKO: {{".*ld.*"}} {{.*}} {{"-L.*avrxmega7"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x802000" "--start-group" {{.*}} "-latxmega128a1" {{.*}} "--end-group" "--relax" "-mavrxmega7" -// RUN: %clang -### --target=avr -mmcu=atmega328 -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKP %s +// RUN: %clang -### --target=avr -mmcu=atmega328 -fuse-ld=ld -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck --check-prefix=LINKP %s // LINKP: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin" {{.*}} "-plugin-opt=mcpu=atmega328" -// RUN: %clang -### --target=avr -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKQ %s +// RUN: %clang -### --target=avr -fuse-ld=ld -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck --check-prefix=LINKQ %s // LINKQ: {{".*ld.*"}} {{.*}} "-plugin" // LINKQ-NOT: "-plugin-opt=mcpu" -// RUN: %clang -### --target=avr -mmcu=atmega328 -flto=thin --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKR %s -// LINKR: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin" {{.*}} "-plugin-opt=mcpu=atmega328" "-plugin-opt=thinlto" +// RUN: %clang -### --target=avr -mmcu=atmega328 -fuse-ld=lld -flto=thin --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKR %s +// LINKR: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin-opt=mcpu=atmega328" "-plugin-opt=thinlto" -// RUN: %clang -### --target=avr -mmcu=atmega328 -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKS %s -// LINKS: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin" {{.*}} "-plugin-opt=mcpu=atmega328" +// RUN: %clang -### --target=avr -mmcu=atmega328 -fuse-ld=lld -flto --sysroot %S/Inputs/basic_avr_tree %s 2>&1 | FileCheck -check-prefix LINKS %s +// LINKS: {{".*ld.*"}} {{.*}} "--defsym=__DATA_REGION_ORIGIN__=0x800100" "-plugin-opt=mcpu=atmega328" // LINKS-NOT: "-plugin-opt=thinlto" From eac8e25ea5ee64ea46f93bba42d842fbde61609c Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 22 Mar 2023 16:56:05 +0000 Subject: [PATCH 332/691] [CodeGen] Fix type of MachineRegisterInfo::RegAllocHints. NFC. The first member of the pair should be unsigned instead of Register because it is the hint type, 0 for simple (target independent) hints and other values for target dependent hints. Differential Revision: https://reviews.llvm.org/D146646 --- llvm/include/llvm/CodeGen/MachineRegisterInfo.h | 17 ++++++++--------- llvm/lib/CodeGen/CalcSpillWeights.cpp | 2 +- llvm/lib/CodeGen/TargetRegisterInfo.cpp | 4 ++-- llvm/lib/CodeGen/VirtRegMap.cpp | 6 +++--- llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 4 ++-- .../deltas/ReduceVirtualRegisters.cpp | 2 +- 6 files changed, 17 insertions(+), 18 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index ce447be3af41f..fc4e5ca756248 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -101,8 +101,9 @@ class MachineRegisterInfo { /// first member of the pair being non-zero. If the hinted register is /// virtual, it means the allocator should prefer the physical register /// allocated to it if any. 
-  IndexedMap<std::pair<Register, SmallVector<Register, 4>>, -             VirtReg2IndexFunctor> RegAllocHints; +  IndexedMap<std::pair<unsigned, SmallVector<Register, 4>>, +             VirtReg2IndexFunctor> +      RegAllocHints; /// PhysRegUseDefLists - This is an array of the head of the use/def list for /// physical registers. @@ -818,27 +819,25 @@ class MachineRegisterInfo { /// getRegAllocationHint - Return the register allocation hint for the /// specified virtual register. If there are many hints, this returns the /// one with the greatest weight. - std::pair<Register, Register> - getRegAllocationHint(Register VReg) const { + std::pair<unsigned, Register> getRegAllocationHint(Register VReg) const { assert(VReg.isVirtual()); Register BestHint = (RegAllocHints[VReg.id()].second.size() ? RegAllocHints[VReg.id()].second[0] : Register()); - return std::pair<Register, Register>(RegAllocHints[VReg.id()].first, - BestHint); + return {RegAllocHints[VReg.id()].first, BestHint}; } /// getSimpleHint - same as getRegAllocationHint except it will only return /// a target independent hint. Register getSimpleHint(Register VReg) const { assert(VReg.isVirtual()); - std::pair<Register, Register> Hint = getRegAllocationHint(VReg); + std::pair<unsigned, Register> Hint = getRegAllocationHint(VReg); return Hint.first ? Register() : Hint.second; } /// getRegAllocationHints - Return a reference to the vector of all /// register allocation hints for VReg. - const std::pair<Register, SmallVector<Register, 4>> - &getRegAllocationHints(Register VReg) const { + const std::pair<unsigned, SmallVector<Register, 4>> & + getRegAllocationHints(Register VReg) const { assert(VReg.isVirtual()); return RegAllocHints[VReg]; } diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index 1146c1d465da5..5a005ba7b414d 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -157,7 +157,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, unsigned NumInstr = 0; // Number of instructions using LI SmallPtrSet<MachineInstr *, 8> Visited; - std::pair<Register, Register> TargetHint = MRI.getRegAllocationHint(LI.reg()); + std::pair<unsigned, Register> TargetHint = MRI.getRegAllocationHint(LI.reg()); if (LI.isSpillable()) { Register Reg = LI.reg(); diff --git a/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/llvm/lib/CodeGen/TargetRegisterInfo.cpp index e6baf00c06451..051de1612284c 100644 --- a/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -424,8 +424,8 @@ bool TargetRegisterInfo::getRegAllocationHints( SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); - const std::pair<Register, SmallVector<Register, 4>> &Hints_MRI = - MRI.getRegAllocationHints(VirtReg); + const std::pair<unsigned, SmallVector<Register, 4>> &Hints_MRI = + MRI.getRegAllocationHints(VirtReg); SmallSet<Register, 32> HintedRegs; // First hint may be a target hint.
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp index f80b06d7e9b7c..8e00712d2308e 100644 --- a/llvm/lib/CodeGen/VirtRegMap.cpp +++ b/llvm/lib/CodeGen/VirtRegMap.cpp @@ -116,10 +116,10 @@ bool VirtRegMap::hasPreferredPhys(Register VirtReg) const { } bool VirtRegMap::hasKnownPreference(Register VirtReg) const { - std::pair<Register, Register> Hint = MRI->getRegAllocationHint(VirtReg); - if (Register::isPhysicalRegister(Hint.second)) + std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(VirtReg); + if (Hint.second.isPhysical()) return true; - if (Register::isVirtualRegister(Hint.second)) + if (Hint.second.isVirtual()) return hasPhys(Hint.second); return false; } diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index e6c6ab2efd50e..0fc2d8c6f5712 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -338,7 +338,7 @@ bool ARMBaseRegisterInfo::getRegAllocationHints( SmallVectorImpl<MCPhysReg> &Hints, const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const { const MachineRegisterInfo &MRI = MF.getRegInfo(); - std::pair<Register, Register> Hint = MRI.getRegAllocationHint(VirtReg); + std::pair<unsigned, Register> Hint = MRI.getRegAllocationHint(VirtReg); unsigned Odd; switch (Hint.first) { @@ -391,7 +391,7 @@ bool ARMBaseRegisterInfo::getRegAllocationHints( void ARMBaseRegisterInfo::updateRegAllocHint(Register Reg, Register NewReg, MachineFunction &MF) const { MachineRegisterInfo *MRI = &MF.getRegInfo(); - std::pair<Register, Register> Hint = MRI->getRegAllocationHint(Reg); + std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(Reg); if ((Hint.first == ARMRI::RegPairOdd || Hint.first == ARMRI::RegPairEven) && Hint.second.isVirtual()) { // If 'Reg' is one of the even / odd register pair and it's now changed diff --git a/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp b/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp index eed5be7054e41..2b97e65bbf093 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceVirtualRegisters.cpp @@ -23,7 +23,7 @@ static void dropRegisterHintsFromFunction(Oracle &O, MachineFunction &MF) { for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { Register Reg = Register::index2VirtReg(I); - const std::pair<Register, SmallVector<Register, 4>> &Hints = + const std::pair<unsigned, SmallVector<Register, 4>> &Hints = MRI.getRegAllocationHints(Reg); if (Hints.second.empty()) continue; From 7e5c48b8bd9ff0ee5de3ba28c833f1225f14e44d Mon Sep 17 00:00:00 2001 From: Carlos Galvez Date: Wed, 22 Mar 2023 18:38:10 +0000 Subject: [PATCH 333/691] [clang-tidy][NFC] Move avoid-underscore-in-googletest-name to google folder Since the check belongs to the google module, it makes sense that the corresponding test also belongs to the google module.
Differential Revision: https://reviews.llvm.org/D146653 --- .../avoid-underscore-in-googletest-name.cpp | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename clang-tools-extra/test/clang-tidy/checkers/{readability => google}/avoid-underscore-in-googletest-name.cpp (100%) diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/avoid-underscore-in-googletest-name.cpp b/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp similarity index 100% rename from clang-tools-extra/test/clang-tidy/checkers/readability/avoid-underscore-in-googletest-name.cpp rename to clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp From 6afcc54ac7d68fa2b28f0e7cbf9dc1d4ac7fb95e Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 22 Mar 2023 09:51:58 -0700 Subject: [PATCH 334/691] [SCEV] Infer no-self-wrap via constant ranges Without this, pointer IVs in loops with small constant trip counts couldn't be proven no-self-wrap. This came up in a new LSR transform, but may also benefit other SCEV consumers as well. Differential Revision: https://reviews.llvm.org/D146596 --- llvm/lib/Analysis/ScalarEvolution.cpp | 12 ++++++++++++ llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp | 9 +-------- .../Analysis/ScalarEvolution/different-loops-recs.ll | 2 +- .../max-backedge-taken-count-guard-info.ll | 9 ++++----- .../Transforms/LoopRotate/pr51981-scev-problem.ll | 4 ++-- llvm/test/Transforms/LoopVersioning/lcssa.ll | 1 - 6 files changed, 20 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index df525f4d6be7a..df872f61906c8 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4988,6 +4988,18 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) { SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap; + if (!AR->hasNoSelfWrap()) { + const SCEV *BECount = getConstantMaxBackedgeTakenCount(AR->getLoop()); + if (const SCEVConstant *BECountMax = dyn_cast<SCEVConstant>(BECount)) { + ConstantRange StepCR = getSignedRange(AR->getStepRecurrence(*this)); + const APInt &BECountAP = BECountMax->getAPInt(); + unsigned NoOverflowBitWidth = + BECountAP.getActiveBits() + StepCR.getMinSignedBits(); + if (NoOverflowBitWidth <= getTypeSizeInBits(AR->getType())) + Result = ScalarEvolution::setFlags(Result, SCEV::FlagNW); + } + } + if (!AR->hasNoSignedWrap()) { ConstantRange AddRecRange = getSignedRange(AR); ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this)); diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 82312de71c72c..fbdc436e0d37e 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -6769,14 +6769,7 @@ canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT, // iteration. The simplest case to consider is a candidate IV which is // narrower than the trip count (and thus original IV), but this can // also happen due to non-unit strides on the candidate IVs. - // TODO: This check should be replaceable with PostInc->hasNoSelfWrap(), - // but in practice we appear to be missing inference for cases we should - // be able to catch.
-    ConstantRange StepCR = SE.getSignedRange(AddRec->getStepRecurrence(SE)); -    ConstantRange BECountCR = SE.getUnsignedRange(BECount); -    unsigned NoOverflowBitWidth = BECountCR.getActiveBits() + StepCR.getMinSignedBits(); -    unsigned ARBitWidth = SE.getTypeSizeInBits(AddRec->getType()); -    if (NoOverflowBitWidth > ARBitWidth) +    if (!AddRec->hasNoSelfWrap()) continue; const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE); diff --git a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll index 44081f32d0af6..60b2e9d50dd59 100644 --- a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll +++ b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll @@ -94,7 +94,7 @@ define void @test_01(i32 %a, i32 %b) { ; CHECK: %s3 = add i32 %is1, %phi5 ; CHECK-NEXT: --> {{{{}}(59 + (2 * %a) + %b),+,6}<%loop1>,+,2}<%loop2> ; CHECK: %s4 = add i32 %phi2, %is2 -; CHECK-NEXT: --> {{{{}}(159 + (2 * %b)),+,2}<%loop1>,+,6}<%loop2> +; CHECK-NEXT: --> {{{{}}(159 + (2 * %b)),+,2}<%loop1>,+,6}<nw><%loop2> ; CHECK: %s5 = add i32 %is1, %is2 ; CHECK-NEXT: --> {{{{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> ; CHECK: %s6 = add i32 %is2, %is1 diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index e1acec162d3c8..d4d3a9e13e277 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -1633,9 +1633,9 @@ define i32 @ptr_induction_ult_1(ptr %a, ptr %b) { ; CHECK-LABEL: 'ptr_induction_ult_1' ; CHECK-NEXT: Classifying expressions for: @ptr_induction_ult_1 ; CHECK-NEXT: %ptr.iv = phi ptr [ %ptr.iv.next, %loop ], [ %a, %entry ] -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: %a LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,4}<nw><%loop> U: full-set S: full-set Exits: %a LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %ptr.iv.next = getelementptr i32, ptr %ptr.iv, i64 1 -; CHECK-NEXT: --> {(4 + %a),+,4}<%loop> U: full-set S: full-set Exits: (4 + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (4 + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @ptr_induction_ult_1 ; CHECK-NEXT: Loop %loop: backedge-taken count is 0 ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is 0 @@ -1686,18 +1686,17 @@ exit: ret i32 0 } -; TODO: The pointer induction variable can be implied No Self Wrap.
define void @gep_addrec_nw(ptr %a) { ; CHECK-LABEL: 'gep_addrec_nw' ; CHECK-NEXT: Classifying expressions for: @gep_addrec_nw ; CHECK-NEXT: %lsr.iv1 = phi ptr [ %uglygep2, %for.body ], [ %a, %entry ] -; CHECK-NEXT: --> {%a,+,4}<%for.body> U: full-set S: full-set Exits: (1512 + %a) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {%a,+,4}<nw><%for.body> U: full-set S: full-set Exits: (1512 + %a) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %lsr.iv = phi i64 [ %lsr.iv.next, %for.body ], [ 379, %entry ] ; CHECK-NEXT: --> {379,+,-1}<%for.body> U: [1,380) S: [1,380) Exits: 1 LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %lsr.iv.next = add nsw i64 %lsr.iv, -1 ; CHECK-NEXT: --> {378,+,-1}<%for.body> U: [0,379) S: [0,379) Exits: 0 LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: %uglygep2 = getelementptr i8, ptr %lsr.iv1, i64 4 -; CHECK-NEXT: --> {(4 + %a),+,4}<%for.body> U: full-set S: full-set Exits: (1516 + %a) LoopDispositions: { %for.body: Computable } +; CHECK-NEXT: --> {(4 + %a),+,4}<nw><%for.body> U: full-set S: full-set Exits: (1516 + %a) LoopDispositions: { %for.body: Computable } ; CHECK-NEXT: Determining loop execution counts for: @gep_addrec_nw ; CHECK-NEXT: Loop %for.body: backedge-taken count is 378 ; CHECK-NEXT: Loop %for.body: constant max backedge-taken count is 378 diff --git a/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll b/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll index dd5031c56722f..2c2c88b6acb20 100644 --- a/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll +++ b/llvm/test/Transforms/LoopRotate/pr51981-scev-problem.ll @@ -22,7 +22,7 @@ ; CHECK-SCEV: %narrow = trunc i32 %wide to i16 ; CHECK-SCEV: --> (trunc i32 %wide to i16) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop.outer.header: Variant, %loop.inner: Invariant } ; CHECK-SCEV: %iv = phi i16 [ %narrow, %loop.inner.ph ], [ %iv.plus, %loop.inner ] -; CHECK-SCEV: --> {(trunc i32 %wide to i16),+,1}<%loop.inner> U: full-set S: full-set Exits: (-1 + (700 umax (1 + (trunc i32 %wide to i16)))) LoopDispositions: { %loop.inner: Computable, %loop.outer.header: Variant } +; CHECK-SCEV: --> {(trunc i32 %wide to i16),+,1}<nw><%loop.inner> U: full-set S: full-set Exits: (-1 + (700 umax (1 + (trunc i32 %wide to i16)))) LoopDispositions: { %loop.inner: Computable, %loop.outer.header: Variant } ; ; CHECK-SCEV: Classifying expressions for: @test_function ; CHECK-SCEV: %wide1 = load i32, ptr @offset, align 1 ; CHECK-SCEV: --> %wide1 U: full-set S: full-set ; CHECK-SCEV: %narrow = trunc i32 %wide2 to i16 ; CHECK-SCEV: --> (trunc i32 %wide2 to i16) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop.inner.ph: Variant, %loop.inner: Invariant } ; CHECK-SCEV: %iv = phi i16 [ %narrow, %loop.inner.ph ], [ %iv.plus, %loop.inner ] -; CHECK-SCEV: --> {(trunc i32 %wide2 to i16),+,1}<%loop.inner> U: full-set S: full-set Exits: (-1 + (700 umax (1 + (trunc i32 %wide2 to i16)))) LoopDispositions: { %loop.inner: Computable, %loop.inner.ph: Variant } +; CHECK-SCEV: --> {(trunc i32 %wide2 to i16),+,1}<nw><%loop.inner> U: full-set S: full-set Exits: (-1 + (700 umax (1 + (trunc i32 %wide2 to i16)))) LoopDispositions: { %loop.inner: Computable, %loop.inner.ph: Variant } @offset = external dso_local global i32, align 1 diff --git a/llvm/test/Transforms/LoopVersioning/lcssa.ll b/llvm/test/Transforms/LoopVersioning/lcssa.ll index ee14f693abd7a..4b51c21257243 100644 --- a/llvm/test/Transforms/LoopVersioning/lcssa.ll +++ b/llvm/test/Transforms/LoopVersioning/lcssa.ll @@ -56,7 +56,6 @@ define void
@fill_no_null_opt(i8** %ls1.20, i8** %ls2.21, i8* %cse3.22) #0 { ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP]], [[SCEVGEP2]] ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[LS2_21_PROMOTED]], [[SCEVGEP1]] ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, i8* [[LS1_20_PROMOTED]], i64 -1 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %bb1.ph.lver.orig, label %bb1.ph ; CHECK: bb1.ph.lver.orig: ; From e655d8a54880cf550567dda0e9a1a33f6ee98df5 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 9 Mar 2023 01:14:03 +0100 Subject: [PATCH 335/691] [libc++] Granularize __mutex_base This also updates the moved code to the current style. (i.e. `_VSTD` -> `std`, `_LIBCPP_INLINE_VISIBILITY` -> `_LIBCPP_HIDE_FROM_ABI`, clang-format). Reviewed By: Mordante, #libc, EricWF Spies: arichardson, libcxx-commits, mikhail.ramalho Differential Revision: https://reviews.llvm.org/D146228 --- libcxx/docs/ReleaseNotes.rst | 3 +- libcxx/include/CMakeLists.txt | 6 +- .../__condition_variable/condition_variable.h | 243 ++++++++++++++++++ libcxx/include/__mutex/lock_guard.h | 53 ++++ libcxx/include/__mutex/mutex.h | 53 ++++ libcxx/include/__mutex/tag_types.h | 48 ++++ libcxx/include/__mutex/unique_lock.h | 172 +++++++++++++ libcxx/include/condition_variable | 17 +- libcxx/include/libcxx.imp | 2 + libcxx/include/module.modulemap.in | 12 +- libcxx/include/mutex | 14 +- libcxx/include/shared_mutex | 13 +- libcxx/include/thread | 9 +- libcxx/src/shared_mutex.cpp | 1 + libcxx/test/libcxx/private_headers.verify.cpp | 6 +- .../native_handle.pass.cpp | 4 +- .../test/libcxx/transitive_includes/cxx03.csv | 8 + .../test/libcxx/transitive_includes/cxx11.csv | 8 + .../test/libcxx/transitive_includes/cxx14.csv | 1 + .../test/libcxx/transitive_includes/cxx17.csv | 1 + .../test/libcxx/transitive_includes/cxx20.csv | 1 + .../test/libcxx/transitive_includes/cxx2b.csv | 9 +- .../futures.shared_future/wait.pass.cpp | 1 + .../futures.unique_future/wait.pass.cpp | 1 + .../thread.lock.shared.cons/mutex.pass.cpp | 5 +- .../mutex_try_to_lock.pass.cpp | 6 +- .../thread.lock.shared.locking/lock.pass.cpp | 6 +- .../try_lock.pass.cpp | 3 +- .../try_lock_for.pass.cpp | 3 +- .../try_lock_until.pass.cpp | 3 +- .../thread.lock.unique.cons/mutex.pass.cpp | 5 +- .../mutex_try_to_lock.pass.cpp | 5 +- .../thread.lock.unique.locking/lock.pass.cpp | 5 +- .../try_lock.pass.cpp | 3 +- .../try_lock_for.pass.cpp | 3 +- .../try_lock_until.pass.cpp | 3 +- .../unlock.pass.cpp | 3 +- .../thread.mutex.class/lock.pass.cpp | 5 +- .../thread.mutex.class/try_lock.pass.cpp | 5 +- .../thread.mutex.recursive/lock.pass.cpp | 5 +- .../thread.mutex.recursive/try_lock.pass.cpp | 5 +- .../thread.shared_mutex.class/lock.pass.cpp | 5 +- .../lock_shared.pass.cpp | 5 +- .../try_lock.pass.cpp | 5 +- .../try_lock_shared.pass.cpp | 5 +- .../lock.pass.cpp | 6 +- .../lock_shared.pass.cpp | 3 +- .../try_lock.pass.cpp | 5 +- .../try_lock_shared.pass.cpp | 5 +- .../thread.timedmutex.class/lock.pass.cpp | 5 +- .../thread.timedmutex.class/try_lock.pass.cpp | 5 +- .../thread.timedmutex.recursive/lock.pass.cpp | 5 +- .../try_lock.pass.cpp | 5 +- .../thread.thread.this/sleep_until.pass.cpp | 5 +- .../time.duration.nonmember/ostream.pass.cpp | 1 + .../time.hms.nonmembers/ostream.pass.cpp | 3 +- .../time/time.syn/formatter.duration.pass.cpp | 1 + .../time/time.syn/formatter.hh_mm_ss.pass.cpp | 1 + libcxx/utils/data/ignore_format.txt | 1 - 59 files changed, 757 insertions(+), 73 deletions(-) create mode 100644 
libcxx/include/__condition_variable/condition_variable.h create mode 100644 libcxx/include/__mutex/lock_guard.h create mode 100644 libcxx/include/__mutex/mutex.h create mode 100644 libcxx/include/__mutex/tag_types.h create mode 100644 libcxx/include/__mutex/unique_lock.h diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst index aa14e6bfcd97d..8083ba337fc16 100644 --- a/libcxx/docs/ReleaseNotes.rst +++ b/libcxx/docs/ReleaseNotes.rst @@ -62,7 +62,8 @@ Deprecations and Removals includes are removed based on the language version used. Incidental transitive inclusions of the following headers have been removed: - - C++2b: ``atomic``, ``bit``, ``cstring``, ``type_traits`` + - C++2b: ``atomic``, ``bit``, ``cstdint``, ``cstdlib``, ``cstring``, ``initializer_list``, ``new``, ``stdexcept``, + ``type_traits``, ``typeinfo`` - The headers ``<experimental/algorithm>`` and ``<experimental/functional>`` have been removed, since all the contents have been implemented in namespace ``std`` for at least two releases. diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index f8c52328ccff0..8232784cb6c7e 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -294,6 +294,7 @@ set(files __concepts/semiregular.h __concepts/swappable.h __concepts/totally_ordered.h + __condition_variable/condition_variable.h __config __coroutine/coroutine_handle.h __coroutine/coroutine_traits.h @@ -474,7 +475,10 @@ set(files __memory_resource/pool_options.h __memory_resource/synchronized_pool_resource.h __memory_resource/unsynchronized_pool_resource.h - __mutex_base + __mutex/lock_guard.h + __mutex/mutex.h + __mutex/tag_types.h + __mutex/unique_lock.h __node_handle __numeric/accumulate.h __numeric/adjacent_difference.h diff --git a/libcxx/include/__condition_variable/condition_variable.h b/libcxx/include/__condition_variable/condition_variable.h new file mode 100644 index 0000000000000..e66f78725a08c --- /dev/null +++ b/libcxx/include/__condition_variable/condition_variable.h @@ -0,0 +1,243 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___CONDITION_VARIABLE_CONDITION_VARIABLE_H +#define _LIBCPP___CONDITION_VARIABLE_CONDITION_VARIABLE_H + +#include <__chrono/steady_clock.h> +#include <__chrono/system_clock.h> +#include <__chrono/time_point.h> +#include <__config> +#include <__mutex/mutex.h> +#include <__mutex/unique_lock.h> +#include <__threading_support> +#include <__type_traits/enable_if.h> +#include <__type_traits/is_floating_point.h> +#include <__utility/move.h> +#include <limits> +#include <ratio> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#ifndef _LIBCPP_HAS_NO_THREADS + +_LIBCPP_BEGIN_NAMESPACE_STD + +// enum class cv_status +_LIBCPP_DECLARE_STRONG_ENUM(cv_status){no_timeout, timeout}; +_LIBCPP_DECLARE_STRONG_ENUM_EPILOG(cv_status) + +class _LIBCPP_TYPE_VIS condition_variable { + __libcpp_condvar_t __cv_ = _LIBCPP_CONDVAR_INITIALIZER; + +public: + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR condition_variable() _NOEXCEPT = default; + +# ifdef _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION + ~condition_variable() = default; +# else + ~condition_variable(); +# endif + + condition_variable(const condition_variable&) = delete; + condition_variable& operator=(const condition_variable&) = delete; + + void notify_one() _NOEXCEPT; + void notify_all() _NOEXCEPT; + + void wait(unique_lock<mutex>& __lk) _NOEXCEPT; + template <class _Predicate> + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS void wait(unique_lock<mutex>& __lk, _Predicate __pred); + + template <class _Clock, class _Duration> + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS cv_status + wait_until(unique_lock<mutex>& __lk, const chrono::time_point<_Clock, _Duration>& __t); + + template <class _Clock, class _Duration, class _Predicate> + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS bool + wait_until(unique_lock<mutex>& __lk, const chrono::time_point<_Clock, _Duration>& __t, _Predicate __pred); + + template <class _Rep, class _Period> + _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS cv_status + wait_for(unique_lock<mutex>& __lk, const chrono::duration<_Rep, _Period>& __d); + + template <class _Rep, class _Period, class _Predicate> + bool _LIBCPP_HIDE_FROM_ABI + wait_for(unique_lock<mutex>& __lk, const chrono::duration<_Rep, _Period>& __d, _Predicate __pred); + + typedef __libcpp_condvar_t* native_handle_type; + _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__cv_; } + +private: + void + __do_timed_wait(unique_lock<mutex>& __lk, chrono::time_point<chrono::system_clock, chrono::nanoseconds>) _NOEXCEPT; +# if defined(_LIBCPP_HAS_COND_CLOCKWAIT) + void + __do_timed_wait(unique_lock<mutex>& __lk, chrono::time_point<chrono::steady_clock, chrono::nanoseconds>) _NOEXCEPT; +# endif + template <class _Clock> + void __do_timed_wait(unique_lock<mutex>& __lk, chrono::time_point<_Clock, chrono::nanoseconds>) _NOEXCEPT; +}; +#endif // !_LIBCPP_HAS_NO_THREADS + +template <class _Rep, class _Period> +inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<is_floating_point<_Rep>::value, chrono::nanoseconds> +__safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) { + using namespace chrono; + using __ratio = ratio_divide<_Period, nano>; + using __ns_rep = nanoseconds::rep; + _Rep __result_float = __d.count() * __ratio::num / __ratio::den; + + _Rep __result_max = numeric_limits<__ns_rep>::max(); + if (__result_float >= __result_max) { + return nanoseconds::max(); + } + + _Rep __result_min = numeric_limits<__ns_rep>::min(); + if (__result_float <= __result_min) { + return nanoseconds::min(); + } + + return nanoseconds(static_cast<__ns_rep>(__result_float)); +} + +template <class _Rep, class _Period> +inline _LIBCPP_HIDE_FROM_ABI __enable_if_t<!is_floating_point<_Rep>::value, chrono::nanoseconds> +__safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) {
using namespace chrono; + if (__d.count() == 0) { + return nanoseconds(0); + } + + using __ratio = ratio_divide<_Period, nano>; + using __ns_rep = nanoseconds::rep; + __ns_rep __result_max = numeric_limits<__ns_rep>::max(); + if (__d.count() > 0 && __d.count() > __result_max / __ratio::num) { + return nanoseconds::max(); + } + + __ns_rep __result_min = numeric_limits<__ns_rep>::min(); + if (__d.count() < 0 && __d.count() < __result_min / __ratio::num) { + return nanoseconds::min(); + } + + __ns_rep __result = __d.count() * __ratio::num / __ratio::den; + if (__result == 0) { + return nanoseconds(1); + } + + return nanoseconds(__result); +} + +#ifndef _LIBCPP_HAS_NO_THREADS +template <class _Predicate> +void condition_variable::wait(unique_lock<mutex>& __lk, _Predicate __pred) { + while (!__pred()) + wait(__lk); +} + +template <class _Clock, class _Duration> +cv_status condition_variable::wait_until(unique_lock<mutex>& __lk, const chrono::time_point<_Clock, _Duration>& __t) { + using namespace chrono; + using __clock_tp_ns = time_point<_Clock, nanoseconds>; + + typename _Clock::time_point __now = _Clock::now(); + if (__t <= __now) + return cv_status::timeout; + + __clock_tp_ns __t_ns = __clock_tp_ns(std::__safe_nanosecond_cast(__t.time_since_epoch())); + + __do_timed_wait(__lk, __t_ns); + return _Clock::now() < __t ? cv_status::no_timeout : cv_status::timeout; +} + +template <class _Clock, class _Duration, class _Predicate> +bool condition_variable::wait_until( + unique_lock<mutex>& __lk, const chrono::time_point<_Clock, _Duration>& __t, _Predicate __pred) { + while (!__pred()) { + if (wait_until(__lk, __t) == cv_status::timeout) + return __pred(); + } + return true; +} + +template <class _Rep, class _Period> +cv_status condition_variable::wait_for(unique_lock<mutex>& __lk, const chrono::duration<_Rep, _Period>& __d) { + using namespace chrono; + if (__d <= __d.zero()) + return cv_status::timeout; + using __ns_rep = nanoseconds::rep; + steady_clock::time_point __c_now = steady_clock::now(); + +# if defined(_LIBCPP_HAS_COND_CLOCKWAIT) + using __clock_tp_ns = time_point<steady_clock, nanoseconds>; + __ns_rep __now_count_ns = std::__safe_nanosecond_cast(__c_now.time_since_epoch()).count(); +# else + using __clock_tp_ns = time_point<system_clock, nanoseconds>; + __ns_rep __now_count_ns = std::__safe_nanosecond_cast(system_clock::now().time_since_epoch()).count(); +# endif + + __ns_rep __d_ns_count = std::__safe_nanosecond_cast(__d).count(); + + if (__now_count_ns > numeric_limits<__ns_rep>::max() - __d_ns_count) { + __do_timed_wait(__lk, __clock_tp_ns::max()); + } else { + __do_timed_wait(__lk, __clock_tp_ns(nanoseconds(__now_count_ns + __d_ns_count))); + } + + return steady_clock::now() - __c_now < __d ?
+}
+
+template <class _Rep, class _Period, class _Predicate>
+inline bool
+condition_variable::wait_for(unique_lock<mutex>& __lk, const chrono::duration<_Rep, _Period>& __d, _Predicate __pred) {
+  return wait_until(__lk, chrono::steady_clock::now() + __d, std::move(__pred));
+}
+
+#  if defined(_LIBCPP_HAS_COND_CLOCKWAIT)
+inline void condition_variable::__do_timed_wait(
+    unique_lock<mutex>& __lk, chrono::time_point<chrono::steady_clock, chrono::nanoseconds> __tp) _NOEXCEPT {
+  using namespace chrono;
+  if (!__lk.owns_lock())
+    __throw_system_error(EPERM, "condition_variable::timed wait: mutex not locked");
+  nanoseconds __d = __tp.time_since_epoch();
+  timespec __ts;
+  seconds __s = duration_cast<seconds>(__d);
+  using __ts_sec = decltype(__ts.tv_sec);
+  const __ts_sec __ts_sec_max = numeric_limits<__ts_sec>::max();
+  if (__s.count() < __ts_sec_max) {
+    __ts.tv_sec  = static_cast<__ts_sec>(__s.count());
+    __ts.tv_nsec = (__d - __s).count();
+  } else {
+    __ts.tv_sec  = __ts_sec_max;
+    __ts.tv_nsec = giga::num - 1;
+  }
+  int __ec = pthread_cond_clockwait(&__cv_, __lk.mutex()->native_handle(), CLOCK_MONOTONIC, &__ts);
+  if (__ec != 0 && __ec != ETIMEDOUT)
+    __throw_system_error(__ec, "condition_variable timed_wait failed");
+}
+#  endif // _LIBCPP_HAS_COND_CLOCKWAIT
+
+template <class _Clock>
+inline void condition_variable::__do_timed_wait(unique_lock<mutex>& __lk,
+                                                chrono::time_point<_Clock, chrono::nanoseconds> __tp) _NOEXCEPT {
+  wait_for(__lk, __tp - _Clock::now());
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_HAS_NO_THREADS
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___CONDITION_VARIABLE_CONDITION_VARIABLE_H
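For orientation before the next header: a hedged usage sketch of the API this new header carries. It is not part of the patch and relies only on standard <condition_variable> semantics; the queue, producer, and consumer names are invented for the example.

#include <condition_variable>
#include <mutex>
#include <queue>

std::mutex m;
std::condition_variable cv;
std::queue<int> work;
bool done = false;

void consumer() {
  std::unique_lock<std::mutex> lk(m);
  while (!done) {
    // The predicate overload loops internally, exactly as the
    // wait(unique_lock<mutex>&, _Predicate) template above shows.
    cv.wait(lk, [] { return done || !work.empty(); });
    while (!work.empty())
      work.pop(); // process the item
  }
}

void producer() {
  {
    std::lock_guard<std::mutex> lk(m);
    work.push(42);
  }
  cv.notify_one(); // notifying after releasing the lock is fine
}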
diff --git a/libcxx/include/__mutex/lock_guard.h b/libcxx/include/__mutex/lock_guard.h
new file mode 100644
index 0000000000000..c075512fb97a9
--- /dev/null
+++ b/libcxx/include/__mutex/lock_guard.h
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___MUTEX_LOCK_GUARD_H
+#define _LIBCPP___MUTEX_LOCK_GUARD_H
+
+#include <__config>
+#include <__mutex/tag_types.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#ifndef _LIBCPP_HAS_NO_THREADS
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _Mutex>
+class _LIBCPP_TEMPLATE_VIS _LIBCPP_THREAD_SAFETY_ANNOTATION(scoped_lockable) lock_guard {
+public:
+  typedef _Mutex mutex_type;
+
+private:
+  mutex_type& __m_;
+
+public:
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI explicit lock_guard(mutex_type& __m)
+      _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability(__m))
+      : __m_(__m) {
+    __m_.lock();
+  }
+
+  _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI lock_guard(mutex_type& __m, adopt_lock_t)
+      _LIBCPP_THREAD_SAFETY_ANNOTATION(requires_capability(__m))
+      : __m_(__m) {}
+  _LIBCPP_HIDE_FROM_ABI ~lock_guard() _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability()) { __m_.unlock(); }
+
+private:
+  lock_guard(lock_guard const&) = delete;
+  lock_guard& operator=(lock_guard const&) = delete;
+};
+_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(lock_guard);
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_HAS_NO_THREADS
+
+#endif // _LIBCPP___MUTEX_LOCK_GUARD_H
diff --git a/libcxx/include/__mutex/mutex.h b/libcxx/include/__mutex/mutex.h
new file mode 100644
index 0000000000000..13def1e5d1535
--- /dev/null
+++ b/libcxx/include/__mutex/mutex.h
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___MUTEX_MUTEX_H
+#define _LIBCPP___MUTEX_MUTEX_H
+
+#include <__config>
+#include <__threading_support>
+#include <__type_traits/is_nothrow_default_constructible.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#ifndef _LIBCPP_HAS_NO_THREADS
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+class _LIBCPP_TYPE_VIS _LIBCPP_THREAD_SAFETY_ANNOTATION(capability("mutex")) mutex {
+  __libcpp_mutex_t __m_ = _LIBCPP_MUTEX_INITIALIZER;
+
+public:
+  _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR mutex() = default;
+
+  mutex(const mutex&) = delete;
+  mutex& operator=(const mutex&) = delete;
+
+#  if defined(_LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION)
+  ~mutex() = default;
+#  else
+  ~mutex() _NOEXCEPT;
+#  endif
+
+  void lock() _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability());
+  bool try_lock() _NOEXCEPT _LIBCPP_THREAD_SAFETY_ANNOTATION(try_acquire_capability(true));
+  void unlock() _NOEXCEPT _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability());
+
+  typedef __libcpp_mutex_t* native_handle_type;
+  _LIBCPP_HIDE_FROM_ABI native_handle_type native_handle() { return &__m_; }
+};
+
+static_assert(is_nothrow_default_constructible<mutex>::value, "the default constructor for std::mutex must be nothrow");
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_HAS_NO_THREADS
+
+#endif // _LIBCPP___MUTEX_MUTEX_H
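The thread-safety annotations on mutex and lock_guard above feed Clang's -Wthread-safety analysis. A hedged sketch of what that enables in user code (the Counter class and its methods are invented; only std::mutex, std::lock_guard, and std::adopt_lock are assumed, all standard):

#include <mutex>

class Counter {
  std::mutex m_;
  int value_ = 0; // user code would ideally annotate this GUARDED_BY(m_)

public:
  void increment() {
    std::lock_guard<std::mutex> lock(m_); // acquire_capability on construction,
    ++value_;                             // release_capability at scope exit
  }

  int take() {
    m_.lock();
    // adopt_lock_t: the guard takes over a mutex we already hold
    // (requires_capability) and still unlocks it on destruction.
    std::lock_guard<std::mutex> lock(m_, std::adopt_lock);
    int v = value_;
    value_ = 0;
    return v;
  }
};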
diff --git a/libcxx/include/__mutex/tag_types.h b/libcxx/include/__mutex/tag_types.h
new file mode 100644
index 0000000000000..02cf007ae1219
--- /dev/null
+++ b/libcxx/include/__mutex/tag_types.h
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___MUTEX_TAG_TYPES_H
+#define _LIBCPP___MUTEX_TAG_TYPES_H
+
+#include <__config>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#ifndef _LIBCPP_HAS_NO_THREADS
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+struct _LIBCPP_TYPE_VIS defer_lock_t {
+  explicit defer_lock_t() = default;
+};
+
+struct _LIBCPP_TYPE_VIS try_to_lock_t {
+  explicit try_to_lock_t() = default;
+};
+
+struct _LIBCPP_TYPE_VIS adopt_lock_t {
+  explicit adopt_lock_t() = default;
+};
+
+#  if defined(_LIBCPP_BUILDING_LIBRARY)
+extern _LIBCPP_EXPORTED_FROM_ABI const defer_lock_t defer_lock;
+extern _LIBCPP_EXPORTED_FROM_ABI const try_to_lock_t try_to_lock;
+extern _LIBCPP_EXPORTED_FROM_ABI const adopt_lock_t adopt_lock;
+#  elif !defined(_LIBCPP_CXX03_LANG)
+/* inline */ constexpr defer_lock_t defer_lock = defer_lock_t();
+/* inline */ constexpr try_to_lock_t try_to_lock = try_to_lock_t();
+/* inline */ constexpr adopt_lock_t adopt_lock = adopt_lock_t();
+#  endif
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_HAS_NO_THREADS
+
+#endif // _LIBCPP___MUTEX_TAG_TYPES_H
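These three tag types exist purely for constructor overload dispatch; unique_lock, in the next diff, selects different locking behavior depending on which tag is passed. A minimal sketch of the dispatch, using only standard names (the function below is invented for illustration):

#include <mutex>

std::mutex m;

void tag_dispatch_examples() {
  std::unique_lock<std::mutex> a(m);                  // locks immediately
  a.unlock();

  std::unique_lock<std::mutex> b(m, std::defer_lock); // owns_lock() == false
  b.lock();                                           // lock explicitly later
  b.unlock();

  std::unique_lock<std::mutex> c(m, std::try_to_lock); // owns_lock() mirrors try_lock()
  if (c.owns_lock())
    c.unlock();

  m.lock();
  std::unique_lock<std::mutex> d(m, std::adopt_lock); // adopts the held lock,
}                                                     // unlocks on destruction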
diff --git a/libcxx/include/__mutex/unique_lock.h b/libcxx/include/__mutex/unique_lock.h
new file mode 100644
index 0000000000000..a057d1c69d3ef
--- /dev/null
+++ b/libcxx/include/__mutex/unique_lock.h
@@ -0,0 +1,172 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___MUTEX_UNIQUE_LOCK_H
+#define _LIBCPP___MUTEX_UNIQUE_LOCK_H
+
+#include <__chrono/duration.h>
+#include <__chrono/time_point.h>
+#include <__config>
+#include <__memory/addressof.h>
+#include <__mutex/tag_types.h>
+#include <__utility/swap.h>
+#include <system_error>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+#ifndef _LIBCPP_HAS_NO_THREADS
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <class _Mutex>
+class _LIBCPP_TEMPLATE_VIS unique_lock {
+public:
+  typedef _Mutex mutex_type;
+
+private:
+  mutex_type* __m_;
+  bool __owns_;
+
+public:
+  _LIBCPP_HIDE_FROM_ABI unique_lock() _NOEXCEPT : __m_(nullptr), __owns_(false) {}
+  _LIBCPP_HIDE_FROM_ABI explicit unique_lock(mutex_type& __m) : __m_(std::addressof(__m)), __owns_(true) {
+    __m_->lock();
+  }
+
+  _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, defer_lock_t) _NOEXCEPT
+      : __m_(std::addressof(__m)),
+        __owns_(false) {}
+
+  _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, try_to_lock_t)
+      : __m_(std::addressof(__m)), __owns_(__m.try_lock()) {}
+
+  _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, adopt_lock_t) : __m_(std::addressof(__m)), __owns_(true) {}
+
+  template <class _Clock, class _Duration>
+  _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::time_point<_Clock, _Duration>& __t)
+      : __m_(std::addressof(__m)), __owns_(__m.try_lock_until(__t)) {}
+
+  template <class _Rep, class _Period>
+  _LIBCPP_HIDE_FROM_ABI unique_lock(mutex_type& __m, const chrono::duration<_Rep, _Period>& __d)
+      : __m_(std::addressof(__m)), __owns_(__m.try_lock_for(__d)) {}
+
+  _LIBCPP_HIDE_FROM_ABI ~unique_lock() {
+    if (__owns_)
+      __m_->unlock();
+  }
+
+  unique_lock(unique_lock const&) = delete;
+  unique_lock& operator=(unique_lock const&) = delete;
+
+  _LIBCPP_HIDE_FROM_ABI unique_lock(unique_lock&& __u) _NOEXCEPT : __m_(__u.__m_), __owns_(__u.__owns_) {
+    __u.__m_    = nullptr;
+    __u.__owns_ = false;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI unique_lock& operator=(unique_lock&& __u) _NOEXCEPT {
+    if (__owns_)
+      __m_->unlock();
+
+    __m_        = __u.__m_;
+    __owns_     = __u.__owns_;
+    __u.__m_    = nullptr;
+    __u.__owns_ = false;
+    return *this;
+  }
+
+  void lock();
+  bool try_lock();
+
+  template <class _Rep, class _Period>
+  bool try_lock_for(const chrono::duration<_Rep, _Period>& __d);
+
+  template <class _Clock, class _Duration>
+  bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t);
+
+  void unlock();
+
+  _LIBCPP_HIDE_FROM_ABI void swap(unique_lock& __u) _NOEXCEPT {
+    std::swap(__m_, __u.__m_);
+    std::swap(__owns_, __u.__owns_);
+  }
+
+  _LIBCPP_HIDE_FROM_ABI mutex_type* release() _NOEXCEPT {
+    mutex_type* __m = __m_;
+    __m_    = nullptr;
+    __owns_ = false;
+    return __m;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI bool owns_lock() const _NOEXCEPT { return __owns_; }
+  _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return __owns_; }
+  _LIBCPP_HIDE_FROM_ABI mutex_type* mutex() const _NOEXCEPT { return __m_; }
+};
+_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(unique_lock);
+
+template <class _Mutex>
+void unique_lock<_Mutex>::lock() {
+  if (__m_ == nullptr)
+    __throw_system_error(EPERM, "unique_lock::lock: references null mutex");
+  if (__owns_)
+    __throw_system_error(EDEADLK, "unique_lock::lock: already locked");
+  __m_->lock();
+  __owns_ = true;
+}
+
+template <class _Mutex>
+bool unique_lock<_Mutex>::try_lock() {
+  if (__m_ == nullptr)
+    __throw_system_error(EPERM, "unique_lock::try_lock: references null mutex");
+  if (__owns_)
+    __throw_system_error(EDEADLK, "unique_lock::try_lock: already locked");
+  __owns_ = __m_->try_lock();
+  return __owns_;
+}
+
+template <class _Mutex>
+template <class _Rep, class _Period>
+bool unique_lock<_Mutex>::try_lock_for(const chrono::duration<_Rep, _Period>& __d) {
+  if (__m_ == nullptr)
+    __throw_system_error(EPERM, "unique_lock::try_lock_for: references null mutex");
+  if (__owns_)
+    __throw_system_error(EDEADLK, "unique_lock::try_lock_for: already locked");
+  __owns_ = __m_->try_lock_for(__d);
+  return __owns_;
+}
+
+template <class _Mutex>
+template <class _Clock, class _Duration>
+bool unique_lock<_Mutex>::try_lock_until(const chrono::time_point<_Clock, _Duration>& __t) {
+  if (__m_ == nullptr)
+    __throw_system_error(EPERM, "unique_lock::try_lock_until: references null mutex");
+  if (__owns_)
+    __throw_system_error(EDEADLK, "unique_lock::try_lock_until: already locked");
+  __owns_ = __m_->try_lock_until(__t);
+  return __owns_;
+}
+
+template <class _Mutex>
+void unique_lock<_Mutex>::unlock() {
+  if (!__owns_)
+    __throw_system_error(EPERM, "unique_lock::unlock: not locked");
+  __m_->unlock();
+  __owns_ = false;
+}
+
+template <class _Mutex>
+inline _LIBCPP_HIDE_FROM_ABI void swap(unique_lock<_Mutex>& __x, unique_lock<_Mutex>& __y) _NOEXCEPT {
+  __x.swap(__y);
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP_HAS_NO_THREADS
+
+#endif // _LIBCPP___MUTEX_UNIQUE_LOCK_H
diff --git a/libcxx/include/condition_variable b/libcxx/include/condition_variable
index bb7b13c6fe3eb..e1eec6066ec2e 100644
--- a/libcxx/include/condition_variable
+++ b/libcxx/include/condition_variable
@@ -107,10 +107,18 @@
 */
 
 #include <__assert> // all public C++ headers provide the assertion handler
+#include <__chrono/duration.h>
+#include <__chrono/steady_clock.h>
+#include <__chrono/time_point.h>
+#include <__condition_variable/condition_variable.h>
 #include <__config>
 #include <__memory/shared_ptr.h>
 #include <__memory/unique_ptr.h>
-#include <__mutex_base>
+#include <__mutex/lock_guard.h>
+#include <__mutex/mutex.h>
+#include <__mutex/tag_types.h>
+#include <__mutex/unique_lock.h>
+#include <__utility/move.h>
 #include <version>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -271,7 +279,14 @@ _LIBCPP_END_NAMESPACE_STD
 
 #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
 #  include <atomic>
 #  include <concepts>
+#  include <cstdint>
+#  include <cstdlib>
+#  include <cstring>
+#  include <initializer_list>
+#  include <new>
+#  include <stdexcept>
 #  include <system_error>
+#  include <type_traits>
 #endif
 
 #endif // _LIBCPP_CONDITION_VARIABLE
diff --git a/libcxx/include/libcxx.imp b/libcxx/include/libcxx.imp
index d85ae36953a1e..b9a670c7be50b 100644
--- a/libcxx/include/libcxx.imp
+++ b/libcxx/include/libcxx.imp
@@ -22,6 +22,7 @@
  { include: [ "@<__chrono/.*>", "private", "<chrono>", "public" ] },
  { include: [ "@<__compare/.*>", "private", "<compare>", "public" ] },
  { include: [ "@<__concepts/.*>", "private", "<concepts>", "public" ] },
+  { include: [ "@<__condition_variable/.*>", "private", "<condition_variable>", "public" ] },
  { include: [ "@<__coroutine/.*>", "private", "<coroutine>", "public" ] },
  { include: [ "@<__debug_utils/.*>", "private", "", "public" ] },
  { include: [ "@<__exception/.*>", "private", "<exception>", "public" ] },
@@ -34,6 +35,7 @@
  { include: [ "@<__iterator/.*>", "private", "<iterator>", "public" ] },
  { include: [ "@<__memory/.*>", "private", "<memory>", "public" ] },
  { include: [ "@<__memory_resource/.*>", "private", "<memory_resource>", "public" ] },
+  { include: [ "@<__mutex/.*>", "private", "<mutex>", "public" ] },
  { include: [ "@<__numeric/.*>", "private", "<numeric>", "public" ] },
  { include: [ "@<__random/.*>", "private", "<random>", "public" ] },
  { include: [ "@<__ranges/.*>", "private", "<ranges>", "public" ] },
diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in
index 89a03cbb341d3..7bae70fafee70 100644
--- a/libcxx/include/module.modulemap.in
+++ b/libcxx/include/module.modulemap.in
@@ 
-813,6 +813,10 @@ module std [system] { module condition_variable { header "condition_variable" export * + + module __condition_variable { + module condition_variable { private header "__condition_variable/condition_variable.h" } + } } module coroutine { header "coroutine" @@ -1174,6 +1178,13 @@ module std [system] { @requires_LIBCXX_ENABLE_THREADS@ header "mutex" export * + + module __mutex { + module lock_guard { private header "__mutex/lock_guard.h" } + module mutex { private header "__mutex/mutex.h" } + module tag_types { private header "__mutex/tag_types.h" } + module unique_lock { private header "__mutex/unique_lock.h" } + } } module new { header "new" @@ -1695,7 +1706,6 @@ module std [system] { private header "__locale" export * } module __mbstate_t { private header "__mbstate_t.h" export * } - module __mutex_base { private header "__mutex_base" export * } module __node_handle { private header "__node_handle" export * } module __split_buffer { private header "__split_buffer" export * } module __std_stream { diff --git a/libcxx/include/mutex b/libcxx/include/mutex index 9d24768d6c0e2..3e89aaa4e6e6e 100644 --- a/libcxx/include/mutex +++ b/libcxx/include/mutex @@ -187,9 +187,15 @@ template */ #include <__assert> // all public C++ headers provide the assertion handler +#include <__chrono/steady_clock.h> +#include <__chrono/time_point.h> +#include <__condition_variable/condition_variable.h> #include <__config> #include <__memory/shared_ptr.h> -#include <__mutex_base> +#include <__mutex/lock_guard.h> +#include <__mutex/mutex.h> +#include <__mutex/tag_types.h> +#include <__mutex/unique_lock.h> #include <__threading_support> #include <__utility/forward.h> #include @@ -706,9 +712,15 @@ _LIBCPP_POP_MACROS #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include # include +# include +# include # include # include +# include +# include +# include # include +# include #endif #endif // _LIBCPP_MUTEX diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex index 550f8344ae19a..dd142f4d53600 100644 --- a/libcxx/include/shared_mutex +++ b/libcxx/include/shared_mutex @@ -124,7 +124,18 @@ template #include <__assert> // all public C++ headers provide the assertion handler #include <__availability> +#include <__chrono/duration.h> +#include <__chrono/steady_clock.h> +#include <__chrono/time_point.h> +#include <__condition_variable/condition_variable.h> #include <__config> +#include <__memory/addressof.h> +#include <__mutex/mutex.h> +#include <__mutex/tag_types.h> +#include <__mutex/unique_lock.h> +#include <__utility/swap.h> +#include +#include #include _LIBCPP_PUSH_MACROS @@ -133,8 +144,6 @@ _LIBCPP_PUSH_MACROS #if _LIBCPP_STD_VER >= 14 || defined(_LIBCPP_BUILDING_LIBRARY) -#include <__mutex_base> - #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif diff --git a/libcxx/include/thread b/libcxx/include/thread index 13e722ca9476a..19c8c2df89fd4 100644 --- a/libcxx/include/thread +++ b/libcxx/include/thread @@ -84,12 +84,16 @@ void sleep_for(const chrono::duration& rel_time); */ #include <__assert> // all public C++ headers provide the assertion handler +#include <__chrono/steady_clock.h> +#include <__chrono/time_point.h> +#include <__condition_variable/condition_variable.h> #include <__config> #include <__exception/terminate.h> #include <__functional/hash.h> #include <__memory/addressof.h> #include <__memory/unique_ptr.h> -#include <__mutex_base> +#include <__mutex/mutex.h> +#include <__mutex/unique_lock.h> #include 
<__thread/poll_with_backoff.h> #include <__thread/timed_backoff_policy.h> #include <__threading_support> @@ -416,7 +420,10 @@ _LIBCPP_POP_MACROS #endif #if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include +# include # include +# include #endif #endif // _LIBCPP_THREAD diff --git a/libcxx/src/shared_mutex.cpp b/libcxx/src/shared_mutex.cpp index 73d4dc1c1c7a9..b1976c11d7ef2 100644 --- a/libcxx/src/shared_mutex.cpp +++ b/libcxx/src/shared_mutex.cpp @@ -10,6 +10,7 @@ #ifndef _LIBCPP_HAS_NO_THREADS +#include #include #if defined(__ELF__) && defined(_LIBCPP_LINK_PTHREAD_LIB) # pragma comment(lib, "pthread") diff --git a/libcxx/test/libcxx/private_headers.verify.cpp b/libcxx/test/libcxx/private_headers.verify.cpp index dd2a655642e51..263c05a8b2ce5 100644 --- a/libcxx/test/libcxx/private_headers.verify.cpp +++ b/libcxx/test/libcxx/private_headers.verify.cpp @@ -328,6 +328,7 @@ END-SCRIPT #include <__concepts/semiregular.h> // expected-error@*:* {{use of private header from outside its module: '__concepts/semiregular.h'}} #include <__concepts/swappable.h> // expected-error@*:* {{use of private header from outside its module: '__concepts/swappable.h'}} #include <__concepts/totally_ordered.h> // expected-error@*:* {{use of private header from outside its module: '__concepts/totally_ordered.h'}} +#include <__condition_variable/condition_variable.h> // expected-error@*:* {{use of private header from outside its module: '__condition_variable/condition_variable.h'}} #include <__coroutine/coroutine_handle.h> // expected-error@*:* {{use of private header from outside its module: '__coroutine/coroutine_handle.h'}} #include <__coroutine/coroutine_traits.h> // expected-error@*:* {{use of private header from outside its module: '__coroutine/coroutine_traits.h'}} #include <__coroutine/noop_coroutine_handle.h> // expected-error@*:* {{use of private header from outside its module: '__coroutine/noop_coroutine_handle.h'}} @@ -505,7 +506,10 @@ END-SCRIPT #include <__memory_resource/pool_options.h> // expected-error@*:* {{use of private header from outside its module: '__memory_resource/pool_options.h'}} #include <__memory_resource/synchronized_pool_resource.h> // expected-error@*:* {{use of private header from outside its module: '__memory_resource/synchronized_pool_resource.h'}} #include <__memory_resource/unsynchronized_pool_resource.h> // expected-error@*:* {{use of private header from outside its module: '__memory_resource/unsynchronized_pool_resource.h'}} -#include <__mutex_base> // expected-error@*:* {{use of private header from outside its module: '__mutex_base'}} +#include <__mutex/lock_guard.h> // expected-error@*:* {{use of private header from outside its module: '__mutex/lock_guard.h'}} +#include <__mutex/mutex.h> // expected-error@*:* {{use of private header from outside its module: '__mutex/mutex.h'}} +#include <__mutex/tag_types.h> // expected-error@*:* {{use of private header from outside its module: '__mutex/tag_types.h'}} +#include <__mutex/unique_lock.h> // expected-error@*:* {{use of private header from outside its module: '__mutex/unique_lock.h'}} #include <__node_handle> // expected-error@*:* {{use of private header from outside its module: '__node_handle'}} #include <__numeric/accumulate.h> // expected-error@*:* {{use of private header from outside its module: '__numeric/accumulate.h'}} #include <__numeric/adjacent_difference.h> // expected-error@*:* {{use of private header from outside its module: '__numeric/adjacent_difference.h'}} diff --git 
a/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp b/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp index 374aa2fd15350..13d1bfcb88126 100644 --- a/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp +++ b/libcxx/test/libcxx/thread/thread.condition/thread.condition.condvar/native_handle.pass.cpp @@ -17,8 +17,10 @@ // typedef pthread_cond_t* native_handle_type; // native_handle_type native_handle(); -#include #include +#include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index 92a84f59268bb..e52cf25b099ae 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -734,6 +734,14 @@ set stdexcept set tuple set type_traits set version +shared_mutex cerrno +shared_mutex cstddef +shared_mutex ctime +shared_mutex iosfwd +shared_mutex limits +shared_mutex ratio +shared_mutex system_error +shared_mutex type_traits shared_mutex version source_location cstdint source_location version diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index 833e8ac19f8e9..3f322fefe710f 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -735,6 +735,14 @@ set stdexcept set tuple set type_traits set version +shared_mutex cerrno +shared_mutex cstddef +shared_mutex ctime +shared_mutex iosfwd +shared_mutex limits +shared_mutex ratio +shared_mutex system_error +shared_mutex type_traits shared_mutex version source_location cstdint source_location version diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index c3c0ae6f71e8d..5503cb3ed1bf9 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -737,6 +737,7 @@ set stdexcept set tuple set type_traits set version +shared_mutex cerrno shared_mutex cstddef shared_mutex ctime shared_mutex iosfwd diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index c3c0ae6f71e8d..5503cb3ed1bf9 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -737,6 +737,7 @@ set stdexcept set tuple set type_traits set version +shared_mutex cerrno shared_mutex cstddef shared_mutex ctime shared_mutex iosfwd diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index a5be0f14d8589..6985245378c82 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -743,6 +743,7 @@ set stdexcept set tuple set type_traits set version +shared_mutex cerrno shared_mutex cstddef shared_mutex ctime shared_mutex iosfwd diff --git a/libcxx/test/libcxx/transitive_includes/cxx2b.csv b/libcxx/test/libcxx/transitive_includes/cxx2b.csv index 8bed29fafc013..9701567c65835 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx2b.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx2b.csv @@ -110,11 +110,8 @@ complex version concepts cstddef concepts version condition_variable cstddef -condition_variable cstdint condition_variable cstdlib -condition_variable cstring condition_variable ctime -condition_variable initializer_list condition_variable 
iosfwd condition_variable limits condition_variable new @@ -403,9 +400,7 @@ memory_resource version mutex cstddef mutex cstdint mutex cstdlib -mutex cstring mutex ctime -mutex initializer_list mutex iosfwd mutex limits mutex new @@ -519,6 +514,7 @@ set optional set stdexcept set tuple set version +shared_mutex cerrno shared_mutex cstddef shared_mutex ctime shared_mutex iosfwd @@ -591,12 +587,9 @@ system_error string system_error version thread compare thread cstddef -thread cstdint -thread cstring thread ctime thread iosfwd thread limits -thread new thread ratio thread system_error thread tuple diff --git a/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp b/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp index 12c71ab05e26e..5709e9d45df97 100644 --- a/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.shared_future/wait.pass.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp b/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp index 2385156c3154b..4e6b789e45c26 100644 --- a/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp +++ b/libcxx/test/std/thread/futures/futures.unique_future/wait.pass.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp index c5536d8f37e9c..962dcc03729f4 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex.pass.cpp @@ -22,11 +22,12 @@ // template shared_lock(shared_lock<_Mutex>) // -> shared_lock<_Mutex>; // C++17 +#include +#include +#include #include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp index fde0ed6b0b5b7..4527b6d8124bc 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.cons/mutex_try_to_lock.pass.cpp @@ -19,11 +19,13 @@ // shared_lock(mutex_type& m, try_to_lock_t); +#include +#include +#include +#include #include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp index 1bc131a7b968f..f7715168ee10d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/lock.pass.cpp @@ -19,11 +19,13 @@ // void lock(); +#include +#include +#include +#include 
#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp index ae387cdc8446d..0e707fcf2d50a 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock.pass.cpp @@ -17,9 +17,10 @@ // bool try_lock(); -#include #include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp index 7f8189c298577..d28ae395ccb0d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_for.pass.cpp @@ -16,10 +16,11 @@ // template // bool try_lock_for(const chrono::duration& rel_time); -#include #include #include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp index fb4afdd4d9101..880bf1cbd4999 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.shared/thread.lock.shared.locking/try_lock_until.pass.cpp @@ -16,10 +16,11 @@ // template // bool try_lock_until(const chrono::time_point& abs_time); -#include #include #include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp index bba78cf24178a..2be25748e903b 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex.pass.cpp @@ -18,10 +18,11 @@ // template unique_lock(unique_lock<_Mutex>) // -> unique_lock<_Mutex>; // C++17 +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp index f4f344ef9b2c3..992d383dfa780 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.cons/mutex_try_to_lock.pass.cpp @@ -16,10 +16,11 @@ // unique_lock(mutex_type& m, try_to_lock_t); +#include +#include +#include #include #include -#include -#include #include 
"make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp index fa43f5dd874a5..4aa6660449c99 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/lock.pass.cpp @@ -16,10 +16,11 @@ // void lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp index 9249959ad005a..4cf5ec2ab5ccf 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock.pass.cpp @@ -16,8 +16,9 @@ // bool try_lock(); -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp index fe29d1625069e..8e7004e5eec85 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_for.pass.cpp @@ -16,8 +16,9 @@ // template // bool try_lock_for(const chrono::duration& rel_time); -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp index a1e8553b965fe..077bc517399ab 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/try_lock_until.pass.cpp @@ -16,8 +16,9 @@ // template // bool try_lock_until(const chrono::time_point& abs_time); -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp index 4cd72b6bd8d2f..30c795150dace 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.lock/thread.lock.unique/thread.lock.unique.locking/unlock.pass.cpp @@ -14,8 +14,9 @@ // void unlock(); -#include #include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp index 6f91cc3d3ab11..b3e76cf886c4d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/lock.pass.cpp @@ -15,10 +15,11 @@ // void lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp index 645b74bca920e..bf3cb6530b3b9 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.class/try_lock.pass.cpp @@ -15,10 +15,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp index 51b64b9aaddbb..d9bff9b3cbda5 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/lock.pass.cpp @@ -15,10 +15,11 @@ // void lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp index 801e2e738c5cb..1247c1ce1ba5f 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.mutex.requirements.mutex/thread.mutex.recursive/try_lock.pass.cpp @@ -15,10 +15,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp index 24a52ae69f5af..5d20951576a82 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock.pass.cpp @@ -20,10 +20,11 @@ // void lock(); +#include +#include +#include 
#include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp index 3f5a0642ab128..eca75f005ca55 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/lock_shared.pass.cpp @@ -20,11 +20,12 @@ // void lock_shared(); +#include +#include +#include #include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp index 92727eadbd9b3..bcbe7dfd78c1f 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock.pass.cpp @@ -20,10 +20,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp index ac6a95bd87a52..5a54a7ecdd1f8 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.shared_mutex.requirements/thread.shared_mutex.class/try_lock_shared.pass.cpp @@ -20,11 +20,12 @@ // bool try_lock_shared(); +#include +#include +#include #include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp index c4836a574e9dc..ffec5056f103f 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock.pass.cpp @@ -20,12 +20,12 @@ // void lock(); -#include - #include -#include #include +#include +#include #include +#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp 
b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp index 1ec0814e207ed..7187c95f2cd2d 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/lock_shared.pass.cpp @@ -20,12 +20,13 @@ // void lock_shared(); -#include #include #include +#include #include #include +#include #include #include "make_test_thread.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp index 20eda45677f4e..e454ae2214bc1 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock.pass.cpp @@ -20,10 +20,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp index d1f37a9c42df6..9597218f36ecb 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.sharedtimedmutex.requirements/thread.sharedtimedmutex.class/try_lock_shared.pass.cpp @@ -20,11 +20,12 @@ // bool try_lock_shared(); +#include +#include +#include #include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp index a2a91bc26a7dc..a71bd3d38b2c3 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/lock.pass.cpp @@ -15,10 +15,11 @@ // void lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp index 02d0874c08069..f3942ccb9d860 100644 --- 
a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.class/try_lock.pass.cpp @@ -15,10 +15,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp index 91c6f1c064324..bad5a4457e516 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/lock.pass.cpp @@ -15,10 +15,11 @@ // void lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp index 5915698553f5a..63be0ac713f8b 100644 --- a/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp +++ b/libcxx/test/std/thread/thread.mutex/thread.mutex.requirements/thread.timedmutex.requirements/thread.timedmutex.recursive/try_lock.pass.cpp @@ -15,10 +15,11 @@ // bool try_lock(); +#include +#include +#include #include #include -#include -#include #include "make_test_thread.h" #include "test_macros.h" diff --git a/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp b/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp index 3b4ae203d2c3b..7a080651da393 100644 --- a/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp +++ b/libcxx/test/std/thread/thread.threads/thread.thread.this/sleep_until.pass.cpp @@ -13,9 +13,10 @@ // template // void sleep_until(const chrono::time_point& abs_time); -#include -#include #include +#include +#include +#include #include "test_macros.h" diff --git a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp index 0c2f6852ecc27..e05146686434a 100644 --- a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp +++ b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp @@ -31,6 +31,7 @@ #include #include +#include #include #include "make_string.h" diff --git a/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp index 5bace05949e87..c639ea8dcec42 100644 --- a/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp @@ -27,8 +27,9 @@ // basic_ostream& // operator<<(basic_ostream& os, const hh_mm_ss& hms); -#include #include +#include +#include #include #include "make_string.h" diff --git 
a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp index 1a2fccacb44d7..3ca7ae2b40c91 100644 --- a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "formatter_tests.h" diff --git a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp index 1bbe72a8e285e..2fbca0e1aece3 100644 --- a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "formatter_tests.h" diff --git a/libcxx/utils/data/ignore_format.txt b/libcxx/utils/data/ignore_format.txt index a6838ccb2b022..a89361cbebc6a 100644 --- a/libcxx/utils/data/ignore_format.txt +++ b/libcxx/utils/data/ignore_format.txt @@ -489,7 +489,6 @@ libcxx/include/__memory/uninitialized_algorithms.h libcxx/include/__memory/unique_ptr.h libcxx/include/__memory/uses_allocator.h libcxx/include/mutex -libcxx/include/__mutex_base libcxx/include/new libcxx/include/__node_handle libcxx/include/numbers From 0766c1bd5c0ece916be14b620b02bea845cb9fac Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 22 Mar 2023 12:07:08 -0700 Subject: [PATCH 336/691] [LFTR] Simplify integer case for genLoopLimit [nfc-ish] The integer case in genLoopLimit reduces down to a special case for narrowing the bitwidth of the limit, and then performing the same expansion we would for a pointer IV. Differential Revision: https://reviews.llvm.org/D146638 --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 69 +++++-------------- 1 file changed, 18 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 43f3beb4b34b5..28e6794389538 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -910,60 +910,27 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB, assert(isLoopCounter(IndVar, L, SE)); assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer"); const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); - const SCEV *IVInit = AR->getStart(); assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); - // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter - // finds a valid pointer IV. - if (IndVar->getType()->isPointerTy()) { - const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR; - const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE); - assert(SE->isLoopInvariant(IVLimit, L) && - "Computed iteration count is not loop invariant!"); - return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), - ExitingBB->getTerminator()); - } else { - // In any other case, convert both IVInit and ExitCount to integers before - // comparing. This may result in SCEV expansion of pointers, but in practice - // SCEV will fold the pointer arithmetic away as such: - // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). - // - // Valid Cases: (1) both integers is most common; (2) both may be pointers - // for simple memset-style loops. - // - // IVInit integer and ExitCount pointer would only occur if a canonical IV - // were generated on top of case #2, which is not expected. 
- - // For unit stride, IVCount = Start + ExitCount with 2's complement - // overflow. - - // For integer IVs, truncate the IV before computing IVInit + BECount, - // unless we know apriori that the limit must be a constant when evaluated - // in the bitwidth of the IV. We prefer (potentially) keeping a truncate - // of the IV in the loop over a (potentially) expensive expansion of the - // widened exit count add(zext(add)) expression. - if (SE->getTypeSizeInBits(IVInit->getType()) - > SE->getTypeSizeInBits(ExitCount->getType())) { - if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount)) - ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType()); - else - IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType()); - } - - const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount); - - if (UsePostInc) - IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType())); - - // Expand the code for the iteration count. - assert(SE->isLoopInvariant(IVLimit, L) && - "Computed iteration count is not loop invariant!"); - // Ensure that we generate the same type as IndVar, or a smaller integer - // type. In the presence of null pointer values, we have an integer type - // SCEV expression (IVInit) for a pointer type IV value (IndVar). - return Rewriter.expandCodeFor(IVLimit, ExitCount->getType(), - ExitingBB->getTerminator()); + // For integer IVs, truncate the IV before computing the limit unless we + // know apriori that the limit must be a constant when evaluated in the + // bitwidth of the IV. We prefer (potentially) keeping a truncate of the + // IV in the loop over a (potentially) expensive expansion of the widened + // exit count add(zext(add)) expression. + if (IndVar->getType()->isIntegerTy() && + SE->getTypeSizeInBits(AR->getType()) > + SE->getTypeSizeInBits(ExitCount->getType())) { + const SCEV *IVInit = AR->getStart(); + if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount)) + AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType())); } + + const SCEVAddRecExpr *ARBase = UsePostInc ? 
AR->getPostIncExpr(*SE) : AR; + const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE); + assert(SE->isLoopInvariant(IVLimit, L) && + "Computed iteration count is not loop invariant!"); + return Rewriter.expandCodeFor(IVLimit, ARBase->getType(), + ExitingBB->getTerminator()); } /// This method rewrites the exit condition of the loop to be a canonical != From 08622314d2a23101536ca744f0092eaec63e14e8 Mon Sep 17 00:00:00 2001 From: Jeff Byrnes Date: Wed, 22 Mar 2023 12:19:00 -0700 Subject: [PATCH 337/691] Precommit tests for D146327 --- .../propagate-remove-dead-args.ll | 66 +++++++++++++++++ .../dce-after-argument-promotion-loads.ll | 72 +++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll create mode 100644 llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll diff --git a/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll new file mode 100644 index 0000000000000..cc1f7fb26a479 --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 +; RUN: opt < %s -passes=argpromotion -S | FileCheck %s + +%ptr.struct = type { ptr, ptr, ptr } + +define internal void @child(ptr %this, ptr %y, ptr %x) { +; CHECK-LABEL: define internal void @child +; CHECK-SAME: (ptr [[Y:%.*]], half [[X_0_VAL:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: store half [[X_0_VAL]], ptr [[Y]], align 2 +; CHECK-NEXT: ret void +; +entry: + %0 = load half, ptr %x + store half %0, ptr %y + ret void +} + +define internal void @parent(ptr %this, ptr %p1, ptr %p2) { +; CHECK-LABEL: define internal void @parent +; CHECK-SAME: (ptr [[THIS:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SRC_ELEMENT_OP_0:%.*]] = getelementptr ptr, ptr [[THIS]], i64 0 +; CHECK-NEXT: [[LOAD0:%.*]] = load ptr, ptr [[SRC_ELEMENT_OP_0]], align 8 +; CHECK-NEXT: [[P2_VAL2:%.*]] = load half, ptr [[P2]], align 2 +; CHECK-NEXT: call void @child(ptr [[P1]], half [[P2_VAL2]]) +; CHECK-NEXT: [[SRC_ELEMENT_OP_1:%.*]] = getelementptr ptr, ptr [[THIS]], i64 1 +; CHECK-NEXT: [[LOAD1:%.*]] = load ptr, ptr [[SRC_ELEMENT_OP_1]], align 8 +; CHECK-NEXT: [[P2_VAL1:%.*]] = load half, ptr [[P2]], align 2 +; CHECK-NEXT: call void @child(ptr [[P1]], half [[P2_VAL1]]) +; CHECK-NEXT: [[SRC_ELEMENT_OP_2:%.*]] = getelementptr ptr, ptr [[THIS]], i64 2 +; CHECK-NEXT: [[LOAD2:%.*]] = load ptr, ptr [[SRC_ELEMENT_OP_2]], align 8 +; CHECK-NEXT: [[P2_VAL:%.*]] = load half, ptr [[P2]], align 2 +; CHECK-NEXT: call void @child(ptr [[P1]], half [[P2_VAL]]) +; CHECK-NEXT: ret void +; +entry: + %src_element_op_0 = getelementptr ptr, ptr %this, i64 0 + %load0 = load ptr, ptr %src_element_op_0 + call void @child(ptr %load0, ptr %p1, ptr %p2) + %src_element_op_1 = getelementptr ptr, ptr %this, i64 1 + %load1 = load ptr, ptr %src_element_op_1 + call void @child(ptr %load1, ptr %p1, ptr %p2) + %src_element_op_2 = getelementptr ptr, ptr %this, i64 2 + %load2 = load ptr, ptr %src_element_op_2 + call void @child(ptr %load2, ptr %p1, ptr %p2) + ret void +} + +define void @grandparent() { +; CHECK-LABEL: define void @grandparent() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[F:%.*]] = alloca [[PTR_STRUCT:%.*]], align 8 +; CHECK-NEXT: [[XPTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[YPTR:%.*]] = alloca i32, align 4 +; CHECK-NEXT: 
call void @parent(ptr [[F]], ptr [[XPTR]], ptr [[YPTR]]) +; CHECK-NEXT: ret void +; +entry: + %f = alloca %ptr.struct + %xptr = alloca i32 + %yptr = alloca i32 + call void @parent(ptr %f, ptr %xptr, ptr %yptr) + ret void +} + diff --git a/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll new file mode 100644 index 0000000000000..2bdd42b3dd8ca --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; RUN: opt -O3 -S < %s | FileCheck %s + +; Arg promotion eliminates the struct argument, and eliminates dead arguments, but introduces and leaves dead loads of the eliminated dead arg in callers + +%struct.ss = type { ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr, ptr } + +define internal void @phantomLoad(ptr %p, ptr %y, ptr %x) { +entry: + %0 = load i32, ptr %x + store i32 %0, ptr %y + ret void +} + +define ptr @parent(ptr align 8 dereferenceable(72) %f, i16 %val1, i16 %val2, i32 %val3) align 2 { +; CHECK-LABEL: define {{[^@]+}}@parent +; CHECK-SAME: (ptr readonly returned align 8 dereferenceable(72) [[F:%.*]], i16 [[VAL1:%.*]], i16 [[VAL2:%.*]], i32 [[VAL3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] align 2 { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP_NOT_NOT_I:%.*]] = icmp eq i32 [[VAL3]], 0 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[F]], i64 0, i32 8 +; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 +; CHECK-NEXT: br i1 [[CMP_NOT_NOT_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I:%.*]] +; CHECK: if.then.i: +; CHECK-NEXT: store i16 [[VAL1]], ptr [[TMP1]], align 2 +; CHECK-NEXT: [[ADD_PTR_I_I_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; CHECK-NEXT: br label [[BADCHILD_EXIT:%.*]] +; CHECK: if.else.i: +; CHECK-NEXT: [[ADD_PTR_I_I_I_I7_I:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 +; CHECK-NEXT: store i16 [[VAL1]], ptr [[ADD_PTR_I_I_I_I7_I]], align 2 +; CHECK-NEXT: br label [[BADCHILD_EXIT]] +; CHECK: badChild.exit: +; CHECK-NEXT: [[DOTSINK_I:%.*]] = phi ptr [ [[TMP1]], [[IF_ELSE_I]] ], [ [[ADD_PTR_I_I_I_I_I]], [[IF_THEN_I]] ] +; CHECK-NEXT: store i16 [[VAL2]], ptr [[DOTSINK_I]], align 2 +; CHECK-NEXT: ret ptr [[F]] +; +entry: + call void @badChild(ptr align 8 dereferenceable(72) %f, i16 %val1, i16 %val2, i32 %val3) #4 + ret ptr %f +} + +define internal void @badChild(ptr align 8 dereferenceable(72) %this, i16 %val1, i16 %val2, i32 %val3) align 2 { +entry: + %othergep = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 2 + %load0 = load ptr, ptr %othergep, align 8 + %load2 = load ptr, ptr %this + %x = alloca i32 + %y = alloca i32 + call void @phantomLoad(ptr %load0, ptr %x, ptr %y) + call void @phantomLoad(ptr %load2, ptr %x, ptr %y) + %cmp.not.not = icmp eq i32 %val3, 0 + br i1 %cmp.not.not, label %if.then, label %if.else + +if.then: ; preds = %entry + %0 = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 8 + %1 = load ptr, ptr %0, align 8 + store i16 %val1, ptr %1, align 2 + %add.ptr.i.i.i.i = getelementptr inbounds i8, ptr %1, i64 16 + store i16 %val2, ptr %add.ptr.i.i.i.i, align 2 + br label %if.end + +if.else: ; preds = %entry + %2 = getelementptr inbounds %struct.ss, ptr %this, i64 0, i32 8 + %3 = load ptr, ptr %2, align 8 + %add.ptr.i.i.i.i7 = getelementptr inbounds i8, ptr %3, i64 16 + store i16 %val1, ptr %add.ptr.i.i.i.i7, align 2 + store i16 
%val2, ptr %3, align 2 + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + From 587b3713309b03f73d2affba8a9a992a70aff174 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Wed, 22 Mar 2023 19:30:55 +0000 Subject: [PATCH 338/691] [gn build] Port e655d8a54880 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 764f12ce21ec2..bab71dfafae19 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -369,6 +369,7 @@ if (current_toolchain == default_toolchain) { "__concepts/semiregular.h", "__concepts/swappable.h", "__concepts/totally_ordered.h", + "__condition_variable/condition_variable.h", "__config", "__coroutine/coroutine_handle.h", "__coroutine/coroutine_traits.h", @@ -549,6 +550,10 @@ if (current_toolchain == default_toolchain) { "__memory_resource/pool_options.h", "__memory_resource/synchronized_pool_resource.h", "__memory_resource/unsynchronized_pool_resource.h", + "__mutex/lock_guard.h", + "__mutex/mutex.h", + "__mutex/tag_types.h", + "__mutex/unique_lock.h", "__mutex_base", "__node_handle", "__numeric/accumulate.h", From cf8dc9dfe9eb66621cb7c860bf81b29699415c66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Wed, 22 Mar 2023 20:21:59 +0100 Subject: [PATCH 339/691] [JITLink] Introduce target flags for Symbol and prepare ObjectLinkingLayer to account for them AArch32 branch offsets explicitly encode the target instruction subset (Arm/Thumb) in their least significant bit. We want this bit set (or clear) in addresses we hand out, but the addresses in the LinkGraph should be the real/physical addresses. This patch allows ELFLinkGraphBuilders to set target-specific flags in jitlink::Symbol and prepares ObjectLinkingLayer to account for them. Reviewed By: lhames Differential Revision: https://reviews.llvm.org/D146641 --- .../llvm/ExecutionEngine/JITLink/JITLink.h | 20 +++++++++++++--- .../JITLink/ELFLinkGraphBuilder.h | 23 ++++++++++++++++--- .../Orc/ObjectLinkingLayer.cpp | 13 +++++++---- 3 files changed, 45 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 2b6696c7fdffd..3bc9bebea6e0b 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -367,13 +367,15 @@ inline orc::ExecutorAddr alignToBlock(orc::ExecutorAddr Addr, Block &B) { // must end with a zero, and contain no zeros before the end. bool isCStringBlock(Block &B); -/// Describes symbol linkage. This can be used to make resolve definition -/// clashes. +/// Describes symbol linkage. This can be used to resolve definition clashes. enum class Linkage : uint8_t { Strong, Weak, }; +/// Holds target-specific properties for a symbol. +using TargetFlagsType = uint8_t; + /// For errors and debugging output. const char *getLinkageName(Linkage L); @@ -611,6 +613,17 @@ class Symbol { this->S = static_cast(S); } + /// Check whether the given target flags are set for this Symbol. + bool hasTargetFlags(TargetFlagsType Flags) const { + return static_cast(TargetFlags) & Flags; + } + + /// Set the target flags for this Symbol.
+ void setTargetFlags(TargetFlagsType Flags) { + assert(Flags <= 1 && "Add more bits to store more than single flag"); + TargetFlags = Flags; + } + /// Returns true if this is a weakly referenced external symbol. /// This method may only be called on external symbols. bool isWeaklyReferenced() const { @@ -655,12 +668,13 @@ class Symbol { // FIXME: A char* or SymbolStringPtr may pack better. StringRef Name; Addressable *Base = nullptr; - uint64_t Offset : 58; + uint64_t Offset : 57; uint64_t L : 1; uint64_t S : 2; uint64_t IsLive : 1; uint64_t IsCallable : 1; uint64_t WeakRef : 1; + uint64_t TargetFlags : 1; size_t Size = 0; }; diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index 26feb8ea3277b..9d2d4958dcf6c 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -112,6 +112,17 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase { Expected> getSymbolLinkageAndScope(const typename ELFT::Sym &Sym, StringRef Name); + /// Set the target flags on the given Symbol. + virtual TargetFlagsType makeTargetFlags(const typename ELFT::Sym &Sym) { + return TargetFlagsType{}; + } + + /// Get the physical offset of the symbol on the target platform. + virtual orc::ExecutorAddrDiff getRawOffset(const typename ELFT::Sym &Sym, + TargetFlagsType Flags) { + return Sym.getValue(); + } + Error prepare(); Error graphifySections(); Error graphifySymbols(); @@ -478,6 +489,9 @@ template Error ELFLinkGraphBuilder::graphifySymbols() { << "\"\n"; }); + TargetFlagsType Flags = makeTargetFlags(Sym); + orc::ExecutorAddrDiff Offset = getRawOffset(Sym, Flags); + // In RISCV, temporary symbols (Used to generate dwarf, eh_frame // sections...) will appear in object code's symbol table, and LLVM does // not use names on these temporary symbols (RISCV gnu toolchain uses @@ -485,10 +499,13 @@ template Error ELFLinkGraphBuilder::graphifySymbols() { // anonymous symbol. auto &GSym = Name->empty() - ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size, + ? 
G->addAnonymousSymbol(*B, Offset, Sym.st_size, false, false) - : G->addDefinedSymbol(*B, Sym.getValue(), *Name, Sym.st_size, L, - S, Sym.getType() == ELF::STT_FUNC, false); + : G->addDefinedSymbol(*B, Offset, *Name, Sym.st_size, L, + S, Sym.getType() == ELF::STT_FUNC, + false); + + GSym.setTargetFlags(Flags); setGraphSymbol(SymIndex, GSym); } } else if (Sym.isUndefined() && Sym.isExternal()) { diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index e7356e5d3151e..2c270cd66285d 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -39,6 +39,10 @@ bool hasInitializerSection(jitlink::LinkGraph &G) { return false; } +JITTargetAddress getJITSymbolPtrForSymbol(Symbol &Sym) { + return Sym.getAddress().getValue(); +} + JITSymbolFlags getJITSymbolFlagsForSymbol(Symbol &Sym) { JITSymbolFlags Flags; @@ -215,10 +219,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { for (auto *Sym : G.defined_symbols()) if (Sym->hasName() && Sym->getScope() != Scope::Local) { auto InternedName = ES.intern(Sym->getName()); + auto Ptr = getJITSymbolPtrForSymbol(*Sym); auto Flags = getJITSymbolFlagsForSymbol(*Sym); - - InternedResult[InternedName] = - JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags); + InternedResult[InternedName] = JITEvaluatedSymbol(Ptr, Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); @@ -229,9 +232,9 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { for (auto *Sym : G.absolute_symbols()) if (Sym->hasName() && Sym->getScope() != Scope::Local) { auto InternedName = ES.intern(Sym->getName()); + auto Ptr = getJITSymbolPtrForSymbol(*Sym); auto Flags = getJITSymbolFlagsForSymbol(*Sym); - InternedResult[InternedName] = - JITEvaluatedSymbol(Sym->getAddress().getValue(), Flags); + InternedResult[InternedName] = JITEvaluatedSymbol(Ptr, Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { assert(!ExtraSymbolsToClaim.count(InternedName) && "Duplicate symbol to claim?"); From 77ed8311a625f449e7ee8bebda3b2940be6dc211 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Mar 2023 12:56:13 -0700 Subject: [PATCH 340/691] [test] Add tools/llvm-dwarfdump/ARM/lit.local.cfg after D143513 --- llvm/test/tools/llvm-dwarfdump/ARM/lit.local.cfg | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 llvm/test/tools/llvm-dwarfdump/ARM/lit.local.cfg diff --git a/llvm/test/tools/llvm-dwarfdump/ARM/lit.local.cfg b/llvm/test/tools/llvm-dwarfdump/ARM/lit.local.cfg new file mode 100644 index 0000000000000..236e1d3441665 --- /dev/null +++ b/llvm/test/tools/llvm-dwarfdump/ARM/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'ARM' in config.root.targets: + config.unsupported = True From 3d334df58742ff53fb00aa3caeb7eb5da3436348 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 16 Mar 2023 13:09:44 -0400 Subject: [PATCH 341/691] [libc++] Remove availability markup for std::format std::format is currently experimental, so there is technically no deployment target requirement for it (since the only symbols required for it are in `libc++experimental.a`). However, some parts of std::format depend indirectly on the floating point std::to_chars implementation, which does have deployment target requirements. 
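[Illustrative aside, not part of this change: the sketch below uses an invented macro name and an invented 10.15 version purely to show the mechanism. Availability markup of this kind expands to Clang's availability attribute, so a marked declaration cannot even be referenced when building for an older deployment target.]

    // Hypothetical stand-in for libc++-style availability markup; not the
    // real definition. With "strict", any use of a marked declaration is an
    // error whenever the deployment target predates the introduced version.
    #if defined(__APPLE__)
    #  define _DEMO_AVAILABILITY_FP_TO_CHARS \
         __attribute__((availability(macosx, strict, introduced=10.15)))
    #else
    #  define _DEMO_AVAILABILITY_FP_TO_CHARS
    #endif

    // Stand-in for the floating-point std::to_chars overload that
    // std::format reaches indirectly.
    _DEMO_AVAILABILITY_FP_TO_CHARS
    void demo_fp_to_chars();

    void user() {
      demo_fp_to_chars(); // error, e.g., with -mmacosx-version-min=10.13
    }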
This patch removes all the availability markup for std::format and updates the XFAILs in the tests to properly explain why they fail on old deployment targets, when they do. It also changes a couple of tests to avoid depending on floating-point std::to_chars when it isn't fundamental to the test. Finally, some tests are marked as XFAIL but I added a comment saying "TODO FMT This test should not require std::to_chars(floating-point)". These tests do not fundamentally depend on floating-point std::to_chars; however, they end up failing because calling std::format even without a floating-point argument to format will end up requiring floating-point std::to_chars. I believe this is an implementation artifact that could be avoided in all cases where we know the format string at compile-time. In the tests, I added the TODO comment only to the places where we could do better and actually avoid relying on floating-point std::to_chars because we know the format string at compile-time. Differential Revision: https://reviews.llvm.org/D134598 --- libcxx/include/__availability | 11 ---- libcxx/include/__chrono/formatter.h | 32 +++++----- libcxx/include/__chrono/ostream.h | 35 +++++------ libcxx/include/__format/container_adaptor.h | 8 +-- libcxx/include/__format/format_arg.h | 6 +- libcxx/include/__format/format_args.h | 2 +- libcxx/include/__format/format_context.h | 5 +- libcxx/include/__format/format_functions.h | 62 +++++++++++-------- libcxx/include/__format/format_fwd.h | 6 +- .../include/__format/format_parse_context.h | 2 +- libcxx/include/__format/formatter.h | 2 +- libcxx/include/__format/formatter_bool.h | 2 +- libcxx/include/__format/formatter_char.h | 8 +-- .../__format/formatter_floating_point.h | 6 +- libcxx/include/__format/formatter_integer.h | 26 ++++---- libcxx/include/__format/formatter_pointer.h | 6 +- libcxx/include/__format/formatter_string.h | 10 +-- libcxx/include/__format/formatter_tuple.h | 6 +- .../__format/range_default_formatter.h | 13 ++-- libcxx/include/__format/range_formatter.h | 2 +- libcxx/include/vector | 2 +- .../format.functions.format.pass.cpp | 5 +- .../format.functions.vformat.pass.cpp | 5 +- .../container.adaptors.format/format.pass.cpp | 4 -- .../container.adaptors.format/parse.pass.cpp | 4 -- .../format.functions.format.pass.cpp | 4 ++ .../format.functions.vformat.pass.cpp | 5 +- .../vector.bool.fmt/format.pass.cpp | 4 -- .../vector.bool.fmt/parse.pass.cpp | 4 -- .../time.cal.day.nonmembers/ostream.pass.cpp | 4 ++ .../time.cal.md.nonmembers/ostream.pass.cpp | 4 ++ .../time.cal/time.cal.mdlast/ostream.pass.cpp | 4 ++ .../ostream.pass.cpp | 4 ++ .../time.cal.mwd.nonmembers/ostream.pass.cpp | 4 ++ .../ostream.pass.cpp | 4 ++ .../ostream.pass.cpp | 4 ++ .../ostream.pass.cpp | 4 ++ .../ostream.pass.cpp | 4 ++ .../time.cal.year.nonmembers/ostream.pass.cpp | 4 ++ .../time.cal.ym.nonmembers/ostream.pass.cpp | 4 ++ .../time.cal.ymd.nonmembers/ostream.pass.cpp | 4 ++ .../ostream.pass.cpp | 4 ++ .../time.cal.ymwd.nonmembers/ostream.pass.cpp | 4 ++ .../ostream.pass.cpp | 4 ++ .../time.duration.nonmember/ostream.pass.cpp | 4 ++ .../time.hms.nonmembers/ostream.pass.cpp | 3 + .../std/time/time.syn/formatter.day.pass.cpp | 4 ++ .../time/time.syn/formatter.duration.pass.cpp | 3 + .../time/time.syn/formatter.hh_mm_ss.pass.cpp | 3 + .../time/time.syn/formatter.month.pass.cpp | 4 ++ .../time.syn/formatter.month_day.pass.cpp | 4 ++ .../formatter.month_day_last.pass.cpp | 4 ++ .../time.syn/formatter.month_weekday.pass.cpp | 4 ++ .../time/time.syn/formatter.weekday.pass.cpp | 4 ++
.../time.syn/formatter.weekday_index.pass.cpp | 4 ++ .../time.syn/formatter.weekday_last.pass.cpp | 4 ++ .../std/time/time.syn/formatter.year.pass.cpp | 4 ++ .../time.syn/formatter.year_month.pass.cpp | 4 ++ .../formatter.year_month_day.pass.cpp | 4 ++ .../formatter.year_month_day_last.pass.cpp | 4 ++ .../formatter.year_month_weekday.pass.cpp | 4 ++ ...formatter.year_month_weekday_last.pass.cpp | 4 ++ .../make_format_args.pass.cpp | 4 +- .../make_wformat_args.pass.cpp | 4 +- .../format.arg/visit_format_arg.pass.cpp | 3 - .../format.arguments/format.args/get.pass.cpp | 3 - .../format/format.error/format.error.pass.cpp | 3 - .../format/format.fmt.string/ctor.verify.cpp | 4 +- .../format/format.fmt.string/get.pass.cpp | 4 +- .../format.fmt.string/types.compile.pass.cpp | 9 ++- .../concept.formattable.compile.pass.cpp | 7 ++- ...concept.formattable.float.compile.pass.cpp | 58 +++++++++++++++++ .../formatter.floating_point.pass.cpp | 3 + .../format.parse.ctx/check_arg_id.pass.cpp | 3 - .../format.parse.ctx/next_arg_id.pass.cpp | 3 - .../format/format.functions/P2418.pass.cpp | 4 ++ .../format/format.functions/ascii.pass.cpp | 4 ++ .../escaped_output.ascii.pass.cpp | 4 ++ .../escaped_output.unicode.pass.cpp | 4 ++ .../format.functions/format.locale.pass.cpp | 3 + .../format.functions/format.locale.verify.cpp | 4 ++ .../format/format.functions/format.pass.cpp | 3 + .../format/format.functions/format.verify.cpp | 4 ++ .../format_to.locale.pass.cpp | 3 + .../format_to.locale.verify.cpp | 4 ++ .../format.functions/format_to.pass.cpp | 3 + .../format.functions/format_to.verify.cpp | 4 ++ .../format_to_n.locale.pass.cpp | 3 + .../format_to_n.locale.verify.cpp | 4 ++ .../format.functions/format_to_n.pass.cpp | 3 + .../format.functions/format_to_n.verify.cpp | 4 ++ .../formatted_size.locale.pass.cpp | 3 + .../formatted_size.locale.verify.cpp | 4 ++ .../format.functions/formatted_size.pass.cpp | 3 + .../formatted_size.verify.cpp | 4 ++ .../locale-specific_form.pass.cpp | 3 + .../format/format.functions/unicode.pass.cpp | 4 ++ .../format.functions/vformat.locale.pass.cpp | 3 + .../format/format.functions/vformat.pass.cpp | 3 + .../vformat_to.locale.pass.cpp | 3 + .../format.functions/vformat_to.pass.cpp | 3 + .../format.range.fmtdef/format.pass.cpp | 4 -- .../format.range.fmtdef/parse.pass.cpp | 4 -- .../format.range.fmtdef/set_brackets.pass.cpp | 4 -- .../set_separator.pass.cpp | 4 -- .../format_kind.compile.pass.cpp | 3 + .../format.functions.format.pass.cpp | 5 +- .../format.functions.vformat.pass.cpp | 5 +- .../format.range.fmtmap/format.pass.cpp | 4 -- .../format.range.fmtmap/parse.pass.cpp | 4 -- .../format.functions.format.pass.cpp | 5 +- .../format.functions.vformat.pass.cpp | 5 +- .../format.range.fmtset/format.pass.cpp | 4 -- .../format.range.fmtset/parse.pass.cpp | 4 -- .../format.functions.format.pass.cpp | 5 +- .../format.functions.vformat.pass.cpp | 5 +- .../format.range.formatter/format.pass.cpp | 4 -- .../format.range.formatter/parse.pass.cpp | 4 -- .../set_brackets.pass.cpp | 4 -- .../set_separator.pass.cpp | 4 -- .../underlying.pass.cpp | 4 -- .../format.functions.format.pass.cpp | 6 +- .../format.functions.format.verify.cpp | 4 ++ .../format.functions.vformat.pass.cpp | 5 +- .../format/format.tuple/format.pass.cpp | 6 +- .../format/format.tuple/parse.pass.cpp | 6 +- .../format/format.tuple/set_brackets.pass.cpp | 6 +- .../format.tuple/set_separator.pass.cpp | 6 +- libcxx/utils/ci/run-buildbot | 6 -- 129 files changed, 460 insertions(+), 285 deletions(-) create mode 100644 
libcxx/test/std/utilities/format/format.formattable/concept.formattable.float.compile.pass.cpp diff --git a/libcxx/include/__availability b/libcxx/include/__availability index c03d373cafb51..5978dabdacb5f 100644 --- a/libcxx/include/__availability +++ b/libcxx/include/__availability @@ -149,13 +149,6 @@ // # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_latch // # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore - // This controls the availability of the C++20 format library. - // The library is in development and not ABI stable yet. P2216 is - // retroactively accepted in C++20. This paper contains ABI breaking - // changes. -# define _LIBCPP_AVAILABILITY_FORMAT -// # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_format - // This controls whether the library claims to provide a default verbose // termination function, and consequently whether the headers will try // to use it when the mechanism isn't overriden at compile-time. @@ -259,10 +252,6 @@ # define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_semaphore # endif -# define _LIBCPP_AVAILABILITY_FORMAT \ - __attribute__((unavailable)) -# define _LIBCPP_AVAILABILITY_DISABLE_FTM___cpp_lib_format - # define _LIBCPP_HAS_NO_VERBOSE_ABORT_IN_LIBRARY #else diff --git a/libcxx/include/__chrono/formatter.h b/libcxx/include/__chrono/formatter.h index ee7cf93fc79b9..f6e0f9602fdd9 100644 --- a/libcxx/include/__chrono/formatter.h +++ b/libcxx/include/__chrono/formatter.h @@ -542,7 +542,7 @@ __format_chrono(const _Tp& __value, } // namespace __formatter template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_chrono { +struct _LIBCPP_TEMPLATE_VIS __formatter_chrono { public: _LIBCPP_HIDE_FROM_ABI constexpr auto __parse( basic_format_parse_context<_CharT>& __parse_ctx, __format_spec::__fields __fields, __format_spec::__flags __flags) @@ -582,7 +582,7 @@ struct formatter, _CharT> : public __formatter_c }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -594,7 +594,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -606,7 +606,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -618,7 +618,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -630,7 +630,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -642,7 +642,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = 
__formatter_chrono<_CharT>; @@ -654,7 +654,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -666,7 +666,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -678,7 +678,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -690,7 +690,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -702,7 +702,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -714,7 +714,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -726,7 +726,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -738,7 +738,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; @@ -750,7 +750,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: using _Base = __formatter_chrono<_CharT>; diff --git a/libcxx/include/__chrono/ostream.h b/libcxx/include/__chrono/ostream.h index 23feb9d711303..2f34115c729a3 100644 --- a/libcxx/include/__chrono/ostream.h +++ b/libcxx/include/__chrono/ostream.h @@ -93,7 +93,7 @@ _LIBCPP_HIDE_FROM_ABI auto __units_suffix() { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const duration<_Rep, _Period>& __d) { basic_ostringstream<_CharT, _Traits> __s; __s.flags(__os.flags()); @@ -104,8 +104,7 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const duration<_Rep, _Period>& } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& -operator<<(basic_ostream<_CharT, _Traits>& __os, const day& __d) { +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const day& __d) { return __os << (__d.ok() ? 
std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:%d}"), __d) // Note this error differs from the wording of the Standard. The // Standard wording doesn't work well on AIX or Windows. There @@ -117,7 +116,7 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const day& __d) { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const month& __m) { return __os << (__m.ok() ? std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%b}"), __m) : std::format(__os.getloc(), @@ -126,14 +125,14 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const month& __m) { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year& __y) { return __os << (__y.ok() ? std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:%Y}"), __y) : std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:%Y} is not a valid year"), __y)); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const weekday& __wd) { return __os << (__wd.ok() ? std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%a}"), __wd) : std::format(__os.getloc(), // TODO FMT Standard mandated locale isn't used. @@ -142,7 +141,7 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const weekday& __wd) { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const weekday_indexed& __wdi) { auto __i = __wdi.index(); return __os << (__i >= 1 && __i <= 5 @@ -154,13 +153,13 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const weekday_indexed& __wdi) { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const weekday_last& __wdl) { return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L}[last]"), __wdl.weekday()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const month_day& __md) { // TODO FMT The Standard allows 30th of February to be printed. // It would be nice to show an error message instead. 
@@ -168,47 +167,47 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const month_day& __md) { } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const month_day_last& __mdl) { return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L}/last"), __mdl.month()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const month_weekday& __mwd) { return __os << std::format( __os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L}/{:L}"), __mwd.month(), __mwd.weekday_indexed()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const month_weekday_last& __mwdl) { return __os << std::format( __os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L}/{:L}"), __mwdl.month(), __mwdl.weekday_last()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month& __ym) { return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{}/{:L}"), __ym.year(), __ym.month()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_day& __ymd) { return __os << (__ymd.ok() ? std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:%F}"), __ymd) : std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:%F} is not a valid date"), __ymd)); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_day_last& __ymdl) { return __os << std::format( __os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{}/{:L}"), __ymdl.year(), __ymdl.month_day_last()); } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_weekday& __ymwd) { return __os << std::format( __os.getloc(), @@ -219,7 +218,7 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_weekday& __ymw } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_weekday_last& __ymwdl) { return __os << std::format( __os.getloc(), @@ -230,7 +229,7 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const year_month_weekday_last& } template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT basic_ostream<_CharT, _Traits>& +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const hh_mm_ss<_Duration> __hms) { return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%T}"), __hms); } diff --git a/libcxx/include/__format/container_adaptor.h b/libcxx/include/__format/container_adaptor.h index 9439b10c29914..5b95f600f0cdc 100644 --- a/libcxx/include/__format/container_adaptor.h +++ 
b/libcxx/include/__format/container_adaptor.h @@ -37,7 +37,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // adaptor headers. To use the format functions users already include . template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_container_adaptor { +struct _LIBCPP_TEMPLATE_VIS __formatter_container_adaptor { private: using __maybe_const_container = __fmt_maybe_const; using __maybe_const_adaptor = __maybe_const, _Adaptor>; @@ -57,15 +57,15 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_container_ad }; template _Container> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_container_adaptor, _CharT> {}; template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_container_adaptor, _CharT> {}; template _Container> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_container_adaptor, _CharT> {}; #endif //_LIBCPP_STD_VER >= 23 diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h index a25976a0795e7..7e37dd4f0b377 100644 --- a/libcxx/include/__format/format_arg.h +++ b/libcxx/include/__format/format_arg.h @@ -95,7 +95,7 @@ constexpr __arg_t __get_packed_type(uint64_t __types, size_t __id) { // This function is not user obervable, so it can directly use the non-standard // types of the "variant". See __arg_t for more details. template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT decltype(auto) +_LIBCPP_HIDE_FROM_ABI decltype(auto) __visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { switch (__arg.__type_) { case __format::__arg_t::__none: @@ -225,7 +225,7 @@ class __basic_format_arg_value { }; template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg { +class _LIBCPP_TEMPLATE_VIS basic_format_arg { public: class _LIBCPP_TEMPLATE_VIS handle; @@ -277,7 +277,7 @@ class _LIBCPP_TEMPLATE_VIS basic_format_arg<_Context>::handle { // This function is user facing, so it must wrap the non-standard types of // the "variant" in a handle to stay conforming. See __arg_t for more details. 
template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT decltype(auto) +_LIBCPP_HIDE_FROM_ABI decltype(auto) visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { switch (__arg.__type_) { # ifndef _LIBCPP_HAS_NO_INT128 diff --git a/libcxx/include/__format/format_args.h b/libcxx/include/__format/format_args.h index 32f1de97c2d1c..c2e7c96412377 100644 --- a/libcxx/include/__format/format_args.h +++ b/libcxx/include/__format/format_args.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_args { +class _LIBCPP_TEMPLATE_VIS basic_format_args { public: _LIBCPP_HIDE_FROM_ABI basic_format_args() noexcept = default; diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h index b8a9a54cf1e22..521131db84d80 100644 --- a/libcxx/include/__format/format_context.h +++ b/libcxx/include/__format/format_context.h @@ -41,7 +41,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template requires output_iterator<_OutIt, const _CharT&> -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_context; +class _LIBCPP_TEMPLATE_VIS basic_format_context; #ifndef _LIBCPP_HAS_NO_LOCALIZATION /** @@ -80,7 +80,6 @@ requires output_iterator<_OutIt, const _CharT&> class // clang-format off _LIBCPP_TEMPLATE_VIS - _LIBCPP_AVAILABILITY_FORMAT _LIBCPP_PREFERRED_NAME(format_context) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wformat_context)) // clang-format on @@ -162,7 +161,7 @@ class // Here the width of an element in input is determined dynamically. // Note when the top-level element has no width the retargeting is not needed. template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT +class _LIBCPP_TEMPLATE_VIS basic_format_context::__iterator, _CharT> { public: using iterator = typename __format::__retarget_buffer<_CharT>::__iterator; diff --git a/libcxx/include/__format/format_functions.h b/libcxx/include/__format/format_functions.h index 218ae5b34c173..75afd92ce0566 100644 --- a/libcxx/include/__format/format_functions.h +++ b/libcxx/include/__format/format_functions.h @@ -344,7 +344,7 @@ struct _LIBCPP_TEMPLATE_VIS basic_format_string { _Context{__types_.data(), __handles_.data(), sizeof...(_Args)}); } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT constexpr basic_string_view<_CharT> get() const noexcept { + _LIBCPP_HIDE_FROM_ABI constexpr basic_string_view<_CharT> get() const noexcept { return __str_; } @@ -409,21 +409,21 @@ requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt // https://reviews.llvm.org/D110499#inline-1180704 // TODO FMT Evaluate whether we want to file a Clang bug report regarding this. template _OutIt> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt vformat_to(_OutIt __out_it, string_view __fmt, format_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), __fmt, __args); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt vformat_to(_OutIt __out_it, wstring_view __fmt, wformat_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), __fmt, __args); } #endif template _OutIt, class... 
_Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt format_to(_OutIt __out_it, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat_to(_VSTD::move(__out_it), __fmt.get(), _VSTD::make_format_args(__args...)); @@ -431,14 +431,17 @@ format_to(_OutIt __out_it, format_string<_Args...> __fmt, _Args&&... __args) { #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt format_to(_OutIt __out_it, wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat_to(_VSTD::move(__out_it), __fmt.get(), _VSTD::make_wformat_args(__args...)); } #endif -_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string +// TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup +// fires too eagerly, see http://llvm.org/PR61563. +template +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI string vformat(string_view __fmt, format_args __args) { string __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), __fmt, __args); @@ -446,7 +449,10 @@ vformat(string_view __fmt, format_args __args) { } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +// TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup +// fires too eagerly, see http://llvm.org/PR61563. +template +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI wstring vformat(wstring_view __fmt, wformat_args __args) { wstring __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), __fmt, __args); @@ -455,14 +461,14 @@ vformat(wstring_view __fmt, wformat_args __args) { #endif template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string format(format_string<_Args...> __fmt, +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI string format(format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat(__fmt.get(), _VSTD::make_format_args(__args...)); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI wstring format(wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat(__fmt.get(), _VSTD::make_wformat_args(__args...)); } @@ -479,14 +485,14 @@ _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> __vformat_to_n(_OutIt __out_it, } template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformat_to_n(_VSTD::move(__out_it), __n, __fmt.get(), _VSTD::make_format_args(__args...)); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt, class... _Args> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> +_LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, wformat_string<_Args...> __fmt, _Args&&... 
__args) { return _VSTD::__vformat_to_n(_VSTD::move(__out_it), __n, __fmt.get(), _VSTD::make_wformat_args(__args...)); @@ -502,14 +508,14 @@ _LIBCPP_HIDE_FROM_ABI size_t __vformatted_size(basic_string_view<_CharT> __fmt, } template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI size_t formatted_size(format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformatted_size(__fmt.get(), basic_format_args{_VSTD::make_format_args(__args...)}); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI size_t formatted_size(wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformatted_size(__fmt.get(), basic_format_args{_VSTD::make_wformat_args(__args...)}); } @@ -536,7 +542,7 @@ requires(output_iterator<_OutIt, const _CharT&>) _LIBCPP_HIDE_FROM_ABI _OutIt } template _OutIt> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt vformat_to( +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt vformat_to( _OutIt __out_it, locale __loc, string_view __fmt, format_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt, __args); @@ -544,7 +550,7 @@ _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt v #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt vformat_to( +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt vformat_to( _OutIt __out_it, locale __loc, wstring_view __fmt, wformat_args __args) { return _VSTD::__vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt, __args); @@ -552,7 +558,7 @@ _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt v #endif template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt format_to(_OutIt __out_it, locale __loc, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt.get(), _VSTD::make_format_args(__args...)); @@ -560,14 +566,17 @@ format_to(_OutIt __out_it, locale __loc, format_string<_Args...> __fmt, _Args&&. #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT _OutIt +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt format_to(_OutIt __out_it, locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat_to(_VSTD::move(__out_it), _VSTD::move(__loc), __fmt.get(), _VSTD::make_wformat_args(__args...)); } #endif -_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string +// TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup +// fires too eagerly, see http://llvm.org/PR61563. 
+template +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI string vformat(locale __loc, string_view __fmt, format_args __args) { string __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), _VSTD::move(__loc), __fmt, @@ -576,7 +585,10 @@ vformat(locale __loc, string_view __fmt, format_args __args) { } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS -_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +// TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup +// fires too eagerly, see http://llvm.org/PR61563. +template +_LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI wstring vformat(locale __loc, wstring_view __fmt, wformat_args __args) { wstring __res; _VSTD::vformat_to(_VSTD::back_inserter(__res), _VSTD::move(__loc), __fmt, @@ -586,7 +598,7 @@ vformat(locale __loc, wstring_view __fmt, wformat_args __args) { #endif template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string format(locale __loc, +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI string format(locale __loc, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat(_VSTD::move(__loc), __fmt.get(), @@ -595,7 +607,7 @@ _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT string f #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT wstring +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI wstring format(locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::vformat(_VSTD::move(__loc), __fmt.get(), _VSTD::make_wformat_args(__args...)); @@ -614,7 +626,7 @@ _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> __vformat_to_n(_OutIt __out_it, } template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformat_to_n(_VSTD::move(__out_it), __n, _VSTD::move(__loc), __fmt.get(), @@ -623,7 +635,7 @@ format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc, format #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template _OutIt, class... _Args> -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT format_to_n_result<_OutIt> +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformat_to_n(_VSTD::move(__out_it), __n, _VSTD::move(__loc), __fmt.get(), @@ -641,14 +653,14 @@ _LIBCPP_HIDE_FROM_ABI size_t __vformatted_size(locale __loc, basic_string_view<_ } template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI size_t formatted_size(locale __loc, format_string<_Args...> __fmt, _Args&&... __args) { return _VSTD::__vformatted_size(_VSTD::move(__loc), __fmt.get(), basic_format_args{_VSTD::make_format_args(__args...)}); } #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template -_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FORMAT size_t +_LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI size_t formatted_size(locale __loc, wformat_string<_Args...> __fmt, _Args&&... 
__args) { return _VSTD::__vformatted_size(_VSTD::move(__loc), __fmt.get(), basic_format_args{_VSTD::make_wformat_args(__args...)}); } diff --git a/libcxx/include/__format/format_fwd.h b/libcxx/include/__format/format_fwd.h index d3e573f893672..120b2fc8d47de 100644 --- a/libcxx/include/__format/format_fwd.h +++ b/libcxx/include/__format/format_fwd.h @@ -23,14 +23,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg; +class _LIBCPP_TEMPLATE_VIS basic_format_arg; template requires output_iterator<_OutIt, const _CharT&> -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_context; +class _LIBCPP_TEMPLATE_VIS basic_format_context; template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter; +struct _LIBCPP_TEMPLATE_VIS formatter; #endif //_LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__format/format_parse_context.h b/libcxx/include/__format/format_parse_context.h index ac2f5a843405f..79f53f77d4a05 100644 --- a/libcxx/include/__format/format_parse_context.h +++ b/libcxx/include/__format/format_parse_context.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_parse_context { +class _LIBCPP_TEMPLATE_VIS basic_format_parse_context { public: using char_type = _CharT; using const_iterator = typename basic_string_view<_CharT>::const_iterator; diff --git a/libcxx/include/__format/formatter.h b/libcxx/include/__format/formatter.h index e2c58889c5e17..172b2d5f7b8a1 100644 --- a/libcxx/include/__format/formatter.h +++ b/libcxx/include/__format/formatter.h @@ -32,7 +32,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD /// - is_copy_assignable, and /// - is_move_assignable. 
template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter { +struct _LIBCPP_TEMPLATE_VIS formatter { formatter() = delete; formatter(const formatter&) = delete; formatter& operator=(const formatter&) = delete; diff --git a/libcxx/include/__format/formatter_bool.h b/libcxx/include/__format/formatter_bool.h index 84f8bcfa629bf..1fb75755fc572 100644 --- a/libcxx/include/__format/formatter_bool.h +++ b/libcxx/include/__format/formatter_bool.h @@ -36,7 +36,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter { +struct _LIBCPP_TEMPLATE_VIS formatter { public: _LIBCPP_HIDE_FROM_ABI constexpr auto parse(basic_format_parse_context<_CharT>& __parse_ctx) -> decltype(__parse_ctx.begin()) { diff --git a/libcxx/include/__format/formatter_char.h b/libcxx/include/__format/formatter_char.h index eaac70cbe18ee..7d63c042c554f 100644 --- a/libcxx/include/__format/formatter_char.h +++ b/libcxx/include/__format/formatter_char.h @@ -31,7 +31,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_char { +struct _LIBCPP_TEMPLATE_VIS __formatter_char { public: _LIBCPP_HIDE_FROM_ABI constexpr auto parse(basic_format_parse_context<_CharT>& __parse_ctx) -> decltype(__parse_ctx.begin()) { @@ -74,14 +74,14 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_char { }; template <> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_char {}; +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char {}; # ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS template <> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_char {}; +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char {}; template <> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_char { +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char { }; # endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h index 31cd012e484ad..cac74c1f2a62c 100644 --- a/libcxx/include/__format/formatter_floating_point.h +++ b/libcxx/include/__format/formatter_floating_point.h @@ -739,13 +739,13 @@ struct _LIBCPP_TEMPLATE_VIS __formatter_floating_point { }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_floating_point<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_floating_point<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_floating_point<_CharT> {}; #endif //_LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__format/formatter_integer.h b/libcxx/include/__format/formatter_integer.h index f157698818ac8..0e144100da9ab 100644 --- a/libcxx/include/__format/formatter_integer.h +++ b/libcxx/include/__format/formatter_integer.h @@ -31,7 +31,7 @@ #if _LIBCPP_STD_VER >= 20 template <__fmt_char_type _CharT> - struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_integer { + struct _LIBCPP_TEMPLATE_VIS __formatter_integer { public: 
_LIBCPP_HIDE_FROM_ABI constexpr auto @@ -60,43 +60,43 @@ // Signed integral types. template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_integer<_CharT> { +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> { }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_integer<_CharT> {}; +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_integer<_CharT> {}; +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; # ifndef _LIBCPP_HAS_NO_INT128 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<__int128_t, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter<__int128_t, _CharT> : public __formatter_integer<_CharT> {}; # endif // Unsigned integral types. template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; # ifndef _LIBCPP_HAS_NO_INT128 template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<__uint128_t, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter<__uint128_t, _CharT> : public __formatter_integer<_CharT> {}; # endif diff --git a/libcxx/include/__format/formatter_pointer.h b/libcxx/include/__format/formatter_pointer.h index fe1b3cb496f1a..48d8372a2341f 100644 --- a/libcxx/include/__format/formatter_pointer.h +++ b/libcxx/include/__format/formatter_pointer.h @@ -55,13 +55,13 @@ struct _LIBCPP_TEMPLATE_VIS __formatter_pointer { // - template<> struct formatter; // - template<> struct formatter; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_pointer<_CharT> {}; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter : public __formatter_pointer<_CharT> { +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_pointer<_CharT> { }; template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_pointer<_CharT> {}; #endif 
//_LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h index e11708d8e28cc..c14518be495bc 100644 --- a/libcxx/include/__format/formatter_string.h +++ b/libcxx/include/__format/formatter_string.h @@ -57,7 +57,7 @@ struct _LIBCPP_TEMPLATE_VIS __formatter_string { // Formatter const char*. template <__fmt_char_type _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter +struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_string<_CharT> { using _Base = __formatter_string<_CharT>; @@ -95,7 +95,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_CharT*, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter<_CharT*, _CharT> : public formatter { using _Base = formatter; @@ -106,7 +106,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_CharT*, _Char // Formatter char[]. template <__fmt_char_type _CharT, size_t _Size> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_CharT[_Size], _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter<_CharT[_Size], _CharT> : public __formatter_string<_CharT> { using _Base = __formatter_string<_CharT>; @@ -117,7 +117,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_CharT[_Size], // Formatter std::string. template <__fmt_char_type _CharT, class _Traits, class _Allocator> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_string<_CharT> { using _Base = __formatter_string<_CharT>; @@ -130,7 +130,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_string<_CharT> { using _Base = __formatter_string<_CharT>; diff --git a/libcxx/include/__format/formatter_tuple.h b/libcxx/include/__format/formatter_tuple.h index d1874ddecb723..e6831de78c227 100644 --- a/libcxx/include/__format/formatter_tuple.h +++ b/libcxx/include/__format/formatter_tuple.h @@ -39,7 +39,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 23 template <__fmt_char_type _CharT, class _Tuple, formattable<_CharT>... _Args> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_tuple { +struct _LIBCPP_TEMPLATE_VIS __formatter_tuple { _LIBCPP_HIDE_FROM_ABI constexpr void set_separator(basic_string_view<_CharT> __separator) noexcept { __separator_ = __separator; } @@ -164,11 +164,11 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __formatter_tuple { }; template <__fmt_char_type _CharT, formattable<_CharT>... _Args> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_tuple<_CharT, pair<_Args...>, _Args...> {}; template <__fmt_char_type _CharT, formattable<_CharT>... 
_Args> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter, _CharT> +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_tuple<_CharT, tuple<_Args...>, _Args...> {}; #endif //_LIBCPP_STD_VER >= 23 diff --git a/libcxx/include/__format/range_default_formatter.h b/libcxx/include/__format/range_default_formatter.h index 7fdb254de3b88..eab2951fcf552 100644 --- a/libcxx/include/__format/range_default_formatter.h +++ b/libcxx/include/__format/range_default_formatter.h @@ -84,12 +84,12 @@ inline constexpr range_format format_kind<_Rp> = [] { }(); template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatter; +struct _LIBCPP_TEMPLATE_VIS __range_default_formatter; // Required specializations template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatter { +struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { private: using __maybe_const_r = __fmt_maybe_const<_Rp, _CharT>; range_formatter>, _CharT> __underlying_; @@ -115,7 +115,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatte }; template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatter { +struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { private: using __maybe_const_map = __fmt_maybe_const<_Rp, _CharT>; using __element_type = remove_cvref_t>; @@ -143,7 +143,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatte }; template -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatter { +struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { private: using __maybe_const_set = __fmt_maybe_const<_Rp, _CharT>; using __element_type = remove_cvref_t>; @@ -168,14 +168,13 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatte template requires(_Kp == range_format::string || _Kp == range_format::debug_string) -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT __range_default_formatter<_Kp, _Rp, _CharT> { +struct _LIBCPP_TEMPLATE_VIS __range_default_formatter<_Kp, _Rp, _CharT> { __range_default_formatter() = delete; // TODO FMT Implement }; template requires(format_kind<_Rp> != range_format::disabled && formattable, _CharT>) -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_Rp, _CharT> - : __range_default_formatter, _Rp, _CharT> {}; +struct _LIBCPP_TEMPLATE_VIS formatter<_Rp, _CharT> : __range_default_formatter, _Rp, _CharT> {}; #endif //_LIBCPP_STD_VER >= 23 diff --git a/libcxx/include/__format/range_formatter.h b/libcxx/include/__format/range_formatter.h index f60151029c9c3..47323433d76fa 100644 --- a/libcxx/include/__format/range_formatter.h +++ b/libcxx/include/__format/range_formatter.h @@ -40,7 +40,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template requires same_as, _Tp> && formattable<_Tp, _CharT> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT range_formatter { +struct _LIBCPP_TEMPLATE_VIS range_formatter { _LIBCPP_HIDE_FROM_ABI constexpr void set_separator(basic_string_view<_CharT> __separator) noexcept { __separator_ = __separator; } diff --git a/libcxx/include/vector b/libcxx/include/vector index 980af582558be..2361ba8e215ff 100644 --- a/libcxx/include/vector +++ b/libcxx/include/vector @@ -3328,7 +3328,7 @@ inline constexpr bool __format::__enable_insertable> = true; template // Since is-vector-bool-reference is only used once it's inlined here. 
requires same_as> -struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<_Tp, CharT> { +struct _LIBCPP_TEMPLATE_VIS formatter<_Tp, CharT> { private: formatter __underlying_; diff --git a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.format.pass.cpp b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.format.pass.cpp index 67a34ef637eec..6753f614351d8 100644 --- a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.format.pass.cpp +++ b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.format.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // [container.adaptors.format] // For each of queue, priority_queue, and stack, the library provides the diff --git a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.vformat.pass.cpp b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.vformat.pass.cpp index a7136e7e404e8..ab9b7e1004694 100644 --- a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.functions.vformat.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // [container.adaptors.format] // For each of queue, priority_queue, and stack, the library provides the diff --git a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.pass.cpp b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.pass.cpp index 8950ece9a57e8..9f978ebbbf63e 100644 --- a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.pass.cpp +++ b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // [container.adaptors.format] // For each of queue, priority_queue, and stack, the library provides the // following formatter specialization where adaptor-type is the name of the diff --git a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/parse.pass.cpp b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/parse.pass.cpp index b620279d76079..7a5f3ead39010 100644 --- a/libcxx/test/std/containers/container.adaptors/container.adaptors.format/parse.pass.cpp +++ b/libcxx/test/std/containers/container.adaptors/container.adaptors.format/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // [container.adaptors.format] // For each of queue, priority_queue, and stack, the library provides the // following formatter specialization where adaptor-type is the name of the diff --git a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.format.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.format.pass.cpp index 05a0715c321ed..4f8e0337d652f 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.format.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.format.pass.cpp @@ -11,6 +11,10 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.vformat.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.vformat.pass.cpp index c5a623795957a..c94aedceedb89 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.functions.vformat.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.pass.cpp index acb517a068f4c..e20ea9b33035a 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. 
// UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/parse.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/parse.pass.cpp index c099cfe1e5965..c6013ce7690dc 100644 --- a/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/parse.pass.cpp +++ b/libcxx/test/std/containers/sequences/vector.bool/vector.bool.fmt/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/time/time.cal/time.cal.day/time.cal.day.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.day/time.cal.day.nonmembers/ostream.pass.cpp index 0e1730447e76f..b30ed775b87f0 100644 --- a/libcxx/test/std/time/time.cal/time.cal.day/time.cal.day.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.day/time.cal.day.nonmembers/ostream.pass.cpp @@ -15,6 +15,10 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.md/time.cal.md.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.md/time.cal.md.nonmembers/ostream.pass.cpp index 4e4feda0c8091..de25ac5ed86d0 100644 --- a/libcxx/test/std/time/time.cal/time.cal.md/time.cal.md.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.md/time.cal.md.nonmembers/ostream.pass.cpp @@ -16,6 +16,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.mdlast/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.mdlast/ostream.pass.cpp index 491d30bf969e2..52424227ca9af 100644 --- a/libcxx/test/std/time/time.cal/time.cal.mdlast/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.mdlast/ostream.pass.cpp @@ -18,6 +18,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.month/time.cal.month.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.month/time.cal.month.nonmembers/ostream.pass.cpp index 2efc023c611b6..86862cc9711f3 100644 --- a/libcxx/test/std/time/time.cal/time.cal.month/time.cal.month.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.month/time.cal.month.nonmembers/ostream.pass.cpp @@ -15,6 +15,10 @@ // TODO FMT Investigate Windows issues. // UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/ostream.pass.cpp index 006df5e29e56b..7fe5611e1496a 100644 --- a/libcxx/test/std/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/ostream.pass.cpp @@ -18,6 +18,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/ostream.pass.cpp index 0a76fbd471d88..677219a0ee237 100644 --- a/libcxx/test/std/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/ostream.pass.cpp @@ -18,6 +18,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/ostream.pass.cpp index 7a59350ffea10..f52406affaead 100644 --- a/libcxx/test/std/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/ostream.pass.cpp @@ -13,6 +13,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/ostream.pass.cpp index 90dca782880a5..86fbc6d7b185a 100644 --- a/libcxx/test/std/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/ostream.pass.cpp @@ -13,6 +13,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/ostream.pass.cpp index b52293b16d978..59c6bafdd1353 100644 --- a/libcxx/test/std/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/ostream.pass.cpp @@ -10,6 +10,10 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.year/time.cal.year.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.year/time.cal.year.nonmembers/ostream.pass.cpp index c979fc2874ca7..1ba7f6eebd011 100644 --- a/libcxx/test/std/time/time.cal/time.cal.year/time.cal.year.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.year/time.cal.year.nonmembers/ostream.pass.cpp @@ -13,6 +13,10 @@ // TODO FMT Investigate Windows issues. // UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/ostream.pass.cpp index 13257ccbbb475..624bf4d4f1fd8 100644 --- a/libcxx/test/std/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/ostream.pass.cpp @@ -18,6 +18,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/ostream.pass.cpp index 595e00f0008c8..64a42ff081602 100644 --- a/libcxx/test/std/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/ostream.pass.cpp @@ -16,6 +16,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/ostream.pass.cpp index 9ded734a83548..254fe6a846d83 100644 --- a/libcxx/test/std/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/ostream.pass.cpp @@ -18,6 +18,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/ostream.pass.cpp index 05578f6c2397b..76c74b80945b5 100644 --- a/libcxx/test/std/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/ostream.pass.cpp @@ -16,6 +16,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/ostream.pass.cpp index 150813379e276..478e2b88d2a91 100644 --- a/libcxx/test/std/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/ostream.pass.cpp @@ -16,6 +16,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp index e05146686434a..416a472de5085 100644 --- a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp +++ b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp @@ -15,6 +15,10 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp b/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp index c639ea8dcec42..5aef2140e1f78 100644 --- a/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp +++ b/libcxx/test/std/time/time.hms/time.hms.nonmembers/ostream.pass.cpp @@ -16,6 +16,9 @@ // TODO FMT Investigate Windows issues. // UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.day.pass.cpp b/libcxx/test/std/time/time.syn/formatter.day.pass.cpp index 373db7217e14d..2329e1b6e2451 100644 --- a/libcxx/test/std/time/time.syn/formatter.day.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.day.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT Investigate Windows issues. 
// UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp index 3ca7ae2b40c91..fd9d095603a81 100644 --- a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp @@ -16,6 +16,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp index 2fbca0e1aece3..a0f1ec0f8164b 100644 --- a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp @@ -17,6 +17,9 @@ // XFAIL: LIBCXX-FREEBSD-FIXME +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.month.pass.cpp b/libcxx/test/std/time/time.syn/formatter.month.pass.cpp index 324887f8af8c5..d479679f88498 100644 --- a/libcxx/test/std/time/time.syn/formatter.month.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.month.pass.cpp @@ -11,6 +11,10 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.month_day.pass.cpp b/libcxx/test/std/time/time.syn/formatter.month_day.pass.cpp index f86476f716325..cce2832509394 100644 --- a/libcxx/test/std/time/time.syn/formatter.month_day.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.month_day.pass.cpp @@ -17,6 +17,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.month_day_last.pass.cpp b/libcxx/test/std/time/time.syn/formatter.month_day_last.pass.cpp index 7c78271b908bb..60ddd8ba47759 100644 --- a/libcxx/test/std/time/time.syn/formatter.month_day_last.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.month_day_last.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.month_weekday.pass.cpp b/libcxx/test/std/time/time.syn/formatter.month_weekday.pass.cpp index f34b22c00cfe6..207cc09c166e3 100644 --- a/libcxx/test/std/time/time.syn/formatter.month_weekday.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.month_weekday.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.weekday.pass.cpp b/libcxx/test/std/time/time.syn/formatter.weekday.pass.cpp index 1b3fbe9f59adb..de8172e98282d 100644 --- a/libcxx/test/std/time/time.syn/formatter.weekday.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.weekday.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT Investigate Windows issues. // UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.weekday_index.pass.cpp b/libcxx/test/std/time/time.syn/formatter.weekday_index.pass.cpp index 54930343a8d7a..a063d23427a89 100644 --- a/libcxx/test/std/time/time.syn/formatter.weekday_index.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.weekday_index.pass.cpp @@ -17,6 +17,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.weekday_last.pass.cpp b/libcxx/test/std/time/time.syn/formatter.weekday_last.pass.cpp index e3fa9ae3ba3dd..a33c57481d0a0 100644 --- a/libcxx/test/std/time/time.syn/formatter.weekday_last.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.weekday_last.pass.cpp @@ -17,6 +17,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year.pass.cpp index beb47321e6644..7907033828cf2 100644 --- a/libcxx/test/std/time/time.syn/formatter.year.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT Investigate Windows issues. // UNSUPPORTED: msvc, target={{.+}}-windows-gnu +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year_month.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year_month.pass.cpp index 344967d41f774..d7c65bb62ad7c 100644 --- a/libcxx/test/std/time/time.syn/formatter.year_month.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year_month.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year_month_day.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year_month_day.pass.cpp index 25d5a5807467b..22fada55d5768 100644 --- a/libcxx/test/std/time/time.syn/formatter.year_month_day.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year_month_day.pass.cpp @@ -17,6 +17,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year_month_day_last.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year_month_day_last.pass.cpp index 35ce599e1a08a..5ffa9e3d9cd9f 100644 --- a/libcxx/test/std/time/time.syn/formatter.year_month_day_last.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year_month_day_last.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. 
// UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year_month_weekday.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year_month_weekday.pass.cpp index 617f183882202..775fe81fea80f 100644 --- a/libcxx/test/std/time/time.syn/formatter.year_month_weekday.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year_month_weekday.pass.cpp @@ -14,6 +14,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/time/time.syn/formatter.year_month_weekday_last.pass.cpp b/libcxx/test/std/time/time.syn/formatter.year_month_weekday_last.pass.cpp index 50968a39dbe64..166cc616888d6 100644 --- a/libcxx/test/std/time/time.syn/formatter.year_month_weekday_last.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.year_month_weekday_last.pass.cpp @@ -12,6 +12,10 @@ // TODO FMT It seems GCC uses too much memory in the CI and fails. // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.fr_FR.UTF-8 // REQUIRES: locale.ja_JP.UTF-8 diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.pass.cpp index 2e602428df484..44fee37d40245 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.pass.cpp @@ -22,10 +22,10 @@ #include "test_macros.h" int main(int, char**) { - [[maybe_unused]] auto store = std::make_format_args(42, nullptr, false, 1.0); + [[maybe_unused]] auto store = std::make_format_args(42, nullptr, false, 'x'); LIBCPP_STATIC_ASSERT( - std::same_as>); + std::same_as>); return 0; } diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp index 14328a1425d08..feb23c503a21f 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_wformat_args.pass.cpp @@ -22,10 +22,10 @@ #include "test_macros.h" int main(int, char**) { - [[maybe_unused]] auto store = std::make_wformat_args(42, nullptr, false, 1.0); + [[maybe_unused]] auto store = std::make_wformat_args(42, nullptr, false, 'x'); LIBCPP_STATIC_ASSERT( - std::same_as>); + std::same_as>); return 0; } diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp 
b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp index 29092a35d711c..c67d868dcfebe 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp @@ -8,9 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} - // // template diff --git a/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp index 35bee3ecce59c..ecb055e3026bc 100644 --- a/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp +++ b/libcxx/test/std/utilities/format/format.arguments/format.args/get.pass.cpp @@ -8,9 +8,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} - // // basic_format_arg get(size_t i) const noexcept; diff --git a/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp b/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp index e3d3c48995bc1..c2d3d6b2c8e95 100644 --- a/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp +++ b/libcxx/test/std/utilities/format/format.error/format.error.pass.cpp @@ -9,9 +9,6 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} - // // class format_error; diff --git a/libcxx/test/std/utilities/format/format.fmt.string/ctor.verify.cpp b/libcxx/test/std/utilities/format/format.fmt.string/ctor.verify.cpp index 8f5404daaf396..d51531dacf734 100644 --- a/libcxx/test/std/utilities/format/format.fmt.string/ctor.verify.cpp +++ b/libcxx/test/std/utilities/format/format.fmt.string/ctor.verify.cpp @@ -33,14 +33,14 @@ void run() { (void)std::basic_format_string{"{}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{"{0:{0}P}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{"{0:{0}}"}; - (void)std::basic_format_string{"{0:{0}}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} + (void)std::basic_format_string{"{0:{0}}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{"{.3}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} #ifndef TEST_HAS_NO_WIDE_CHARACTERS (void)std::basic_format_string{L"foo"}; (void)std::basic_format_string{L"{}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{L"{0:{0}P}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{L"{0:{0}}"}; - (void)std::basic_format_string{L"{0:{0}}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} + (void)std::basic_format_string{L"{0:{0}}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} (void)std::basic_format_string{L"{.3}"}; // expected-error-re {{call to consteval function{{.*}}is not a constant expression}} #endif } diff --git a/libcxx/test/std/utilities/format/format.fmt.string/get.pass.cpp b/libcxx/test/std/utilities/format/format.fmt.string/get.pass.cpp index bf7e2add0e8f5..d9a8c9f719b9a 100644 --- a/libcxx/test/std/utilities/format/format.fmt.string/get.pass.cpp +++ b/libcxx/test/std/utilities/format/format.fmt.string/get.pass.cpp @@ -35,10 +35,10 @@ template constexpr bool test() { assert((std::basic_format_string{CSTR("foo")}.get() == SV("foo"))); assert((std::basic_format_string{CSTR("{}")}.get() == SV("{}"))); - assert((std::basic_format_string{CSTR("{} {:01.23L}")}.get() == SV("{} {:01.23L}"))); + assert((std::basic_format_string{CSTR("{} {:*>6}")}.get() == SV("{} {:*>6}"))); // Embedded NUL character - assert((std::basic_format_string{SV("{}\0{}")}.get() == SV("{}\0{}"))); + assert((std::basic_format_string{SV("{}\0{}")}.get() == SV("{}\0{}"))); return true; } diff --git a/libcxx/test/std/utilities/format/format.fmt.string/types.compile.pass.cpp b/libcxx/test/std/utilities/format/format.fmt.string/types.compile.pass.cpp index 3ebd2bfc4fbd5..1ecfb5d992741 100644 --- a/libcxx/test/std/utilities/format/format.fmt.string/types.compile.pass.cpp +++ b/libcxx/test/std/utilities/format/format.fmt.string/types.compile.pass.cpp @@ -29,12 +29,11 @@ static_assert(std::same_as, std::basic_format_string>); static_assert(std::same_as, std::basic_format_string>); -static_assert(std::same_as, std::basic_format_string>); -static_assert(std::same_as, std::basic_format_string>); +static_assert(std::same_as, std::basic_format_string>); +static_assert(std::same_as, std::basic_format_string>); #ifndef 
TEST_HAS_NO_WIDE_CHARACTERS static_assert(std::same_as, std::basic_format_string>); static_assert(std::same_as, std::basic_format_string>); -static_assert(std::same_as, std::basic_format_string>); -static_assert( - std::same_as, std::basic_format_string>); +static_assert(std::same_as, std::basic_format_string>); +static_assert(std::same_as, std::basic_format_string>); #endif diff --git a/libcxx/test/std/utilities/format/format.formattable/concept.formattable.compile.pass.cpp b/libcxx/test/std/utilities/format/format.formattable/concept.formattable.compile.pass.cpp index 0e4708e068ec4..54c8c1bd1f170 100644 --- a/libcxx/test/std/utilities/format/format.formattable/concept.formattable.compile.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formattable/concept.formattable.compile.pass.cpp @@ -8,6 +8,9 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // UNSUPPORTED: libcpp-has-no-incomplete-format +// This test uses std::filesystem::path, which was introduced in macOS 10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} + // // template @@ -107,9 +110,7 @@ void test_P0645() { assert_is_formattable<__uint128_t, CharT>(); #endif - assert_is_formattable(); - assert_is_formattable(); - assert_is_formattable(); + // floating-point types are tested in concept.formattable.float.compile.pass.cpp assert_is_formattable(); assert_is_formattable(); diff --git a/libcxx/test/std/utilities/format/format.formattable/concept.formattable.float.compile.pass.cpp b/libcxx/test/std/utilities/format/format.formattable/concept.formattable.float.compile.pass.cpp new file mode 100644 index 0000000000000..09b957f9d1682 --- /dev/null +++ b/libcxx/test/std/utilities/format/format.formattable/concept.formattable.float.compile.pass.cpp @@ -0,0 +1,58 @@ +//===----------------------------------------------------------------------===// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 +// UNSUPPORTED: libcpp-has-no-incomplete-format + +// + +// template +// concept formattable = ... + +#include +#include + +#include "test_macros.h" + +template +void assert_is_not_formattable() { + static_assert(!std::formattable); +} + +template +void assert_is_formattable() { + // Only formatters for CharT == char || CharT == wchar_t are enabled for the + // standard formatters. When CharT is a different type the formatter should + // be disabled. 
+ if constexpr (std::same_as +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + || std::same_as +#endif + ) + static_assert(std::formattable); + else + assert_is_not_formattable(); +} + +template +void test() { + assert_is_formattable(); + assert_is_formattable(); + assert_is_formattable(); +} + +void test() { + test(); +#ifndef TEST_HAS_NO_WIDE_CHARACTERS + test(); +#endif + test(); + test(); + test(); + + test(); +} diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp index dc40acecc8a19..9f701dfd015a8 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.floating_point.pass.cpp @@ -8,6 +8,9 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // [format.formatter.spec]: diff --git a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp index bc6b418862525..f106105b984cf 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/check_arg_id.pass.cpp @@ -9,9 +9,6 @@ // UNSUPPORTED: no-exceptions // UNSUPPORTED: libcpp-has-no-incomplete-format -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} - // // constexpr void check_arg_id(size_t id); diff --git a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp index 2d6de1f2f3354..03da8fde392bb 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/next_arg_id.pass.cpp @@ -9,9 +9,6 @@ // UNSUPPORTED: no-exceptions // UNSUPPORTED: libcpp-has-no-incomplete-format -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}} - // // constexpr size_t next_arg_id(); diff --git a/libcxx/test/std/utilities/format/format.functions/P2418.pass.cpp b/libcxx/test/std/utilities/format/format.functions/P2418.pass.cpp index 9a5baecbb5f97..1e6893d093451 100644 --- a/libcxx/test/std/utilities/format/format.functions/P2418.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/P2418.pass.cpp @@ -10,6 +10,10 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Tests whether a move only type can be formatted. 
This is required by // P2418R2 "Add support for std::generator-like types to std::format" diff --git a/libcxx/test/std/utilities/format/format.functions/ascii.pass.cpp b/libcxx/test/std/utilities/format/format.functions/ascii.pass.cpp index 4b94ebb9ec6b6..74f20f2d797d0 100644 --- a/libcxx/test/std/utilities/format/format.functions/ascii.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/ascii.pass.cpp @@ -11,6 +11,10 @@ // Force unicode to be disabled. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_HAS_NO_UNICODE +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // Tests Unicode is ignored and handled as ASCII. diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp index 5ceedf9f05c42..911e938887f97 100644 --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.ascii.pass.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Force unicode to be disabled. // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_HAS_NO_UNICODE diff --git a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp index 6d002a10c1479..e3ab2d16bd26f 100644 --- a/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/escaped_output.unicode.pass.cpp @@ -15,6 +15,10 @@ // UNSUPPORTED: msvc, target={{.+}}-windows-gnu // UNSUPPORTED: LIBCXX-AIX-FIXME +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // This test the debug string type for the formatter specializations for char diff --git a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp index 61c7abd8bedc8..eb90c75da8a05 100644 --- a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED:gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/format.locale.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format.locale.verify.cpp index de182c7cb528b..1281a0b61e2a8 100644 --- a/libcxx/test/std/utilities/format/format.functions/format.locale.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format.locale.verify.cpp @@ -9,6 +9,10 @@ // 
UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. // diff --git a/libcxx/test/std/utilities/format/format.functions/format.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format.pass.cpp index 6b06fcf68d845..f444c51d511b0 100644 --- a/libcxx/test/std/utilities/format/format.functions/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Note this formatter shows additional information when tests are failing. // This aids the development. Since other formatters fail in the same fashion // they don't have this additional output. diff --git a/libcxx/test/std/utilities/format/format.functions/format.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format.verify.cpp index 23c9c2c103f1b..45829313a6267 100644 --- a/libcxx/test/std/utilities/format/format.functions/format.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format.verify.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. // diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp index fd3eb06c95ffc..7079570813f20 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.locale.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.locale.verify.cpp index e3990603a5fc8..573257c228cc8 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to.locale.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to.locale.verify.cpp @@ -9,6 +9,10 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. 
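The *.locale variants of the tests annotated above exercise the std::format overloads that take an explicit std::locale, where the L option pulls grouping and decimal-point behavior from the locale's facets. A minimal sketch of what those overloads do (assumes an en_US.UTF-8 locale is installed on the host, which is not guaranteed):

#include <format>
#include <iostream>
#include <locale>

int main() {
  try {
    std::locale loc("en_US.UTF-8");
    // The 'L' option routes digit grouping through loc's numpunct facet.
    std::cout << std::format(loc, "{:L}", 1234567) << '\n'; // "1,234,567"
  } catch (const std::runtime_error&) {
    std::cout << "en_US.UTF-8 not installed on this system\n";
  }
}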
// diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp index c81a902ab62b0..cf8d55714728b 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.verify.cpp index a40b532dcc922..bd1fcc2b4b777 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to.verify.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // Basic test to validate ill-formed code is properly detected. diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp index 8a9a5395dcc49..de32982c1562f 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.verify.cpp index ec4e2927c7368..b9d77de3f90f1 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.verify.cpp @@ -9,6 +9,10 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. 
// diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp index b5effa320aab2..cfdeae9fd9fa6 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.verify.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.verify.cpp index 7c064c584c8bb..b84615d590152 100644 --- a/libcxx/test/std/utilities/format/format.functions/format_to_n.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.verify.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. // diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp index 7d41ddb81a00f..43800b9da8a9f 100644 --- a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.verify.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.verify.cpp index dc80c12141c5f..d1b92d3fa0847 100644 --- a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.verify.cpp @@ -9,6 +9,10 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. 
// diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp index cd31da125c849..6b03d34d9b271 100644 --- a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.verify.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.verify.cpp index 1662d893221bb..a135dd7cc9706 100644 --- a/libcxx/test/std/utilities/format/format.functions/formatted_size.verify.cpp +++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.verify.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // Basic test to validate ill-formed code is properly detected. // diff --git a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp index d5939e255423e..9863922f9abcc 100644 --- a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp @@ -12,6 +12,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // REQUIRES: locale.en_US.UTF-8 // diff --git a/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp b/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp index db659f53cf097..efe243573f04a 100644 --- a/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/unicode.pass.cpp @@ -15,6 +15,10 @@ // UNSUPPORTED: msvc, target={{.+}}-windows-gnu // UNSUPPORTED: LIBCXX-AIX-FIXME +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // Tests the Unicode width support of the standard format specifiers.
diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp index 4136123d6a9f5..7755b785518f3 100644 --- a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // string vformat(const locale& loc, string_view fmt, format_args args); diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp index 8a95b5524fe9f..7c4c4a10ea5d8 100644 --- a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // string vformat(string_view fmt, format_args args); diff --git a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp index 6c5e7f4ab5787..e1a740253d586 100644 --- a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp @@ -11,6 +11,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp index 77c783411127f..92b5409e1409e 100644 --- a/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp +++ b/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp @@ -10,6 +10,9 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/format.pass.cpp index 31b7e5658b125..8f398994d251b 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/parse.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/parse.pass.cpp index a7a160989b336..c848f4ff2fc26 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/parse.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_brackets.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_brackets.pass.cpp index 6405b5ec22e51..c17edb28f6175 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_brackets.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_brackets.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_separator.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_separator.pass.cpp index 21cee612bb2b0..3f91e7bc633a4 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_separator.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtdef/set_separator.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp index 7179a674a37ad..c2d2ec2968508 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtkind/format_kind.compile.pass.cpp @@ -8,6 +8,9 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // UNSUPPORTED: libcpp-has-no-incomplete-format +// This test uses std::filesystem::path, which was introduced in macOS 10.15 +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14}} + // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.format.pass.cpp index 7a0a2d18913cf..b3c8afd8bae3e 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.format.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.vformat.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.vformat.pass.cpp index 613eb5ea06392..d6b0f7e9f1c0f 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.functions.vformat.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.pass.cpp index b459ba8ff2f2d..2275baee237cf 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/parse.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/parse.pass.cpp index 92763be54e15a..be117a6de2ecf 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/parse.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtmap/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.format.pass.cpp index 7df3284f72b71..1f6a550ee232f 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.format.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.vformat.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.vformat.pass.cpp index e411b036acbe1..e33a0bc383d14 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.functions.vformat.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.pass.cpp index 5ca3bfe12012e..dcb3d67270019 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/parse.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/parse.pass.cpp index 8b25021c984d0..88940525925fa 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.fmtset/parse.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.fmtset/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.format.pass.cpp index e1ab825626b56..297f7b22779f1 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.format.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.vformat.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.vformat.pass.cpp index 98bda2debb670..c28cf547da7b3 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.functions.vformat.pass.cpp @@ -11,9 +11,8 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.pass.cpp index 1fdc5eb726d69..faf9e1a18ee1b 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/parse.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/parse.pass.cpp index ce1c0c93130b6..c440b1ac2b168 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/parse.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_brackets.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_brackets.pass.cpp index c399a81f2a041..0b0e3a16c4109 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_brackets.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_brackets.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_separator.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_separator.pass.cpp index 192ddcd16c44a..c63cc52403f45 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_separator.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/set_separator.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.range/format.range.formatter/underlying.pass.cpp b/libcxx/test/std/utilities/format/format.range/format.range.formatter/underlying.pass.cpp index 10a330f0baf1e..52ac58a726651 100644 --- a/libcxx/test/std/utilities/format/format.range/format.range.formatter/underlying.pass.cpp +++ b/libcxx/test/std/utilities/format/format.range/format.range.formatter/underlying.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. 
-// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template diff --git a/libcxx/test/std/utilities/format/format.tuple/format.functions.format.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/format.functions.format.pass.cpp index 75791fb945bcf..f5853f98ca97f 100644 --- a/libcxx/test/std/utilities/format/format.tuple/format.functions.format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/format.functions.format.pass.cpp @@ -11,9 +11,9 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.tuple/format.functions.format.verify.cpp b/libcxx/test/std/utilities/format/format.tuple/format.functions.format.verify.cpp index 58685f956be12..5967d8630e065 100644 --- a/libcxx/test/std/utilities/format/format.tuple/format.functions.format.verify.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/format.functions.format.verify.cpp @@ -8,6 +8,10 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17, c++20 // UNSUPPORTED: libcpp-has-no-incomplete-format +// TODO FMT This test should not require std::to_chars(floating-point) +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} + #include #include diff --git a/libcxx/test/std/utilities/format/format.tuple/format.functions.vformat.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/format.functions.vformat.pass.cpp index 9445ddb517cad..2cca15a6d5dc8 100644 --- a/libcxx/test/std/utilities/format/format.tuple/format.functions.vformat.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/format.functions.vformat.pass.cpp @@ -10,9 +10,8 @@ // TODO FMT Evaluate gcc-12 status // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} +// This test requires std::to_chars(floating-point), which is in the dylib +// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{10.9|10.10|10.11|10.12|10.13|10.14|10.15|11.0}} // diff --git a/libcxx/test/std/utilities/format/format.tuple/format.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/format.pass.cpp index 017201481fa54..2d2e60cc20dd9 100644 --- a/libcxx/test/std/utilities/format/format.tuple/format.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/format.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template... 
Ts> @@ -61,7 +57,7 @@ void test() { test(SV("(1)"), std::tuple{1}); test(SV("(1, 1)"), std::tuple{1, CharT('1')}); test(SV("(1, 1)"), std::pair{1, CharT('1')}); - test(SV("(1, 1, 1)"), std::tuple{1, CharT('1'), 1.0}); + test(SV("(1, 1, true)"), std::tuple{1, CharT('1'), true}); } void test() { diff --git a/libcxx/test/std/utilities/format/format.tuple/parse.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/parse.pass.cpp index 05c90557cd54f..f38c9fad2df92 100644 --- a/libcxx/test/std/utilities/format/format.tuple/parse.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/parse.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template... Ts> @@ -64,7 +60,7 @@ constexpr void test() { test>(); test>(); test>(); - test>(); + test>(); } constexpr bool test() { diff --git a/libcxx/test/std/utilities/format/format.tuple/set_brackets.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/set_brackets.pass.cpp index 74af4f32fcf1d..63efdb077502e 100644 --- a/libcxx/test/std/utilities/format/format.tuple/set_brackets.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/set_brackets.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // template... Ts> @@ -49,7 +45,7 @@ constexpr void test() { test>(); test>(); test>(); - test>(); + test>(); } constexpr bool test() { diff --git a/libcxx/test/std/utilities/format/format.tuple/set_separator.pass.cpp b/libcxx/test/std/utilities/format/format.tuple/set_separator.pass.cpp index 0258ae215ed22..92a77b3357316 100644 --- a/libcxx/test/std/utilities/format/format.tuple/set_separator.pass.cpp +++ b/libcxx/test/std/utilities/format/format.tuple/set_separator.pass.cpp @@ -11,10 +11,6 @@ // TODO FMT Fix this test using GCC, it currently times out. // UNSUPPORTED: gcc-12 -// This test requires the dylib support introduced in D92214. -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{.+}} -// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx11.{{.+}} - // // class range_formatter @@ -49,7 +45,7 @@ constexpr void test() { test>(); test>(); test>(); - test>(); + test>(); } constexpr bool test() { diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index 8ff6decb0a344..6591a8edb9e16 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -492,9 +492,6 @@ apple-system-backdeployment-assertions-*) PARAMS+=";unwind_runtime_root=${OSX_ROOTS}/macOS/libunwind/${DEPLOYMENT_TARGET}" PARAMS+=";use_system_cxx_lib=True" PARAMS+=";enable_assertions=True" - # TODO: Enable experimental features during back-deployment -- right now some of the availability - # annotations are incorrect, leading to test failures that could be avoided. 
- PARAMS+=";enable_experimental=False" generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Apple.cmake" \ -DLIBCXX_TEST_CONFIG="apple-libc++-backdeployment.cfg.in" \ @@ -533,9 +530,6 @@ apple-system-backdeployment-*) PARAMS+=";abi_runtime_root=${OSX_ROOTS}/macOS/libc++abi/${DEPLOYMENT_TARGET}" PARAMS+=";unwind_runtime_root=${OSX_ROOTS}/macOS/libunwind/${DEPLOYMENT_TARGET}" PARAMS+=";use_system_cxx_lib=True" - # TODO: Enable experimental features during back-deployment -- right now some of the availability - # annotations are incorrect, leading to test failures that could be avoided. - PARAMS+=";enable_experimental=False" generate-cmake -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Apple.cmake" \ -DLIBCXX_TEST_CONFIG="apple-libc++-backdeployment.cfg.in" \ From 0433abc8e03737982fe103bf1563393e738c1ba0 Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Fri, 17 Mar 2023 15:25:48 -0700 Subject: [PATCH 342/691] [docs][scudo] Add information about M_PURGE_ALL. Add information about M_PURGE_ALL Reviewed By: Chia-hungDuan Differential Revision: https://reviews.llvm.org/D146336 --- llvm/docs/ScudoHardenedAllocator.rst | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/llvm/docs/ScudoHardenedAllocator.rst b/llvm/docs/ScudoHardenedAllocator.rst index 9c1cfa6edcd63..875d018c4d9ff 100644 --- a/llvm/docs/ScudoHardenedAllocator.rst +++ b/llvm/docs/ScudoHardenedAllocator.rst @@ -265,7 +265,16 @@ The following "mallopt" options are available (options are defined in | | the interval to the minimum and maximum value as | | | specified at compile time). | +---------------------------+-------------------------------------------------------+ -| M_PURGE | Forces immediate memory reclaiming (value is unused). | +| M_PURGE | Forces immediate memory reclaiming but does not | +| | reclaim everything. For smaller size classes, there | +| | is still some memory that is not reclaimed due to the | +| | extra time it takes and the small amount of memory | +| | that can be reclaimed. | +| | The value is ignored. | ++---------------------------+-------------------------------------------------------+ +| M_PURGE_ALL | Same as M_PURGE but will force release all possible | +| | memory regardless of how long it takes. | +| | The value is ignored. | +---------------------------+-------------------------------------------------------+ | M_MEMTAG_TUNING | Tunes the allocator's choice of memory tags to make | | | it more likely that a certain class of memory errors | From 22293a3d85e9b3c410269b7adab1a60f5dcb3aa4 Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Wed, 22 Mar 2023 15:49:50 -0500 Subject: [PATCH 343/691] Revert "[flang] Feature list plugin" due to failing build This reverts commit 823ddba1b325f30fc3fb2e9d695c211b856a4d5d. 
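As a usage sketch for the M_PURGE and M_PURGE_ALL options documented in the Scudo table above (a minimal example, not part of the patch; it assumes an Android-style <malloc.h> that declares mallopt() and these constants, and availability of M_PURGE_ALL depends on the allocator version):

    #include <malloc.h>  // assumed to declare mallopt(), M_PURGE, M_PURGE_ALL

    void releaseAllocatorMemory() {
      // Cheap reclaim: releases most free memory but skips the small size
      // classes where the cost outweighs the gain; the value is ignored.
      mallopt(M_PURGE, 0);
      // Exhaustive reclaim: forces release of all possible memory,
      // regardless of how long it takes; the value is ignored.
      mallopt(M_PURGE_ALL, 0);
    }
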
--- flang/examples/CMakeLists.txt | 1 - flang/examples/FeatureList/CMakeLists.txt | 9 - flang/examples/FeatureList/FeatureList.cpp | 761 ------------------ flang/test/CMakeLists.txt | 1 - flang/test/Examples/feature-list-class.f90 | 88 -- .../test/Examples/feature-list-functions.f90 | 76 -- 6 files changed, 936 deletions(-) delete mode 100644 flang/examples/FeatureList/CMakeLists.txt delete mode 100644 flang/examples/FeatureList/FeatureList.cpp delete mode 100644 flang/test/Examples/feature-list-class.f90 delete mode 100644 flang/test/Examples/feature-list-functions.f90 diff --git a/flang/examples/CMakeLists.txt b/flang/examples/CMakeLists.txt index 8cc66ddbbbb0e..23fea3920efb6 100644 --- a/flang/examples/CMakeLists.txt +++ b/flang/examples/CMakeLists.txt @@ -1,4 +1,3 @@ add_subdirectory(ExternalHelloWorld) add_subdirectory(PrintFlangFunctionNames) add_subdirectory(FlangOmpReport) -add_subdirectory(FeatureList) diff --git a/flang/examples/FeatureList/CMakeLists.txt b/flang/examples/FeatureList/CMakeLists.txt deleted file mode 100644 index e17a7bebbff05..0000000000000 --- a/flang/examples/FeatureList/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -add_llvm_example_library(flangFeatureList - MODULE - FeatureList.cpp - - DEPENDS - acc_gen - flangFrontend - omp_gen -) diff --git a/flang/examples/FeatureList/FeatureList.cpp b/flang/examples/FeatureList/FeatureList.cpp deleted file mode 100644 index 7d7e63e148bc0..0000000000000 --- a/flang/examples/FeatureList/FeatureList.cpp +++ /dev/null @@ -1,761 +0,0 @@ -//===-- FeatureList.cpp ---------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// A plugin that counts the amount of times a particular parse tree node -// occurs. 
This plugin should cover each feature covered in dump-parse-tree.h -// -//===----------------------------------------------------------------------===// - -#include "flang/Frontend/FrontendActions.h" -#include "flang/Frontend/FrontendPluginRegistry.h" -#include "flang/Parser/parse-tree-visitor.h" -#include "flang/Parser/parse-tree.h" -#include "flang/Parser/parsing.h" - -#include - -using namespace Fortran::frontend; -using namespace Fortran::parser; -using namespace Fortran; - -#define READ_FEATURE_CUST(classname, n) \ - bool Pre(const classname &) { \ - record(#n); \ - return true; \ - } \ - void Post(const classname &) {} - -#define READ_FEATURE(classname) READ_FEATURE_CUST(classname, classname) - -struct NodeVisitor { -private: - std::map frequencies; - - void record(const char *name) { - const auto [it, ins] = frequencies.insert({name, 1}); - if (!ins) { - frequencies[name] = it->second + 1; - } - } - -public: - const std::map &getFrequencies() const { - return frequencies; - } - - READ_FEATURE_CUST(format::ControlEditDesc, ControlEditDesc) - READ_FEATURE_CUST(format::DerivedTypeDataEditDesc, DerivedTypeDataEditDesc) - READ_FEATURE_CUST(format::FormatItem, FormatItem) - READ_FEATURE_CUST(format::FormatSpecification, FormatSpecification) - READ_FEATURE_CUST( - format::IntrinsicTypeDataEditDesc, IntrinsicTypeDataEditDesc) - READ_FEATURE(Abstract) - READ_FEATURE(AccAtomicCapture) - READ_FEATURE(AccAtomicCapture::Stmt1) - READ_FEATURE(AccAtomicCapture::Stmt2) - READ_FEATURE(AccAtomicRead) - READ_FEATURE(AccAtomicUpdate) - READ_FEATURE(AccAtomicWrite) - READ_FEATURE(AccBeginBlockDirective) - READ_FEATURE(AccBeginCombinedDirective) - READ_FEATURE(AccBeginLoopDirective) - READ_FEATURE(AccBlockDirective) - READ_FEATURE(AccClause) - READ_FEATURE(AccBindClause) - READ_FEATURE(AccDefaultClause) - READ_FEATURE(AccClauseList) - READ_FEATURE(AccCombinedDirective) - READ_FEATURE(AccDataModifier) - READ_FEATURE(AccDataModifier::Modifier) - READ_FEATURE(AccDeclarativeDirective) - READ_FEATURE(AccEndAtomic) - READ_FEATURE(AccEndBlockDirective) - READ_FEATURE(AccEndCombinedDirective) - READ_FEATURE(AccGangArgument) - READ_FEATURE(AccObject) - READ_FEATURE(AccObjectList) - READ_FEATURE(AccObjectListWithModifier) - READ_FEATURE(AccObjectListWithReduction) - READ_FEATURE(AccReductionOperator) - READ_FEATURE(AccReductionOperator::Operator) - READ_FEATURE(AccSizeExpr) - READ_FEATURE(AccSizeExprList) - READ_FEATURE(AccSelfClause) - READ_FEATURE(AccStandaloneDirective) - READ_FEATURE(AccDeviceTypeExpr) - READ_FEATURE(AccDeviceTypeExprList) - READ_FEATURE(AccTileExpr) - READ_FEATURE(AccTileExprList) - READ_FEATURE(AccLoopDirective) - READ_FEATURE(AccWaitArgument) - READ_FEATURE(AcImpliedDo) - READ_FEATURE(AcImpliedDoControl) - READ_FEATURE(AcValue) - READ_FEATURE(AccessStmt) - READ_FEATURE(AccessId) - READ_FEATURE(AccessSpec) - READ_FEATURE(AccessSpec::Kind) - READ_FEATURE(AcSpec) - READ_FEATURE(ActionStmt) - READ_FEATURE(ActualArg) - READ_FEATURE(ActualArg::PercentRef) - READ_FEATURE(ActualArg::PercentVal) - READ_FEATURE(ActualArgSpec) - READ_FEATURE(AcValue::Triplet) - READ_FEATURE(AllocOpt) - READ_FEATURE(AllocOpt::Mold) - READ_FEATURE(AllocOpt::Source) - READ_FEATURE(Allocatable) - READ_FEATURE(AllocatableStmt) - READ_FEATURE(AllocateCoarraySpec) - READ_FEATURE(AllocateObject) - READ_FEATURE(AllocateShapeSpec) - READ_FEATURE(AllocateStmt) - READ_FEATURE(Allocation) - READ_FEATURE(AltReturnSpec) - READ_FEATURE(ArithmeticIfStmt) - READ_FEATURE(ArrayConstructor) - READ_FEATURE(ArrayElement) - 
READ_FEATURE(ArraySpec) - READ_FEATURE(AssignStmt) - READ_FEATURE(AssignedGotoStmt) - READ_FEATURE(AssignmentStmt) - READ_FEATURE(AssociateConstruct) - READ_FEATURE(AssociateStmt) - READ_FEATURE(Association) - READ_FEATURE(AssumedImpliedSpec) - READ_FEATURE(AssumedRankSpec) - READ_FEATURE(AssumedShapeSpec) - READ_FEATURE(AssumedSizeSpec) - READ_FEATURE(Asynchronous) - READ_FEATURE(AsynchronousStmt) - READ_FEATURE(AttrSpec) - READ_FEATURE(BOZLiteralConstant) - READ_FEATURE(BackspaceStmt) - READ_FEATURE(BasedPointer) - READ_FEATURE(BasedPointerStmt) - READ_FEATURE(BindAttr) - READ_FEATURE(BindAttr::Deferred) - READ_FEATURE(BindAttr::Non_Overridable) - READ_FEATURE(BindEntity) - READ_FEATURE(BindEntity::Kind) - READ_FEATURE(BindStmt) - READ_FEATURE(Block) - READ_FEATURE(BlockConstruct) - READ_FEATURE(BlockData) - READ_FEATURE(BlockDataStmt) - READ_FEATURE(BlockSpecificationPart) - READ_FEATURE(BlockStmt) - READ_FEATURE(BoundsRemapping) - READ_FEATURE(BoundsSpec) - READ_FEATURE(Call) - READ_FEATURE(CallStmt) - READ_FEATURE(CaseConstruct) - READ_FEATURE(CaseConstruct::Case) - READ_FEATURE(CaseSelector) - READ_FEATURE(CaseStmt) - READ_FEATURE(CaseValueRange) - READ_FEATURE(CaseValueRange::Range) - READ_FEATURE(ChangeTeamConstruct) - READ_FEATURE(ChangeTeamStmt) - READ_FEATURE(CharLength) - READ_FEATURE(CharLiteralConstant) - READ_FEATURE(CharLiteralConstantSubstring) - READ_FEATURE(CharSelector) - READ_FEATURE(CharSelector::LengthAndKind) - READ_FEATURE(CloseStmt) - READ_FEATURE(CloseStmt::CloseSpec) - READ_FEATURE(CoarrayAssociation) - READ_FEATURE(CoarraySpec) - READ_FEATURE(CodimensionDecl) - READ_FEATURE(CodimensionStmt) - READ_FEATURE(CoindexedNamedObject) - READ_FEATURE(CommonBlockObject) - READ_FEATURE(CommonStmt) - READ_FEATURE(CommonStmt::Block) - READ_FEATURE(CompilerDirective) - READ_FEATURE(CompilerDirective::IgnoreTKR) - READ_FEATURE(CompilerDirective::LoopCount) - READ_FEATURE(CompilerDirective::NameValue) - READ_FEATURE(ComplexLiteralConstant) - READ_FEATURE(ComplexPart) - READ_FEATURE(ComponentArraySpec) - READ_FEATURE(ComponentAttrSpec) - READ_FEATURE(ComponentDataSource) - READ_FEATURE(ComponentDecl) - READ_FEATURE(FillDecl) - READ_FEATURE(ComponentOrFill) - READ_FEATURE(ComponentDefStmt) - READ_FEATURE(ComponentSpec) - READ_FEATURE(ComputedGotoStmt) - READ_FEATURE(ConcurrentControl) - READ_FEATURE(ConcurrentHeader) - READ_FEATURE(ConnectSpec) - READ_FEATURE(ConnectSpec::CharExpr) - READ_FEATURE(ConnectSpec::CharExpr::Kind) - READ_FEATURE(ConnectSpec::Newunit) - READ_FEATURE(ConnectSpec::Recl) - READ_FEATURE(ContainsStmt) - READ_FEATURE(Contiguous) - READ_FEATURE(ContiguousStmt) - READ_FEATURE(ContinueStmt) - READ_FEATURE(CriticalConstruct) - READ_FEATURE(CriticalStmt) - READ_FEATURE(CycleStmt) - READ_FEATURE(DataComponentDefStmt) - READ_FEATURE(DataIDoObject) - READ_FEATURE(DataImpliedDo) - READ_FEATURE(DataRef) - READ_FEATURE(DataStmt) - READ_FEATURE(DataStmtConstant) - READ_FEATURE(DataStmtObject) - READ_FEATURE(DataStmtRepeat) - READ_FEATURE(DataStmtSet) - READ_FEATURE(DataStmtValue) - READ_FEATURE(DeallocateStmt) - READ_FEATURE(DeclarationConstruct) - READ_FEATURE(DeclarationTypeSpec) - READ_FEATURE(DeclarationTypeSpec::Class) - READ_FEATURE(DeclarationTypeSpec::ClassStar) - READ_FEATURE(DeclarationTypeSpec::Record) - READ_FEATURE(DeclarationTypeSpec::Type) - READ_FEATURE(DeclarationTypeSpec::TypeStar) - READ_FEATURE(Default) - READ_FEATURE(DeferredCoshapeSpecList) - READ_FEATURE(DeferredShapeSpecList) - READ_FEATURE(DefinedOpName) - READ_FEATURE(DefinedOperator) - 
READ_FEATURE(DefinedOperator::IntrinsicOperator) - READ_FEATURE(DerivedTypeDef) - READ_FEATURE(DerivedTypeSpec) - READ_FEATURE(DerivedTypeStmt) - READ_FEATURE(Designator) - READ_FEATURE(DimensionStmt) - READ_FEATURE(DimensionStmt::Declaration) - READ_FEATURE(DoConstruct) - READ_FEATURE(DummyArg) - READ_FEATURE(ElseIfStmt) - READ_FEATURE(ElseStmt) - READ_FEATURE(ElsewhereStmt) - READ_FEATURE(EndAssociateStmt) - READ_FEATURE(EndBlockDataStmt) - READ_FEATURE(EndBlockStmt) - READ_FEATURE(EndChangeTeamStmt) - READ_FEATURE(EndCriticalStmt) - READ_FEATURE(EndDoStmt) - READ_FEATURE(EndEnumStmt) - READ_FEATURE(EndForallStmt) - READ_FEATURE(EndFunctionStmt) - READ_FEATURE(EndIfStmt) - READ_FEATURE(EndInterfaceStmt) - READ_FEATURE(EndLabel) - READ_FEATURE(EndModuleStmt) - READ_FEATURE(EndMpSubprogramStmt) - READ_FEATURE(EndProgramStmt) - READ_FEATURE(EndSelectStmt) - READ_FEATURE(EndSubmoduleStmt) - READ_FEATURE(EndSubroutineStmt) - READ_FEATURE(EndTypeStmt) - READ_FEATURE(EndWhereStmt) - READ_FEATURE(EndfileStmt) - READ_FEATURE(EntityDecl) - READ_FEATURE(EntryStmt) - READ_FEATURE(EnumDef) - READ_FEATURE(EnumDefStmt) - READ_FEATURE(Enumerator) - READ_FEATURE(EnumeratorDefStmt) - READ_FEATURE(EorLabel) - READ_FEATURE(EquivalenceObject) - READ_FEATURE(EquivalenceStmt) - READ_FEATURE(ErrLabel) - READ_FEATURE(ErrorRecovery) - READ_FEATURE(EventPostStmt) - READ_FEATURE(EventWaitStmt) - READ_FEATURE(EventWaitStmt::EventWaitSpec) - READ_FEATURE(ExecutableConstruct) - READ_FEATURE(ExecutionPart) - READ_FEATURE(ExecutionPartConstruct) - READ_FEATURE(ExitStmt) - READ_FEATURE(ExplicitCoshapeSpec) - READ_FEATURE(ExplicitShapeSpec) - READ_FEATURE(Expr) - READ_FEATURE(Expr::Parentheses) - READ_FEATURE(Expr::UnaryPlus) - READ_FEATURE(Expr::Negate) - READ_FEATURE(Expr::NOT) - READ_FEATURE(Expr::PercentLoc) - READ_FEATURE(Expr::DefinedUnary) - READ_FEATURE(Expr::Power) - READ_FEATURE(Expr::Multiply) - READ_FEATURE(Expr::Divide) - READ_FEATURE(Expr::Add) - READ_FEATURE(Expr::Subtract) - READ_FEATURE(Expr::Concat) - READ_FEATURE(Expr::LT) - READ_FEATURE(Expr::LE) - READ_FEATURE(Expr::EQ) - READ_FEATURE(Expr::NE) - READ_FEATURE(Expr::GE) - READ_FEATURE(Expr::GT) - READ_FEATURE(Expr::AND) - READ_FEATURE(Expr::OR) - READ_FEATURE(Expr::EQV) - READ_FEATURE(Expr::NEQV) - READ_FEATURE(Expr::DefinedBinary) - READ_FEATURE(Expr::ComplexConstructor) - READ_FEATURE(External) - READ_FEATURE(ExternalStmt) - READ_FEATURE(FailImageStmt) - READ_FEATURE(FileUnitNumber) - READ_FEATURE(FinalProcedureStmt) - READ_FEATURE(FlushStmt) - READ_FEATURE(ForallAssignmentStmt) - READ_FEATURE(ForallBodyConstruct) - READ_FEATURE(ForallConstruct) - READ_FEATURE(ForallConstructStmt) - READ_FEATURE(ForallStmt) - READ_FEATURE(FormTeamStmt) - READ_FEATURE(FormTeamStmt::FormTeamSpec) - READ_FEATURE(Format) - READ_FEATURE(FormatStmt) - READ_FEATURE(FunctionReference) - READ_FEATURE(FunctionStmt) - READ_FEATURE(FunctionSubprogram) - READ_FEATURE(GenericSpec) - READ_FEATURE(GenericSpec::Assignment) - READ_FEATURE(GenericSpec::ReadFormatted) - READ_FEATURE(GenericSpec::ReadUnformatted) - READ_FEATURE(GenericSpec::WriteFormatted) - READ_FEATURE(GenericSpec::WriteUnformatted) - READ_FEATURE(GenericStmt) - READ_FEATURE(GotoStmt) - READ_FEATURE(HollerithLiteralConstant) - READ_FEATURE(IdExpr) - READ_FEATURE(IdVariable) - READ_FEATURE(IfConstruct) - READ_FEATURE(IfConstruct::ElseBlock) - READ_FEATURE(IfConstruct::ElseIfBlock) - READ_FEATURE(IfStmt) - READ_FEATURE(IfThenStmt) - READ_FEATURE(TeamValue) - READ_FEATURE(ImageSelector) - READ_FEATURE(ImageSelectorSpec) - 
READ_FEATURE(ImageSelectorSpec::Stat) - READ_FEATURE(ImageSelectorSpec::Team_Number) - READ_FEATURE(ImplicitPart) - READ_FEATURE(ImplicitPartStmt) - READ_FEATURE(ImplicitSpec) - READ_FEATURE(ImplicitStmt) - READ_FEATURE(ImplicitStmt::ImplicitNoneNameSpec) - READ_FEATURE(ImpliedShapeSpec) - READ_FEATURE(ImportStmt) - READ_FEATURE(Initialization) - READ_FEATURE(InputImpliedDo) - READ_FEATURE(InputItem) - READ_FEATURE(InquireSpec) - READ_FEATURE(InquireSpec::CharVar) - READ_FEATURE(InquireSpec::CharVar::Kind) - READ_FEATURE(InquireSpec::IntVar) - READ_FEATURE(InquireSpec::IntVar::Kind) - READ_FEATURE(InquireSpec::LogVar) - READ_FEATURE(InquireSpec::LogVar::Kind) - READ_FEATURE(InquireStmt) - READ_FEATURE(InquireStmt::Iolength) - READ_FEATURE(IntegerTypeSpec) - READ_FEATURE(IntentSpec) - READ_FEATURE(IntentSpec::Intent) - READ_FEATURE(IntentStmt) - READ_FEATURE(InterfaceBlock) - READ_FEATURE(InterfaceBody) - READ_FEATURE(InterfaceBody::Function) - READ_FEATURE(InterfaceBody::Subroutine) - READ_FEATURE(InterfaceSpecification) - READ_FEATURE(InterfaceStmt) - READ_FEATURE(InternalSubprogram) - READ_FEATURE(InternalSubprogramPart) - READ_FEATURE(Intrinsic) - READ_FEATURE(IntrinsicStmt) - READ_FEATURE(IntrinsicTypeSpec) - READ_FEATURE(IntrinsicTypeSpec::Character) - READ_FEATURE(IntrinsicTypeSpec::Complex) - READ_FEATURE(IntrinsicTypeSpec::DoubleComplex) - READ_FEATURE(IntrinsicTypeSpec::DoublePrecision) - READ_FEATURE(IntrinsicTypeSpec::Logical) - READ_FEATURE(IntrinsicTypeSpec::Real) - READ_FEATURE(IoControlSpec) - READ_FEATURE(IoControlSpec::Asynchronous) - READ_FEATURE(IoControlSpec::CharExpr) - READ_FEATURE(IoControlSpec::CharExpr::Kind) - READ_FEATURE(IoControlSpec::Pos) - READ_FEATURE(IoControlSpec::Rec) - READ_FEATURE(IoControlSpec::Size) - READ_FEATURE(IoUnit) - READ_FEATURE(Keyword) - READ_FEATURE(KindParam) - READ_FEATURE(KindSelector) - READ_FEATURE(KindSelector::StarSize) - READ_FEATURE(LabelDoStmt) - READ_FEATURE(LanguageBindingSpec) - READ_FEATURE(LengthSelector) - READ_FEATURE(LetterSpec) - READ_FEATURE(LiteralConstant) - READ_FEATURE(IntLiteralConstant) - READ_FEATURE(LocalitySpec) - READ_FEATURE(LocalitySpec::DefaultNone) - READ_FEATURE(LocalitySpec::Local) - READ_FEATURE(LocalitySpec::LocalInit) - READ_FEATURE(LocalitySpec::Shared) - READ_FEATURE(LockStmt) - READ_FEATURE(LockStmt::LockStat) - READ_FEATURE(LogicalLiteralConstant) - READ_FEATURE(LoopControl) - READ_FEATURE(LoopControl::Concurrent) - READ_FEATURE(MainProgram) - READ_FEATURE(Map) - READ_FEATURE(Map::EndMapStmt) - READ_FEATURE(Map::MapStmt) - READ_FEATURE(MaskedElsewhereStmt) - READ_FEATURE(Module) - READ_FEATURE(ModuleStmt) - READ_FEATURE(ModuleSubprogram) - READ_FEATURE(ModuleSubprogramPart) - READ_FEATURE(MpSubprogramStmt) - READ_FEATURE(MsgVariable) - READ_FEATURE(Name) - READ_FEATURE(NamedConstant) - READ_FEATURE(NamedConstantDef) - READ_FEATURE(NamelistStmt) - READ_FEATURE(NamelistStmt::Group) - READ_FEATURE(NonLabelDoStmt) - READ_FEATURE(NoPass) - READ_FEATURE(NullifyStmt) - READ_FEATURE(NullInit) - READ_FEATURE(ObjectDecl) - READ_FEATURE(OldParameterStmt) - READ_FEATURE(OmpAlignedClause) - READ_FEATURE(OmpAtomic) - READ_FEATURE(OmpAtomicCapture) - READ_FEATURE(OmpAtomicCapture::Stmt1) - READ_FEATURE(OmpAtomicCapture::Stmt2) - READ_FEATURE(OmpAtomicRead) - READ_FEATURE(OmpAtomicUpdate) - READ_FEATURE(OmpAtomicWrite) - READ_FEATURE(OmpBeginBlockDirective) - READ_FEATURE(OmpBeginLoopDirective) - READ_FEATURE(OmpBeginSectionsDirective) - READ_FEATURE(OmpBlockDirective) - READ_FEATURE(OmpCancelType) - 
READ_FEATURE(OmpCancelType::Type) - READ_FEATURE(OmpClause) - READ_FEATURE(OmpClauseList) - READ_FEATURE(OmpCriticalDirective) - READ_FEATURE(OmpDeclareTargetSpecifier) - READ_FEATURE(OmpDeclareTargetWithClause) - READ_FEATURE(OmpDeclareTargetWithList) - READ_FEATURE(OmpDefaultClause) - READ_FEATURE(OmpDefaultClause::Type) - READ_FEATURE(OmpDefaultmapClause) - READ_FEATURE(OmpDefaultmapClause::ImplicitBehavior) - READ_FEATURE(OmpDefaultmapClause::VariableCategory) - READ_FEATURE(OmpDependClause) - READ_FEATURE(OmpDependClause::InOut) - READ_FEATURE(OmpDependClause::Sink) - READ_FEATURE(OmpDependClause::Source) - READ_FEATURE(OmpDependenceType) - READ_FEATURE(OmpDependenceType::Type) - READ_FEATURE(OmpDependSinkVec) - READ_FEATURE(OmpDependSinkVecLength) - READ_FEATURE(OmpEndAtomic) - READ_FEATURE(OmpEndBlockDirective) - READ_FEATURE(OmpEndCriticalDirective) - READ_FEATURE(OmpEndLoopDirective) - READ_FEATURE(OmpEndSectionsDirective) - READ_FEATURE(OmpIfClause) - READ_FEATURE(OmpIfClause::DirectiveNameModifier) - READ_FEATURE(OmpLinearClause) - READ_FEATURE(OmpLinearClause::WithModifier) - READ_FEATURE(OmpLinearClause::WithoutModifier) - READ_FEATURE(OmpLinearModifier) - READ_FEATURE(OmpLinearModifier::Type) - READ_FEATURE(OmpLoopDirective) - READ_FEATURE(OmpMapClause) - READ_FEATURE(OmpMapType) - READ_FEATURE(OmpMapType::Always) - READ_FEATURE(OmpMapType::Type) - READ_FEATURE(OmpObject) - READ_FEATURE(OmpObjectList) - READ_FEATURE(OmpOrderClause) - READ_FEATURE(OmpOrderClause::Type) - READ_FEATURE(OmpOrderModifier) - READ_FEATURE(OmpOrderModifier::Kind) - READ_FEATURE(OmpProcBindClause) - READ_FEATURE(OmpProcBindClause::Type) - READ_FEATURE(OmpReductionClause) - READ_FEATURE(OmpInReductionClause) - READ_FEATURE(OmpReductionCombiner) - READ_FEATURE(OmpReductionCombiner::FunctionCombiner) - READ_FEATURE(OmpReductionInitializerClause) - READ_FEATURE(OmpReductionOperator) - READ_FEATURE(OmpAllocateClause) - READ_FEATURE(OmpAllocateClause::Allocator) - READ_FEATURE(OmpScheduleClause) - READ_FEATURE(OmpScheduleClause::ScheduleType) - READ_FEATURE(OmpDeviceClause) - READ_FEATURE(OmpDeviceClause::DeviceModifier) - READ_FEATURE(OmpDeviceTypeClause) - READ_FEATURE(OmpDeviceTypeClause::Type) - READ_FEATURE(OmpScheduleModifier) - READ_FEATURE(OmpScheduleModifier::Modifier1) - READ_FEATURE(OmpScheduleModifier::Modifier2) - READ_FEATURE(OmpScheduleModifierType) - READ_FEATURE(OmpScheduleModifierType::ModType) - READ_FEATURE(OmpSectionBlocks) - READ_FEATURE(OmpSectionsDirective) - READ_FEATURE(OmpSimpleStandaloneDirective) - READ_FEATURE(Only) - READ_FEATURE(OpenACCAtomicConstruct) - READ_FEATURE(OpenACCBlockConstruct) - READ_FEATURE(OpenACCCacheConstruct) - READ_FEATURE(OpenACCCombinedConstruct) - READ_FEATURE(OpenACCConstruct) - READ_FEATURE(OpenACCDeclarativeConstruct) - READ_FEATURE(OpenACCLoopConstruct) - READ_FEATURE(OpenACCRoutineConstruct) - READ_FEATURE(OpenACCStandaloneDeclarativeConstruct) - READ_FEATURE(OpenACCStandaloneConstruct) - READ_FEATURE(OpenACCWaitConstruct) - READ_FEATURE(OpenMPAtomicConstruct) - READ_FEATURE(OpenMPBlockConstruct) - READ_FEATURE(OpenMPCancelConstruct) - READ_FEATURE(OpenMPCancelConstruct::If) - READ_FEATURE(OpenMPCancellationPointConstruct) - READ_FEATURE(OpenMPConstruct) - READ_FEATURE(OpenMPCriticalConstruct) - READ_FEATURE(OpenMPDeclarativeAllocate) - READ_FEATURE(OpenMPDeclarativeConstruct) - READ_FEATURE(OpenMPDeclareReductionConstruct) - READ_FEATURE(OpenMPDeclareSimdConstruct) - READ_FEATURE(OpenMPDeclareTargetConstruct) - READ_FEATURE(OmpMemoryOrderClause) - 
READ_FEATURE(OmpAtomicClause) - READ_FEATURE(OmpAtomicClauseList) - READ_FEATURE(OmpAtomicDefaultMemOrderClause) - READ_FEATURE(OmpAtomicDefaultMemOrderClause::Type) - READ_FEATURE(OpenMPFlushConstruct) - READ_FEATURE(OpenMPLoopConstruct) - READ_FEATURE(OpenMPExecutableAllocate) - READ_FEATURE(OpenMPRequiresConstruct) - READ_FEATURE(OpenMPSimpleStandaloneConstruct) - READ_FEATURE(OpenMPStandaloneConstruct) - READ_FEATURE(OpenMPSectionConstruct) - READ_FEATURE(OpenMPSectionsConstruct) - READ_FEATURE(OpenMPThreadprivate) - READ_FEATURE(OpenStmt) - READ_FEATURE(Optional) - READ_FEATURE(OptionalStmt) - READ_FEATURE(OtherSpecificationStmt) - READ_FEATURE(OutputImpliedDo) - READ_FEATURE(OutputItem) - READ_FEATURE(Parameter) - READ_FEATURE(ParameterStmt) - READ_FEATURE(ParentIdentifier) - READ_FEATURE(Pass) - READ_FEATURE(PauseStmt) - READ_FEATURE(Pointer) - READ_FEATURE(PointerAssignmentStmt) - READ_FEATURE(PointerAssignmentStmt::Bounds) - READ_FEATURE(PointerDecl) - READ_FEATURE(PointerObject) - READ_FEATURE(PointerStmt) - READ_FEATURE(PositionOrFlushSpec) - READ_FEATURE(PrefixSpec) - READ_FEATURE(PrefixSpec::Elemental) - READ_FEATURE(PrefixSpec::Impure) - READ_FEATURE(PrefixSpec::Module) - READ_FEATURE(PrefixSpec::Non_Recursive) - READ_FEATURE(PrefixSpec::Pure) - READ_FEATURE(PrefixSpec::Recursive) - READ_FEATURE(PrintStmt) - READ_FEATURE(PrivateStmt) - READ_FEATURE(PrivateOrSequence) - READ_FEATURE(ProcAttrSpec) - READ_FEATURE(ProcComponentAttrSpec) - READ_FEATURE(ProcComponentDefStmt) - READ_FEATURE(ProcComponentRef) - READ_FEATURE(ProcDecl) - READ_FEATURE(ProcInterface) - READ_FEATURE(ProcPointerInit) - READ_FEATURE(ProcedureDeclarationStmt) - READ_FEATURE(ProcedureDesignator) - READ_FEATURE(ProcedureStmt) - READ_FEATURE(ProcedureStmt::Kind) - READ_FEATURE(Program) - READ_FEATURE(ProgramStmt) - READ_FEATURE(ProgramUnit) - READ_FEATURE(Protected) - READ_FEATURE(ProtectedStmt) - READ_FEATURE(ReadStmt) - READ_FEATURE(RealLiteralConstant) - READ_FEATURE(RealLiteralConstant::Real) - READ_FEATURE(Rename) - READ_FEATURE(Rename::Names) - READ_FEATURE(Rename::Operators) - READ_FEATURE(ReturnStmt) - READ_FEATURE(RewindStmt) - READ_FEATURE(Save) - READ_FEATURE(SaveStmt) - READ_FEATURE(SavedEntity) - READ_FEATURE(SavedEntity::Kind) - READ_FEATURE(SectionSubscript) - READ_FEATURE(SelectCaseStmt) - READ_FEATURE(SelectRankCaseStmt) - READ_FEATURE(SelectRankCaseStmt::Rank) - READ_FEATURE(SelectRankConstruct) - READ_FEATURE(SelectRankConstruct::RankCase) - READ_FEATURE(SelectRankStmt) - READ_FEATURE(SelectTypeConstruct) - READ_FEATURE(SelectTypeConstruct::TypeCase) - READ_FEATURE(SelectTypeStmt) - READ_FEATURE(Selector) - READ_FEATURE(SeparateModuleSubprogram) - READ_FEATURE(SequenceStmt) - READ_FEATURE(Sign) - READ_FEATURE(SignedComplexLiteralConstant) - READ_FEATURE(SignedIntLiteralConstant) - READ_FEATURE(SignedRealLiteralConstant) - READ_FEATURE(SpecificationConstruct) - READ_FEATURE(SpecificationExpr) - READ_FEATURE(SpecificationPart) - READ_FEATURE(Star) - READ_FEATURE(StatOrErrmsg) - READ_FEATURE(StatVariable) - READ_FEATURE(StatusExpr) - READ_FEATURE(StmtFunctionStmt) - READ_FEATURE(StopCode) - READ_FEATURE(StopStmt) - READ_FEATURE(StopStmt::Kind) - READ_FEATURE(StructureComponent) - READ_FEATURE(StructureConstructor) - READ_FEATURE(StructureDef) - READ_FEATURE(StructureDef::EndStructureStmt) - READ_FEATURE(StructureField) - READ_FEATURE(StructureStmt) - READ_FEATURE(Submodule) - READ_FEATURE(SubmoduleStmt) - READ_FEATURE(SubroutineStmt) - READ_FEATURE(SubroutineSubprogram) - 
READ_FEATURE(SubscriptTriplet) - READ_FEATURE(Substring) - READ_FEATURE(SubstringInquiry) - READ_FEATURE(SubstringRange) - READ_FEATURE(Suffix) - READ_FEATURE(SyncAllStmt) - READ_FEATURE(SyncImagesStmt) - READ_FEATURE(SyncImagesStmt::ImageSet) - READ_FEATURE(SyncMemoryStmt) - READ_FEATURE(SyncTeamStmt) - READ_FEATURE(Target) - READ_FEATURE(TargetStmt) - READ_FEATURE(TypeAttrSpec) - READ_FEATURE(TypeAttrSpec::BindC) - READ_FEATURE(TypeAttrSpec::Extends) - READ_FEATURE(TypeBoundGenericStmt) - READ_FEATURE(TypeBoundProcBinding) - READ_FEATURE(TypeBoundProcDecl) - READ_FEATURE(TypeBoundProcedurePart) - READ_FEATURE(TypeBoundProcedureStmt) - READ_FEATURE(TypeBoundProcedureStmt::WithInterface) - READ_FEATURE(TypeBoundProcedureStmt::WithoutInterface) - READ_FEATURE(TypeDeclarationStmt) - READ_FEATURE(TypeGuardStmt) - READ_FEATURE(TypeGuardStmt::Guard) - READ_FEATURE(TypeParamDecl) - READ_FEATURE(TypeParamDefStmt) - READ_FEATURE(common::TypeParamAttr) - READ_FEATURE(TypeParamSpec) - READ_FEATURE(TypeParamValue) - READ_FEATURE(TypeParamValue::Deferred) - READ_FEATURE(TypeSpec) - READ_FEATURE(Union) - READ_FEATURE(Union::EndUnionStmt) - READ_FEATURE(Union::UnionStmt) - READ_FEATURE(UnlockStmt) - READ_FEATURE(UseStmt) - READ_FEATURE(UseStmt::ModuleNature) - READ_FEATURE(Value) - READ_FEATURE(ValueStmt) - READ_FEATURE(Variable) - READ_FEATURE(Verbatim) - READ_FEATURE(Volatile) - READ_FEATURE(VolatileStmt) - READ_FEATURE(WaitSpec) - READ_FEATURE(WaitStmt) - READ_FEATURE(WhereBodyConstruct) - READ_FEATURE(WhereConstruct) - READ_FEATURE(WhereConstruct::Elsewhere) - READ_FEATURE(WhereConstruct::MaskedElsewhere) - READ_FEATURE(WhereConstructStmt) - READ_FEATURE(WhereStmt) - READ_FEATURE(WriteStmt) - - READ_FEATURE(llvm::omp::Directive) - READ_FEATURE(llvm::omp::Clause) - READ_FEATURE(llvm::acc::Directive) - READ_FEATURE(llvm::acc::DefaultValue) - - template <typename A> bool Pre(const A &) { return true; } - template <typename A> void Post(const A &) {} - - template <typename A> bool Pre(const Statement<A> &) { return true; } - template <typename A> void Post(const Statement<A> &) {} - - template <typename A> bool Pre(const UnlabeledStatement<A> &) { return true; } - template <typename A> void Post(const UnlabeledStatement<A> &) {} - - template <typename A> bool Pre(const common::Indirection<A> &) { - return true; - } - template <typename A> void Post(const common::Indirection<A> &) {} - - template <typename A> bool Pre(const Scalar<A> &) { return true; } - template <typename A> void Post(const Scalar<A> &) {} - - template <typename A> bool Pre(const Constant<A> &) { return true; } - template <typename A> void Post(const Constant<A> &) {} - - template <typename A> bool Pre(const Integer<A> &) { return true; } - template <typename A> void Post(const Integer<A> &) {} - - template <typename A> bool Pre(const Logical<A> &) { return true; } - template <typename A> void Post(const Logical<A> &) {} - - template <typename A> bool Pre(const DefaultChar<A> &) { return true; } - template <typename A> void Post(const DefaultChar<A> &) {} - - template <typename... As> bool Pre(const std::tuple<As...> &) { return true; } - template <typename... As> void Post(const std::tuple<As...> &) {} - - template <typename... As> bool Pre(const std::variant<As...> &) { return true; } - template <typename... As> void Post(const std::variant<As...> &) {} -}; - -class FeatureListAction : public PluginParseTreeAction { - void executeAction() override { - NodeVisitor visitor; - Fortran::parser::Walk(getParsing().parseTree(), visitor); - - for (auto const &[feature, frequency] : visitor.getFrequencies()) { - llvm::outs() << feature << ": " << frequency << "\n"; - } - } - - bool beginSourceFileAction() override { return runPrescan() && runParse(); } -}; - -static FrontendPluginRegistry::Add<FeatureListAction> X( - "feature-list", "List program features"); diff --git a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index
7d96a72e5f36d..4de1036dfb52b 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -79,7 +79,6 @@ if (LLVM_BUILD_EXAMPLES) list(APPEND FLANG_TEST_DEPENDS flangPrintFunctionNames flangOmpReport - flangFeatureList ) endif () diff --git a/flang/test/Examples/feature-list-class.f90 b/flang/test/Examples/feature-list-class.f90 deleted file mode 100644 index cba361b677f2a..0000000000000 --- a/flang/test/Examples/feature-list-class.f90 +++ /dev/null @@ -1,88 +0,0 @@ -! UNSUPPORTED: system-windows -! REQUIRES: plugins, shell, examples - -! RUN: %flang_fc1 -load %llvmshlibdir/flangFeatureList%pluginext \ -! RUN: -plugin feature-list %s 2>&1 | FileCheck %s - -module list_features_test - implicit none - - type :: test_class_1 - integer :: a - real :: b - contains - procedure :: sum => sum_test_class_1 - procedure :: set => set_values_test_class_1 - end type -contains - real function sum_test_class_1(self) - class(test_class_1), intent(in) :: self - sum_test_class_1 = self%a + self%b - end function - - subroutine set_values_test_class_1(self, a, b) - class(test_class_1), intent(out) :: self - integer, intent(in) :: a, b - self%a = a - self%b = b - end subroutine -end module list_features_test - -! CHECK: Name: 32 -! CHECK-NEXT: DerivedTypeSpec: 2 -! CHECK-NEXT: Expr::Add: 1 -! CHECK-NEXT: IntrinsicTypeSpec: 4 -! CHECK-NEXT: IntegerTypeSpec: 2 -! CHECK-NEXT: IntrinsicTypeSpec::Real: 2 -! CHECK-NEXT: DataRef: 11 -! CHECK-NEXT: StructureComponent: 4 -! CHECK-NEXT: Designator: 7 -! CHECK-NEXT: Expr: 5 -! CHECK-NEXT: Variable: 3 -! CHECK-NEXT: AttrSpec: 3 -! CHECK-NEXT: IntentSpec: 3 -! CHECK-NEXT: IntentSpec::Intent: 3 -! CHECK-NEXT: DummyArg: 3 -! CHECK-NEXT: DeclarationTypeSpec: 6 -! CHECK-NEXT: DeclarationTypeSpec::Class: 2 -! CHECK-NEXT: ImplicitStmt: 1 -! CHECK-NEXT: ImplicitPart: 3 -! CHECK-NEXT: ImplicitPartStmt: 1 -! CHECK-NEXT: PrefixSpec: 1 -! CHECK-NEXT: Module: 1 -! CHECK-NEXT: AssignmentStmt: 3 -! CHECK-NEXT: ActionStmt: 3 -! CHECK-NEXT: Block: 2 -! CHECK-NEXT: TypeBoundProcBinding: 2 -! CHECK-NEXT: TypeBoundProcedureStmt: 2 -! CHECK-NEXT: TypeBoundProcDecl: 2 -! CHECK-NEXT: TypeBoundProcedureStmt::WithoutInterface: 2 -! CHECK-NEXT: ComponentOrFill: 2 -! CHECK-NEXT: ComponentDecl: 2 -! CHECK-NEXT: DataComponentDefStmt: 2 -! CHECK-NEXT: ComponentDefStmt: 2 -! CHECK-NEXT: TypeBoundProcedurePart: 1 -! CHECK-NEXT: ContainsStmt: 2 -! CHECK-NEXT: EndTypeStmt: 1 -! CHECK-NEXT: DerivedTypeDef: 1 -! CHECK-NEXT: DerivedTypeStmt: 1 -! CHECK-NEXT: EntityDecl: 4 -! CHECK-NEXT: SpecificationConstruct: 4 -! CHECK-NEXT: TypeDeclarationStmt: 3 -! CHECK-NEXT: DeclarationConstruct: 4 -! CHECK-NEXT: EndFunctionStmt: 1 -! CHECK-NEXT: FunctionStmt: 1 -! CHECK-NEXT: EndSubroutineStmt: 1 -! CHECK-NEXT: SubroutineStmt: 1 -! CHECK-NEXT: ExecutionPartConstruct: 3 -! CHECK-NEXT: ExecutableConstruct: 3 -! CHECK-NEXT: SpecificationPart: 3 -! CHECK-NEXT: FunctionSubprogram: 1 -! CHECK-NEXT: ExecutionPart: 2 -! CHECK-NEXT: SubroutineSubprogram: 1 -! CHECK-NEXT: ModuleSubprogram: 2 -! CHECK-NEXT: ProgramUnit: 1 -! CHECK-NEXT: Program: 1 -! CHECK-NEXT: ModuleSubprogramPart: 1 -! CHECK-NEXT: EndModuleStmt: 1 -! CHECK-NEXT: ModuleStmt: 1 diff --git a/flang/test/Examples/feature-list-functions.f90 b/flang/test/Examples/feature-list-functions.f90 deleted file mode 100644 index a1913dda697c7..0000000000000 --- a/flang/test/Examples/feature-list-functions.f90 +++ /dev/null @@ -1,76 +0,0 @@ -! UNSUPPORTED: system-windows -! REQUIRES: plugins, shell, examples - -! 
RUN: %flang_fc1 -load %llvmshlibdir/flangFeatureList%pluginext \ -! RUN: -plugin feature-list %s 2>&1 | FileCheck %s - -program list_features_test - implicit none - call test_sub(test_func(2, 3), 4) -contains - subroutine test_sub(a, b) - integer, intent(in) :: a, b - print "(I0)", a + b - end subroutine - - integer function test_func(a, b) - integer, intent(in) :: a, b - test_func = a * b - end function -end program list_features_test - -! CHECK: Name: 19 -! CHECK-NEXT: IntLiteralConstant: 3 -! CHECK-NEXT: LiteralConstant: 4 -! CHECK-NEXT: CharLiteralConstant: 1 -! CHECK-NEXT: FunctionReference: 1 -! CHECK-NEXT: Call: 2 -! CHECK-NEXT: Expr::Multiply: 1 -! CHECK-NEXT: Expr::Add: 1 -! CHECK-NEXT: IntrinsicTypeSpec: 3 -! CHECK-NEXT: IntegerTypeSpec: 3 -! CHECK-NEXT: Format: 1 -! CHECK-NEXT: DataRef: 5 -! CHECK-NEXT: ProcedureDesignator: 2 -! CHECK-NEXT: Designator: 5 -! CHECK-NEXT: ActualArgSpec: 4 -! CHECK-NEXT: ActualArg: 4 -! CHECK-NEXT: Expr: 11 -! CHECK-NEXT: Variable: 1 -! CHECK-NEXT: AttrSpec: 2 -! CHECK-NEXT: IntentSpec: 2 -! CHECK-NEXT: IntentSpec::Intent: 2 -! CHECK-NEXT: DummyArg: 2 -! CHECK-NEXT: DeclarationTypeSpec: 3 -! CHECK-NEXT: ImplicitStmt: 1 -! CHECK-NEXT: ImplicitPart: 3 -! CHECK-NEXT: ImplicitPartStmt: 1 -! CHECK-NEXT: PrefixSpec: 1 -! CHECK-NEXT: OutputItem: 1 -! CHECK-NEXT: AssignmentStmt: 1 -! CHECK-NEXT: ActionStmt: 3 -! CHECK-NEXT: PrintStmt: 1 -! CHECK-NEXT: CallStmt: 1 -! CHECK-NEXT: Block: 3 -! CHECK-NEXT: ContainsStmt: 1 -! CHECK-NEXT: EntityDecl: 4 -! CHECK-NEXT: SpecificationConstruct: 2 -! CHECK-NEXT: TypeDeclarationStmt: 2 -! CHECK-NEXT: DeclarationConstruct: 2 -! CHECK-NEXT: EndFunctionStmt: 1 -! CHECK-NEXT: FunctionStmt: 1 -! CHECK-NEXT: EndSubroutineStmt: 1 -! CHECK-NEXT: SubroutineStmt: 1 -! CHECK-NEXT: ExecutionPartConstruct: 3 -! CHECK-NEXT: ExecutableConstruct: 3 -! CHECK-NEXT: SpecificationPart: 3 -! CHECK-NEXT: FunctionSubprogram: 1 -! CHECK-NEXT: ExecutionPart: 3 -! CHECK-NEXT: InternalSubprogramPart: 1 -! CHECK-NEXT: InternalSubprogram: 2 -! CHECK-NEXT: SubroutineSubprogram: 1 -! CHECK-NEXT: ProgramUnit: 1 -! CHECK-NEXT: MainProgram: 1 -! CHECK-NEXT: Program: 1 -! CHECK-NEXT: EndProgramStmt: 1 -! CHECK-NEXT: ProgramStmt: 1 From 473e9adb84c29764da51e4d2a995fce9b2e2ffa5 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 22 Mar 2023 14:01:39 -0700 Subject: [PATCH 344/691] [MSAN] Update vector load/store tests to use proper attribute I had made a mistake when pre-committing the tests; caught in review of D146157. 
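For readers decoding the CHECK lines below: MemorySanitizer keys on the sanitize_memory function attribute, so with the mistaken sanitize_address attribute the pass left these function bodies uninstrumented, which is why the expected IR grows so much in this update. The magic constants in the new lines are the x86_64 MSan shadow mapping; a minimal C++ sketch of that address arithmetic follows (illustrative only; the application address is made up, and real MSan inlines this arithmetic into the instrumented function rather than computing it in a helper):

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical application address, for illustration only.
  std::uint64_t app = 0x7fff12345678;
  // 87960930222080 == 0x500000000000: xor-ing the address with this constant
  // yields the shadow address that holds the value's definedness bits.
  std::uint64_t shadow = app ^ 0x500000000000;
  // 17592186044416 == 0x100000000000: with -fsanitize-memory-track-origins,
  // the origin slot sits at this fixed offset from the shadow address.
  std::uint64_t origin = shadow + 0x100000000000;
  std::printf("app=%#llx shadow=%#llx origin=%#llx\n",
              (unsigned long long)app, (unsigned long long)shadow,
              (unsigned long long)origin);
  return 0;
}

These are exactly the xor/add constants that appear in the TMP2..TMP6 lines of the diff.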
--- .../MemorySanitizer/vector-load-store.ll | 215 +++++++++++++++--- 1 file changed, 180 insertions(+), 35 deletions(-) diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll index 52c60e9b83b29..a2245763abbc7 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll @@ -6,100 +6,210 @@ target triple = "x86_64-unknown-linux-gnu" target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -define void @load.v1i32(ptr %p) sanitize_address { +define void @load.v1i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @load.v1i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP4]], align 4 ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @load.v1i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0:![0-9]+]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3:[0-9]+]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP7]], align 4 ; ADDR-NEXT: ret void ; ; ORIGINS-LABEL: @load.v1i32( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P:%.*]], align 4 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP4]], align 4 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 ; ORIGINS-NEXT: ret void ; load <1 x i32>, ptr %p ret void } -define void @load.v2i32(ptr %p) sanitize_address { +define void @load.v2i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @load.v2i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @load.v2i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 8 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load <2 x 
i32>, ptr [[P:%.*]], align 8 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8 ; ADDR-NEXT: ret void ; ; ORIGINS-LABEL: @load.v2i32( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[P:%.*]], align 8 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 8 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 ; ORIGINS-NEXT: ret void ; load <2 x i32>, ptr %p ret void } -define void @load.v4i32(ptr %p) sanitize_address { +define void @load.v4i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @load.v4i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 16 ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @load.v4i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16 ; ADDR-NEXT: ret void ; ; ORIGINS-LABEL: @load.v4i32( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 16 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 16 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 16 ; ORIGINS-NEXT: ret void ; load <4 x i32>, ptr %p ret void } -define void @load.v8i32(ptr %p) sanitize_address { +define void @load.v8i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @load.v8i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 32 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @load.v8i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void 
@llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 32 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 32 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 ; ADDR-NEXT: ret void ; ; ORIGINS-LABEL: @load.v8i32( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[P:%.*]], align 32 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 32 ; ORIGINS-NEXT: ret void ; load <8 x i32>, ptr %p ret void } -define void @load.v16i32(ptr %p) sanitize_address { +define void @load.v16i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @load.v16i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[P:%.*]], align 64 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP4]], align 64 ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @load.v16i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[P:%.*]], align 64 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr [[P:%.*]], align 64 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64 ; ADDR-NEXT: ret void ; ; ORIGINS-LABEL: @load.v16i32( ; ORIGINS-NEXT: call void @llvm.donothing() ; ORIGINS-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[P:%.*]], align 64 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP4]], align 64 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 64 ; ORIGINS-NEXT: ret void ; load <16 x i32>, ptr %p @@ -107,7 +217,7 @@ define void @load.v16i32(ptr %p) sanitize_address { } -define void @store.v1i32(ptr %p) sanitize_address { +define void @store.v1i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @store.v1i32( ; CHECK-NEXT: call void @llvm.donothing() ; 
CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -118,11 +228,18 @@ define void @store.v1i32(ptr %p) sanitize_address { ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @store.v1i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 -; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 -; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; ADDR-NEXT: store <1 x i32> zeroinitializer, ptr [[TMP3]], align 4 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store <1 x i32> zeroinitializer, ptr [[TMP6]], align 4 ; ADDR-NEXT: store <1 x i32> zeroinitializer, ptr [[P]], align 4 ; ADDR-NEXT: ret void ; @@ -141,7 +258,7 @@ define void @store.v1i32(ptr %p) sanitize_address { ret void } -define void @store.v2i32(ptr %p) sanitize_address { +define void @store.v2i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @store.v2i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -152,11 +269,18 @@ define void @store.v2i32(ptr %p) sanitize_address { ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @store.v2i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 -; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 -; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; ADDR-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP3]], align 8 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store <2 x i32> zeroinitializer, ptr [[TMP6]], align 8 ; ADDR-NEXT: store <2 x i32> zeroinitializer, ptr [[P]], align 8 ; ADDR-NEXT: ret void ; @@ -175,7 +299,7 @@ define void @store.v2i32(ptr %p) sanitize_address { ret void } -define void @store.v4i32(ptr %p) sanitize_address { +define void @store.v4i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @store.v4i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -186,11 +310,18 @@ define void @store.v4i32(ptr %p) sanitize_address { ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @store.v4i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 -; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 -; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; ADDR-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP3]], align 16 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] 
+; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP6]], align 16 ; ADDR-NEXT: store <4 x i32> zeroinitializer, ptr [[P]], align 16 ; ADDR-NEXT: ret void ; @@ -209,7 +340,7 @@ define void @store.v4i32(ptr %p) sanitize_address { ret void } -define void @store.v8i32(ptr %p) sanitize_address { +define void @store.v8i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @store.v8i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -220,11 +351,18 @@ define void @store.v8i32(ptr %p) sanitize_address { ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @store.v8i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 -; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 -; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; ADDR-NEXT: store <8 x i32> zeroinitializer, ptr [[TMP3]], align 32 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store <8 x i32> zeroinitializer, ptr [[TMP6]], align 32 ; ADDR-NEXT: store <8 x i32> zeroinitializer, ptr [[P]], align 32 ; ADDR-NEXT: ret void ; @@ -243,7 +381,7 @@ define void @store.v8i32(ptr %p) sanitize_address { ret void } -define void @store.v16i32(ptr %p) sanitize_address { +define void @store.v16i32(ptr %p) sanitize_memory { ; CHECK-LABEL: @store.v16i32( ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 @@ -254,11 +392,18 @@ define void @store.v16i32(ptr %p) sanitize_address { ; CHECK-NEXT: ret void ; ; ADDR-LABEL: @store.v16i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 ; ADDR-NEXT: call void @llvm.donothing() -; ADDR-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 -; ADDR-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 -; ADDR-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; ADDR-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP3]], align 64 +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store <16 x i32> zeroinitializer, ptr [[TMP6]], align 64 ; ADDR-NEXT: store <16 x i32> zeroinitializer, ptr [[P]], align 64 ; ADDR-NEXT: ret void ; From e73186796db97633332434da69c4e9057e460a59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Wed, 22 Mar 2023 22:11:26 +0100 Subject: [PATCH 345/691] [JITLink] Deterministic JITDylib symbol table dumps Sort symbols before dumping so we get a deterministic order and can check them in tests. 
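For illustration, the same pattern as a self-contained C++ sketch (this is not the Orc code itself; the container and element types are stand-ins): collect {key, pointer-to-entry} pairs from the unordered table, sort by key, then print. Sorting pointers avoids copying the entries and leaves the table itself untouched.

#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

int main() {
  // Stand-in for the JITDylib symbol table (unordered in practice).
  std::unordered_map<std::string, int> symbols{{"main", 1}, {"foo", 2}, {"bar", 3}};
  // Sort pointers to the entries, not copies of them.
  std::vector<std::pair<std::string, const int *>> sorted;
  sorted.reserve(symbols.size());
  for (const auto &kv : symbols)
    sorted.emplace_back(kv.first, &kv.second);
  std::sort(sorted.begin(), sorted.end(),
            [](const auto &l, const auto &r) { return l.first < r.first; });
  for (const auto &kv : sorted)
    std::cout << kv.first << ": " << *kv.second << "\n";
  return 0;
}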
Reviewed By: lhames Differential Revision: https://reviews.llvm.org/D146658 --- llvm/lib/ExecutionEngine/Orc/Core.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 82fa4bed914e6..9b6712818363e 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -1438,16 +1438,23 @@ void JITDylib::dump(raw_ostream &OS) { OS << "Link order: " << LinkOrder << "\n" << "Symbol table:\n"; - for (auto &KV : Symbols) { + // Sort symbols so we get a deterministic order and can check them in tests. + std::vector> SymbolsSorted; + for (auto &KV : Symbols) + SymbolsSorted.emplace_back(KV.first, &KV.second); + std::sort(SymbolsSorted.begin(), SymbolsSorted.end(), + [](const auto &L, const auto &R) { return *L.first < *R.first; }); + + for (auto &KV : SymbolsSorted) { OS << " \"" << *KV.first << "\": "; - if (auto Addr = KV.second.getAddress()) + if (auto Addr = KV.second->getAddress()) OS << Addr; else OS << " "; - OS << " " << KV.second.getFlags() << " " << KV.second.getState(); + OS << " " << KV.second->getFlags() << " " << KV.second->getState(); - if (KV.second.hasMaterializerAttached()) { + if (KV.second->hasMaterializerAttached()) { OS << " (Materializer "; auto I = UnmaterializedInfos.find(KV.first); assert(I != UnmaterializedInfos.end() && From d59a43fe2ad81f5c3918c9ef79a986955256f7ea Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Wed, 22 Mar 2023 17:39:27 +0100 Subject: [PATCH 346/691] [libc++] Qualifies intptr_t and uintptr_t. This has been done using the following command find libcxx/test -type f -exec perl -pi -e 's|^([^/]+?)((?, std::atomic_char32_t>::value), ""); // Added by LWG 2441 - static_assert((std::is_same, std::atomic_intptr_t>::value), ""); - static_assert((std::is_same, std::atomic_uintptr_t>::value), ""); + static_assert((std::is_same, std::atomic_intptr_t>::value), ""); + static_assert((std::is_same, std::atomic_uintptr_t>::value), ""); static_assert((std::is_same, std::atomic_int8_t>::value), ""); static_assert((std::is_same, std::atomic_uint8_t>::value), ""); diff --git a/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp b/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp index 3121e7c57bafe..28125888f27df 100644 --- a/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp +++ b/libcxx/test/std/atomics/stdatomic.h.syn/types.compile.pass.cpp @@ -200,8 +200,8 @@ void f() { static_assert(std::is_same_v, ::atomic_int_fast64_t>); static_assert(std::is_same_v, ::atomic_uint_fast64_t>); - static_assert(std::is_same_v, ::atomic_intptr_t>); - static_assert(std::is_same_v, ::atomic_uintptr_t>); + static_assert(std::is_same_v, ::atomic_intptr_t>); + static_assert(std::is_same_v, ::atomic_uintptr_t>); static_assert(std::is_same_v, ::atomic_size_t>); static_assert(std::is_same_v, ::atomic_ptrdiff_t>); static_assert(std::is_same_v, ::atomic_intmax_t>); diff --git a/libcxx/test/std/atomics/types.pass.cpp b/libcxx/test/std/atomics/types.pass.cpp index 63ab0f30c4a75..cb8dde0d513db 100644 --- a/libcxx/test/std/atomics/types.pass.cpp +++ b/libcxx/test/std/atomics/types.pass.cpp @@ -146,8 +146,8 @@ int main(int, char**) test< std::int64_t> (); test (); - test (); - test (); + test (); + test (); test (); test (); test (); diff --git a/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp b/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp index b28e1af29a89e..05a12895e9213 100644 --- 
a/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp +++ b/libcxx/test/std/experimental/simd/simd.cons/generator.pass.cpp @@ -47,7 +47,7 @@ struct identity { void compile_generator() { supported_simd128_ctor(identity()); not_supported_simd128_ctor([](int i) { return float(i); }); - not_supported_simd128_ctor([](intptr_t i) { return (int*)(i); }); + not_supported_simd128_ctor([](std::intptr_t i) { return (int*)(i); }); not_supported_simd128_ctor([](int* i) { return i; }); } diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp index 1d4699c18d8ea..89e2b6bf397b0 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.fail.cpp @@ -45,7 +45,7 @@ int main(int, char**) static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}} static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}} static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}} - static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}} + static_assert(toobig(), ""); // expected-error-re {{{{(static_assert|static assertion)}} expression is not an integral constant expression}} return 0; } diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp index cbaf5dfbec19c..a4d70dc8c52c9 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp @@ -89,7 +89,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -120,7 +120,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -136,7 +136,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp index 473238be5e92c..0af4cddb074d1 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp @@ -86,7 +86,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -118,7 +118,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -135,7 +135,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp index 8110048e13960..747b4e02bac5c 100644 --- 
a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp @@ -91,7 +91,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -122,7 +122,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -138,7 +138,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp index 1c30f5cec5191..398fee8cafc1d 100644 --- a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp +++ b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp @@ -89,7 +89,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -120,7 +120,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -136,7 +136,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp index b236e37ee8791..7b8f6da809b26 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp @@ -86,7 +86,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -117,7 +117,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -133,7 +133,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp index 58c953f0b97a8..4b0f8156d621b 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp @@ -85,7 +85,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -116,7 +116,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -132,7 +132,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git 
a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp index 208e694e0282a..b88a770745d5d 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp @@ -90,7 +90,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -121,7 +121,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -137,7 +137,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp index 0d14d9e71044b..2f2f81d961ad9 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp @@ -87,7 +87,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -118,7 +118,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -134,7 +134,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp index 383338a2f21bd..605b84ed35158 100644 --- a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp @@ -97,7 +97,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -128,7 +128,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -144,7 +144,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp index 6cc1410eb682f..2b56ae15682e0 100644 --- a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp @@ -85,7 +85,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -117,7 +117,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -134,7 +134,7 @@ int main(int, char**) 
test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp index b218bb0397335..fee122fe607b5 100644 --- a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp +++ b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp @@ -86,7 +86,7 @@ int main(int, char**) static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); - static_assert(!std::is_invocable_v); + static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); static_assert(!std::is_invocable_v); @@ -118,7 +118,7 @@ int main(int, char**) static_assert(test()); static_assert(test()); static_assert(test()); - static_assert(test()); + static_assert(test()); static_assert(test()); test(); @@ -135,7 +135,7 @@ int main(int, char**) test(); test(); test(); - test(); + test(); test(); return 0; diff --git a/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.other.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.other.pass.cpp index 6756474024821..4c94cd5edf62e 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.other.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.elements/iterator/ctor.other.pass.cpp @@ -21,7 +21,7 @@ template struct ConvertibleIter : IterBase> { using iterator_category = std::random_access_iterator_tag; using value_type = std::tuple; - using difference_type = intptr_t; + using difference_type = std::intptr_t; bool movedFromOtherConst = false; int i = 0; diff --git a/libcxx/test/std/ranges/range.adaptors/range.elements/sentinel/equality.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.elements/sentinel/equality.pass.cpp index 55477cc997587..df95e07c97d97 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.elements/sentinel/equality.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.elements/sentinel/equality.pass.cpp @@ -23,7 +23,7 @@ struct Iter { std::tuple* it_; using value_type = std::tuple; - using difference_type = intptr_t; + using difference_type = std::intptr_t; using iterator_concept = std::input_iterator_tag; constexpr decltype(auto) operator*() const { return *it_; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.elements/types.h b/libcxx/test/std/ranges/range.adaptors/range.elements/types.h index a1c0884b60719..f1ee165c3cc63 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.elements/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.elements/types.h @@ -93,7 +93,7 @@ template struct IterBase { using iterator_concept = std::random_access_iterator_tag; using value_type = std::tuple; - using difference_type = intptr_t; + using difference_type = std::intptr_t; constexpr std::tuple operator*() const { return std::tuple(5); } diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/types.h b/libcxx/test/std/ranges/range.adaptors/range.split/types.h index ff2ce38317cd9..aa47faf0f468b 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.split/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.split/types.h @@ -20,7 +20,7 @@ template struct ForwardIterBase { using iterator_concept = std::forward_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; constexpr int operator*() const { return 5; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.take.while/sentinel/equality.pass.cpp 
b/libcxx/test/std/ranges/range.adaptors/range.take.while/sentinel/equality.pass.cpp index 3d5b835c01c27..db3e5764421af 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take.while/sentinel/equality.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take.while/sentinel/equality.pass.cpp @@ -26,7 +26,7 @@ struct Iter { int* it_; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; using iterator_concept = std::input_iterator_tag; constexpr decltype(auto) operator*() const { return *it_; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/arithmetic.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/arithmetic.pass.cpp index ce7858f1a8921..efe64b31f79fb 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/arithmetic.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/arithmetic.pass.cpp @@ -63,7 +63,7 @@ constexpr bool test() { assert(&y1 == &(b[3])); using Iter = decltype(it1); - static_assert(canPlusEqual); + static_assert(canPlusEqual); } { @@ -83,7 +83,7 @@ constexpr bool test() { assert(&y1 == &(b[2])); using Iter = decltype(it1); - static_assert(canMinusEqual); + static_assert(canMinusEqual); } { @@ -116,12 +116,12 @@ constexpr bool test() { // One of the ranges is not random access std::ranges::zip_view v(a, b, ForwardSizedView{buffer1}); using Iter = decltype(v.begin()); - static_assert(!std::invocable, Iter, intptr_t>); - static_assert(!std::invocable, intptr_t, Iter>); - static_assert(!canPlusEqual); - static_assert(!std::invocable, Iter, intptr_t>); + static_assert(!std::invocable, Iter, std::intptr_t>); + static_assert(!std::invocable, std::intptr_t, Iter>); + static_assert(!canPlusEqual); + static_assert(!std::invocable, Iter, std::intptr_t>); static_assert(std::invocable, Iter, Iter>); - static_assert(!canMinusEqual); + static_assert(!canMinusEqual); } { diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/compare.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/compare.pass.cpp index 19b5b99993495..fcbff722c39b3 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/compare.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/compare.pass.cpp @@ -41,7 +41,7 @@ struct LessThanIterator { using iterator_category = std::random_access_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; constexpr int& operator*() const { return *it_; } constexpr int& operator[](difference_type n) const { return it_[n]; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/ctor.default.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/ctor.default.pass.cpp index 8c038abdb24f5..98078b2ce3095 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/ctor.default.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/ctor.default.pass.cpp @@ -20,7 +20,7 @@ struct PODIter { using iterator_category = std::random_access_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; constexpr int operator*() const { return i; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/member_types.compile.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/member_types.compile.pass.cpp index 6b0c086d8c4fc..c19f6c2b16524 100644 --- 
a/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/member_types.compile.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/iterator/member_types.compile.pass.cpp @@ -144,16 +144,16 @@ void test() { { // difference_type of single view - std::ranges::zip_view v{DiffTypeRange{}}; + std::ranges::zip_view v{DiffTypeRange{}}; using Iter = decltype(v.begin()); - static_assert(std::is_same_v); + static_assert(std::is_same_v); } { // difference_type of multiple views should be the common type - std::ranges::zip_view v{DiffTypeRange{}, DiffTypeRange{}}; + std::ranges::zip_view v{DiffTypeRange{}, DiffTypeRange{}}; using Iter = decltype(v.begin()); - static_assert(std::is_same_v>); + static_assert(std::is_same_v>); } const std::array foos{Foo{}}; diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/sentinel/minus.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.zip/sentinel/minus.pass.cpp index 7f8b513a97717..c4c85bc24e1e8 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/sentinel/minus.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/sentinel/minus.pass.cpp @@ -34,7 +34,7 @@ struct convertible_forward_sized_iterator { using iterator_category = std::forward_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; convertible_forward_sized_iterator() = default; constexpr convertible_forward_sized_iterator(Base it) : it_(it) {} diff --git a/libcxx/test/std/ranges/range.adaptors/range.zip/types.h b/libcxx/test/std/ranges/range.adaptors/range.zip/types.h index 299ffeac0489a..fa82b836f529b 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.zip/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.zip/types.h @@ -201,7 +201,7 @@ struct forward_sized_iterator { using iterator_category = std::forward_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; using pointer = Base; using reference = decltype(*Base{}); @@ -405,7 +405,7 @@ struct iter_move_swap_iterator { using iterator_category = std::input_iterator_tag; using value_type = int; - using difference_type = intptr_t; + using difference_type = std::intptr_t; constexpr int operator*() const { return i; } diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.pointer.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.pointer.pass.cpp index 37d50f3d17017..83a3df3d1e447 100644 --- a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.pointer.pass.cpp +++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/formatter.pointer.pass.cpp @@ -59,7 +59,7 @@ void test(StringT expected, StringViewT fmt, PointerT arg) { buffer[0] = CharT('0'); buffer[1] = CharT('x'); expected.append(buffer.begin(), - std::to_chars(buffer.begin() + 2, buffer.end(), reinterpret_cast(arg), 16).ptr); + std::to_chars(buffer.begin() + 2, buffer.end(), reinterpret_cast(arg), 16).ptr); } assert(result == expected); } diff --git a/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp b/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp index c645ad8f476f1..124eb843d298e 100644 --- a/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp +++ b/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp @@ -90,7 +90,7 @@ int main(int, char**) test(); test(); - test(); + test(); test(); test(); @@ -108,7 +108,7 @@ 
int main(int, char**) test(); test(); - test(); + test(); #ifndef TEST_HAS_NO_INT128 test<__int128_t>(); diff --git a/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp b/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp index a4f534e4b212c..3e1006ad984cd 100644 --- a/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp +++ b/libcxx/test/std/utilities/memory/temporary.buffer/overaligned.pass.cpp @@ -40,7 +40,7 @@ int main(int, char**) { std::pair ip = std::get_temporary_buffer(5); assert(!(ip.first == nullptr) ^ (ip.second == 0)); - assert(reinterpret_cast(ip.first) % alignof(A) == 0); + assert(reinterpret_cast(ip.first) % alignof(A) == 0); std::return_temporary_buffer(ip.first); return 0; diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp index e354d4a2721d5..4281cc1aa9e0d 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.create/allocate_shared_construct.pass.cpp @@ -123,7 +123,7 @@ struct Bar { }; void test_aligned(void* p, std::size_t align) { - assert(reinterpret_cast(p) % align == 0); + assert(reinterpret_cast(p) % align == 0); } int main(int, char**) { diff --git a/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp index a1c24b745b441..66318951a8c97 100644 --- a/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp +++ b/libcxx/test/std/utilities/meta/meta.unary.prop.query/alignment_of.pass.cpp @@ -43,8 +43,8 @@ int main(int, char**) { test_alignment_of(); test_alignment_of(); - test_alignment_of(); - test_alignment_of(); + test_alignment_of(); + test_alignment_of(); test_alignment_of(); test_alignment_of(); test_alignment_of(); From 71a5958406fb8d13ed3692db7696b68f59752053 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 22 Mar 2023 22:20:10 +0100 Subject: [PATCH 347/691] [libc++] Remove __mutex_base header This header should have been removed in https://reviews.llvm.org/D146228, but there was a merge conflict. --- libcxx/include/__mutex_base | 521 ------------------------------------ 1 file changed, 521 deletions(-) delete mode 100644 libcxx/include/__mutex_base diff --git a/libcxx/include/__mutex_base b/libcxx/include/__mutex_base deleted file mode 100644 index 191955363a2d3..0000000000000 --- a/libcxx/include/__mutex_base +++ /dev/null @@ -1,521 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___MUTEX_BASE -#define _LIBCPP___MUTEX_BASE - -#include <__chrono/duration.h> -#include <__chrono/steady_clock.h> -#include <__chrono/system_clock.h> -#include <__chrono/time_point.h> -#include <__config> -#include <__threading_support> -#include <__type_traits/is_floating_point.h> -#include <__type_traits/is_nothrow_default_constructible.h> -#include -#include -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -_LIBCPP_PUSH_MACROS -#include <__undef_macros> - - -_LIBCPP_BEGIN_NAMESPACE_STD - -#ifndef _LIBCPP_HAS_NO_THREADS - -class _LIBCPP_TYPE_VIS _LIBCPP_THREAD_SAFETY_ANNOTATION(capability("mutex")) mutex -{ - __libcpp_mutex_t __m_ = _LIBCPP_MUTEX_INITIALIZER; - -public: - _LIBCPP_INLINE_VISIBILITY - _LIBCPP_CONSTEXPR mutex() = default; - - mutex(const mutex&) = delete; - mutex& operator=(const mutex&) = delete; - -#if defined(_LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION) - ~mutex() = default; -#else - ~mutex() _NOEXCEPT; -#endif - - void lock() _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability()); - bool try_lock() _NOEXCEPT _LIBCPP_THREAD_SAFETY_ANNOTATION(try_acquire_capability(true)); - void unlock() _NOEXCEPT _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability()); - - typedef __libcpp_mutex_t* native_handle_type; - _LIBCPP_INLINE_VISIBILITY native_handle_type native_handle() {return &__m_;} -}; - -static_assert(is_nothrow_default_constructible::value, - "the default constructor for std::mutex must be nothrow"); - -struct _LIBCPP_TYPE_VIS defer_lock_t { explicit defer_lock_t() = default; }; -struct _LIBCPP_TYPE_VIS try_to_lock_t { explicit try_to_lock_t() = default; }; -struct _LIBCPP_TYPE_VIS adopt_lock_t { explicit adopt_lock_t() = default; }; - -# if defined(_LIBCPP_BUILDING_LIBRARY) -extern _LIBCPP_EXPORTED_FROM_ABI const defer_lock_t defer_lock; -extern _LIBCPP_EXPORTED_FROM_ABI const try_to_lock_t try_to_lock; -extern _LIBCPP_EXPORTED_FROM_ABI const adopt_lock_t adopt_lock; -# elif !defined(_LIBCPP_CXX03_LANG) -/* inline */ constexpr defer_lock_t defer_lock = defer_lock_t(); -/* inline */ constexpr try_to_lock_t try_to_lock = try_to_lock_t(); -/* inline */ constexpr adopt_lock_t adopt_lock = adopt_lock_t(); -# endif - -template -class _LIBCPP_TEMPLATE_VIS _LIBCPP_THREAD_SAFETY_ANNOTATION(scoped_lockable) -lock_guard -{ -public: - typedef _Mutex mutex_type; - -private: - mutex_type& __m_; -public: - - _LIBCPP_NODISCARD_EXT _LIBCPP_INLINE_VISIBILITY - explicit lock_guard(mutex_type& __m) _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability(__m)) - : __m_(__m) {__m_.lock();} - - _LIBCPP_NODISCARD_EXT _LIBCPP_INLINE_VISIBILITY - lock_guard(mutex_type& __m, adopt_lock_t) _LIBCPP_THREAD_SAFETY_ANNOTATION(requires_capability(__m)) - : __m_(__m) {} - _LIBCPP_INLINE_VISIBILITY - ~lock_guard() _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability()) {__m_.unlock();} - -private: - lock_guard(lock_guard const&) = delete; - lock_guard& operator=(lock_guard const&) = delete; -}; -_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(lock_guard); - -template -class _LIBCPP_TEMPLATE_VIS unique_lock -{ -public: - typedef _Mutex mutex_type; - -private: - mutex_type* __m_; - bool __owns_; - -public: - _LIBCPP_INLINE_VISIBILITY - unique_lock() _NOEXCEPT : __m_(nullptr), __owns_(false) {} - _LIBCPP_INLINE_VISIBILITY - explicit unique_lock(mutex_type& __m) - : __m_(_VSTD::addressof(__m)), __owns_(true) {__m_->lock();} 
- _LIBCPP_INLINE_VISIBILITY - unique_lock(mutex_type& __m, defer_lock_t) _NOEXCEPT - : __m_(_VSTD::addressof(__m)), __owns_(false) {} - _LIBCPP_INLINE_VISIBILITY - unique_lock(mutex_type& __m, try_to_lock_t) - : __m_(_VSTD::addressof(__m)), __owns_(__m.try_lock()) {} - _LIBCPP_INLINE_VISIBILITY - unique_lock(mutex_type& __m, adopt_lock_t) - : __m_(_VSTD::addressof(__m)), __owns_(true) {} - template - _LIBCPP_INLINE_VISIBILITY - unique_lock(mutex_type& __m, const chrono::time_point<_Clock, _Duration>& __t) - : __m_(_VSTD::addressof(__m)), __owns_(__m.try_lock_until(__t)) {} - template - _LIBCPP_INLINE_VISIBILITY - unique_lock(mutex_type& __m, const chrono::duration<_Rep, _Period>& __d) - : __m_(_VSTD::addressof(__m)), __owns_(__m.try_lock_for(__d)) {} - _LIBCPP_INLINE_VISIBILITY - ~unique_lock() - { - if (__owns_) - __m_->unlock(); - } - - unique_lock(unique_lock const&) = delete; - unique_lock& operator=(unique_lock const&) = delete; - - _LIBCPP_INLINE_VISIBILITY - unique_lock(unique_lock&& __u) _NOEXCEPT - : __m_(__u.__m_), __owns_(__u.__owns_) - {__u.__m_ = nullptr; __u.__owns_ = false;} - _LIBCPP_INLINE_VISIBILITY - unique_lock& operator=(unique_lock&& __u) _NOEXCEPT - { - if (__owns_) - __m_->unlock(); - __m_ = __u.__m_; - __owns_ = __u.__owns_; - __u.__m_ = nullptr; - __u.__owns_ = false; - return *this; - } - - void lock(); - bool try_lock(); - - template - bool try_lock_for(const chrono::duration<_Rep, _Period>& __d); - template - bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t); - - void unlock(); - - _LIBCPP_INLINE_VISIBILITY - void swap(unique_lock& __u) _NOEXCEPT - { - _VSTD::swap(__m_, __u.__m_); - _VSTD::swap(__owns_, __u.__owns_); - } - _LIBCPP_INLINE_VISIBILITY - mutex_type* release() _NOEXCEPT - { - mutex_type* __m = __m_; - __m_ = nullptr; - __owns_ = false; - return __m; - } - - _LIBCPP_INLINE_VISIBILITY - bool owns_lock() const _NOEXCEPT {return __owns_;} - _LIBCPP_INLINE_VISIBILITY - explicit operator bool() const _NOEXCEPT {return __owns_;} - _LIBCPP_INLINE_VISIBILITY - mutex_type* mutex() const _NOEXCEPT {return __m_;} -}; -_LIBCPP_CTAD_SUPPORTED_FOR_TYPE(unique_lock); - -template -void -unique_lock<_Mutex>::lock() -{ - if (__m_ == nullptr) - __throw_system_error(EPERM, "unique_lock::lock: references null mutex"); - if (__owns_) - __throw_system_error(EDEADLK, "unique_lock::lock: already locked"); - __m_->lock(); - __owns_ = true; -} - -template -bool -unique_lock<_Mutex>::try_lock() -{ - if (__m_ == nullptr) - __throw_system_error(EPERM, "unique_lock::try_lock: references null mutex"); - if (__owns_) - __throw_system_error(EDEADLK, "unique_lock::try_lock: already locked"); - __owns_ = __m_->try_lock(); - return __owns_; -} - -template -template -bool -unique_lock<_Mutex>::try_lock_for(const chrono::duration<_Rep, _Period>& __d) -{ - if (__m_ == nullptr) - __throw_system_error(EPERM, "unique_lock::try_lock_for: references null mutex"); - if (__owns_) - __throw_system_error(EDEADLK, "unique_lock::try_lock_for: already locked"); - __owns_ = __m_->try_lock_for(__d); - return __owns_; -} - -template -template -bool -unique_lock<_Mutex>::try_lock_until(const chrono::time_point<_Clock, _Duration>& __t) -{ - if (__m_ == nullptr) - __throw_system_error(EPERM, "unique_lock::try_lock_until: references null mutex"); - if (__owns_) - __throw_system_error(EDEADLK, "unique_lock::try_lock_until: already locked"); - __owns_ = __m_->try_lock_until(__t); - return __owns_; -} - -template -void -unique_lock<_Mutex>::unlock() -{ - if (!__owns_) - 
__throw_system_error(EPERM, "unique_lock::unlock: not locked"); - __m_->unlock(); - __owns_ = false; -} - -template -inline _LIBCPP_INLINE_VISIBILITY -void -swap(unique_lock<_Mutex>& __x, unique_lock<_Mutex>& __y) _NOEXCEPT - {__x.swap(__y);} - -//enum class cv_status -_LIBCPP_DECLARE_STRONG_ENUM(cv_status) -{ - no_timeout, - timeout -}; -_LIBCPP_DECLARE_STRONG_ENUM_EPILOG(cv_status) - -class _LIBCPP_TYPE_VIS condition_variable -{ - __libcpp_condvar_t __cv_ = _LIBCPP_CONDVAR_INITIALIZER; -public: - _LIBCPP_INLINE_VISIBILITY - _LIBCPP_CONSTEXPR condition_variable() _NOEXCEPT = default; - -#ifdef _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION - ~condition_variable() = default; -#else - ~condition_variable(); -#endif - - condition_variable(const condition_variable&) = delete; - condition_variable& operator=(const condition_variable&) = delete; - - void notify_one() _NOEXCEPT; - void notify_all() _NOEXCEPT; - - void wait(unique_lock& __lk) _NOEXCEPT; - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS - void wait(unique_lock& __lk, _Predicate __pred); - - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS - cv_status - wait_until(unique_lock& __lk, - const chrono::time_point<_Clock, _Duration>& __t); - - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS - bool - wait_until(unique_lock& __lk, - const chrono::time_point<_Clock, _Duration>& __t, - _Predicate __pred); - - template - _LIBCPP_METHOD_TEMPLATE_IMPLICIT_INSTANTIATION_VIS - cv_status - wait_for(unique_lock& __lk, - const chrono::duration<_Rep, _Period>& __d); - - template - bool - _LIBCPP_INLINE_VISIBILITY - wait_for(unique_lock& __lk, - const chrono::duration<_Rep, _Period>& __d, - _Predicate __pred); - - typedef __libcpp_condvar_t* native_handle_type; - _LIBCPP_INLINE_VISIBILITY native_handle_type native_handle() {return &__cv_;} - -private: - void __do_timed_wait(unique_lock& __lk, - chrono::time_point) _NOEXCEPT; -#if defined(_LIBCPP_HAS_COND_CLOCKWAIT) - void __do_timed_wait(unique_lock& __lk, - chrono::time_point) _NOEXCEPT; -#endif - template - void __do_timed_wait(unique_lock& __lk, - chrono::time_point<_Clock, chrono::nanoseconds>) _NOEXCEPT; -}; -#endif // !_LIBCPP_HAS_NO_THREADS - -template -inline _LIBCPP_INLINE_VISIBILITY -__enable_if_t::value, chrono::nanoseconds> -__safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) -{ - using namespace chrono; - using __ratio = ratio_divide<_Period, nano>; - using __ns_rep = nanoseconds::rep; - _Rep __result_float = __d.count() * __ratio::num / __ratio::den; - - _Rep __result_max = numeric_limits<__ns_rep>::max(); - if (__result_float >= __result_max) { - return nanoseconds::max(); - } - - _Rep __result_min = numeric_limits<__ns_rep>::min(); - if (__result_float <= __result_min) { - return nanoseconds::min(); - } - - return nanoseconds(static_cast<__ns_rep>(__result_float)); -} - -template -inline _LIBCPP_INLINE_VISIBILITY -__enable_if_t::value, chrono::nanoseconds> -__safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) -{ - using namespace chrono; - if (__d.count() == 0) { - return nanoseconds(0); - } - - using __ratio = ratio_divide<_Period, nano>; - using __ns_rep = nanoseconds::rep; - __ns_rep __result_max = numeric_limits<__ns_rep>::max(); - if (__d.count() > 0 && __d.count() > __result_max / __ratio::num) { - return nanoseconds::max(); - } - - __ns_rep __result_min = numeric_limits<__ns_rep>::min(); - if (__d.count() < 0 && __d.count() < __result_min / __ratio::num) { - return nanoseconds::min(); - } - - __ns_rep __result = __d.count() * 
__ratio::num / __ratio::den; - if (__result == 0) { - return nanoseconds(1); - } - - return nanoseconds(__result); -} - -#ifndef _LIBCPP_HAS_NO_THREADS -template -void -condition_variable::wait(unique_lock& __lk, _Predicate __pred) -{ - while (!__pred()) - wait(__lk); -} - -template -cv_status -condition_variable::wait_until(unique_lock& __lk, - const chrono::time_point<_Clock, _Duration>& __t) -{ - using namespace chrono; - using __clock_tp_ns = time_point<_Clock, nanoseconds>; - - typename _Clock::time_point __now = _Clock::now(); - if (__t <= __now) - return cv_status::timeout; - - __clock_tp_ns __t_ns = __clock_tp_ns(_VSTD::__safe_nanosecond_cast(__t.time_since_epoch())); - - __do_timed_wait(__lk, __t_ns); - return _Clock::now() < __t ? cv_status::no_timeout : cv_status::timeout; -} - -template -bool -condition_variable::wait_until(unique_lock& __lk, - const chrono::time_point<_Clock, _Duration>& __t, - _Predicate __pred) -{ - while (!__pred()) - { - if (wait_until(__lk, __t) == cv_status::timeout) - return __pred(); - } - return true; -} - -template -cv_status -condition_variable::wait_for(unique_lock& __lk, - const chrono::duration<_Rep, _Period>& __d) -{ - using namespace chrono; - if (__d <= __d.zero()) - return cv_status::timeout; - using __ns_rep = nanoseconds::rep; - steady_clock::time_point __c_now = steady_clock::now(); - -#if defined(_LIBCPP_HAS_COND_CLOCKWAIT) - using __clock_tp_ns = time_point; - __ns_rep __now_count_ns = _VSTD::__safe_nanosecond_cast(__c_now.time_since_epoch()).count(); -#else - using __clock_tp_ns = time_point; - __ns_rep __now_count_ns = _VSTD::__safe_nanosecond_cast(system_clock::now().time_since_epoch()).count(); -#endif - - __ns_rep __d_ns_count = _VSTD::__safe_nanosecond_cast(__d).count(); - - if (__now_count_ns > numeric_limits<__ns_rep>::max() - __d_ns_count) { - __do_timed_wait(__lk, __clock_tp_ns::max()); - } else { - __do_timed_wait(__lk, __clock_tp_ns(nanoseconds(__now_count_ns + __d_ns_count))); - } - - return steady_clock::now() - __c_now < __d ? 
cv_status::no_timeout : - cv_status::timeout; -} - -template -inline -bool -condition_variable::wait_for(unique_lock& __lk, - const chrono::duration<_Rep, _Period>& __d, - _Predicate __pred) -{ - return wait_until(__lk, chrono::steady_clock::now() + __d, - _VSTD::move(__pred)); -} - -#if defined(_LIBCPP_HAS_COND_CLOCKWAIT) -inline -void -condition_variable::__do_timed_wait(unique_lock& __lk, - chrono::time_point __tp) _NOEXCEPT -{ - using namespace chrono; - if (!__lk.owns_lock()) - __throw_system_error(EPERM, - "condition_variable::timed wait: mutex not locked"); - nanoseconds __d = __tp.time_since_epoch(); - timespec __ts; - seconds __s = duration_cast(__d); - using __ts_sec = decltype(__ts.tv_sec); - const __ts_sec __ts_sec_max = numeric_limits<__ts_sec>::max(); - if (__s.count() < __ts_sec_max) - { - __ts.tv_sec = static_cast<__ts_sec>(__s.count()); - __ts.tv_nsec = (__d - __s).count(); - } - else - { - __ts.tv_sec = __ts_sec_max; - __ts.tv_nsec = giga::num - 1; - } - int __ec = pthread_cond_clockwait(&__cv_, __lk.mutex()->native_handle(), CLOCK_MONOTONIC, &__ts); - if (__ec != 0 && __ec != ETIMEDOUT) - __throw_system_error(__ec, "condition_variable timed_wait failed"); -} -#endif // _LIBCPP_HAS_COND_CLOCKWAIT - -template -inline -void -condition_variable::__do_timed_wait(unique_lock& __lk, - chrono::time_point<_Clock, chrono::nanoseconds> __tp) _NOEXCEPT -{ - wait_for(__lk, __tp - _Clock::now()); -} - -#endif // !_LIBCPP_HAS_NO_THREADS - -_LIBCPP_END_NAMESPACE_STD - -_LIBCPP_POP_MACROS - -#endif // _LIBCPP___MUTEX_BASE From 4d18d97b594ccaa3cbd79beb4afef45e4156dc8d Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Thu, 9 Mar 2023 11:10:32 -0800 Subject: [PATCH 348/691] [lldb] Fix dwim-print error message for missing expr --- lldb/source/Commands/CommandObjectDWIMPrint.cpp | 10 ++++++---- lldb/test/API/commands/dwim-print/TestDWIMPrint.py | 7 +++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/lldb/source/Commands/CommandObjectDWIMPrint.cpp b/lldb/source/Commands/CommandObjectDWIMPrint.cpp index 419a27acc8181..ed816195350e9 100644 --- a/lldb/source/Commands/CommandObjectDWIMPrint.cpp +++ b/lldb/source/Commands/CommandObjectDWIMPrint.cpp @@ -61,14 +61,16 @@ bool CommandObjectDWIMPrint::DoExecute(StringRef command, OptionsWithRaw args{command}; StringRef expr = args.GetRawPart(); + if (expr.empty()) { + result.AppendErrorWithFormatv("'{0}' takes a variable or expression", + m_cmd_name); + return false; + } + if (args.HasArgs()) { if (!ParseOptionsAndNotify(args.GetArgs(), result, m_option_group, m_exe_ctx)) return false; - } else if (command.empty()) { - result.AppendErrorWithFormatv("'{0}' takes a variable or expression", - m_cmd_name); - return false; } // If the user has not specified, default to disabling persistent results. 
diff --git a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py
index 22d18f91d0a59..9f69895f43692 100644
--- a/lldb/test/API/commands/dwim-print/TestDWIMPrint.py
+++ b/lldb/test/API/commands/dwim-print/TestDWIMPrint.py
@@ -107,3 +107,10 @@ def test_expression_language(self):
         lldbutil.run_to_name_breakpoint(self, "main")
         self._expect_cmd(f"dwim-print -l c++ -- argc", "frame variable")
         self._expect_cmd(f"dwim-print -l c++ -- argc + 1", "expression")
+
+    def test_empty_expression(self):
+        self.build()
+        lldbutil.run_to_name_breakpoint(self, "main")
+        error_msg = "error: 'dwim-print' takes a variable or expression"
+        self.expect(f"dwim-print", error=True, startstr=error_msg)
+        self.expect(f"dwim-print -- ", error=True, startstr=error_msg)

From fb8d894f23c5e805f0c87d89fb9d6c0eed3a0e72 Mon Sep 17 00:00:00 2001
From: Chia-hung Duan
Date: Wed, 22 Mar 2023 21:46:23 +0000
Subject: [PATCH 349/691] [scudo] Early exit from the case that can't do page release.

There are heuristics to avoid marking blocks if there's little chance
to release pages. So far, that logic only exists in the block-marking
section and we didn't leverage its results. For example, one round of
releaseToOS may learn that we are still 128 KB away from the release
threshold. In the next round of releaseToOS, we can then exit early if
the number of pushed bytes is smaller than 128 KB, without looping over
each memory group.

This CL adds that heuristic and reduces the amount of time spent
checking the status of each memory group (an illustrative editorial
sketch follows this patch). It only applies the heuristic to
SizeClassAllocator64; SizeClassAllocator32 has a smaller region/group
size, so the heuristic has little impact there at the default settings.

Reviewed By: cferris

Differential Revision: https://reviews.llvm.org/D146312
---
 compiler-rt/lib/scudo/standalone/primary64.h | 120 ++++++++++++++-----
 1 file changed, 87 insertions(+), 33 deletions(-)

diff --git a/compiler-rt/lib/scudo/standalone/primary64.h b/compiler-rt/lib/scudo/standalone/primary64.h
index 1cb6d02f6cd6a..bca5ab82f3d59 100644
--- a/compiler-rt/lib/scudo/standalone/primary64.h
+++ b/compiler-rt/lib/scudo/standalone/primary64.h
@@ -64,32 +64,8 @@ template <typename Config> class SizeClassAllocator64 {
   void init(s32 ReleaseToOsInterval) NO_THREAD_SAFETY_ANALYSIS {
     DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT)));
-    DCHECK_EQ(PrimaryBase, 0U);
-
-    // Reserve the space required for the Primary.
-    PrimaryBase = reinterpret_cast<uptr>(map(
-        nullptr, PrimarySize, "scudo:primary_reserve", MAP_NOACCESS, &Data));
-    u32 Seed;
-    const u64 Time = getMonotonicTimeFast();
-    if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))
-      Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12));
     const uptr PageSize = getPageSizeCached();
-    for (uptr I = 0; I < NumClasses; I++) {
-      RegionInfo *Region = getRegionInfo(I);
-      // The actual start of a region is offset by a random number of pages
-      // when PrimaryEnableRandomOffset is set.
-      Region->RegionBeg = (PrimaryBase + (I << Config::PrimaryRegionSizeLog)) +
-                          (Config::PrimaryEnableRandomOffset
-                               ? ((getRandomModN(&Seed, 16) + 1) * PageSize)
-                               : 0);
-      Region->RandState = getRandomU32(&Seed);
-      Region->ReleaseInfo.LastReleaseAtNs = Time;
-    }
-    shuffle(RegionInfoArray, NumClasses, &Seed);
-
-    setOption(Option::ReleaseInterval, static_cast<s32>(ReleaseToOsInterval));
-
     const uptr GroupSize = (1U << GroupSizeLog);
     const uptr PagesInGroup = GroupSize / PageSize;
     const uptr MinSizeClass = getSizeByClassId(1);
@@ -126,6 +102,37 @@ template <typename Config> class SizeClassAllocator64 {
     // use its size of in-use blocks as a heuristic.
     SmallerBlockReleasePageDelta =
         PagesInGroup * (1 + MinSizeClass / 16U) / 100;
+
+    DCHECK_EQ(PrimaryBase, 0U);
+    // Reserve the space required for the Primary.
+    PrimaryBase = reinterpret_cast<uptr>(map(
+        nullptr, PrimarySize, "scudo:primary_reserve", MAP_NOACCESS, &Data));
+
+    u32 Seed;
+    const u64 Time = getMonotonicTimeFast();
+    if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))
+      Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12));
+
+    for (uptr I = 0; I < NumClasses; I++) {
+      RegionInfo *Region = getRegionInfo(I);
+      // The actual start of a region is offset by a random number of pages
+      // when PrimaryEnableRandomOffset is set.
+      Region->RegionBeg = (PrimaryBase + (I << Config::PrimaryRegionSizeLog)) +
+                          (Config::PrimaryEnableRandomOffset
+                               ? ((getRandomModN(&Seed, 16) + 1) * PageSize)
+                               : 0);
+      Region->RandState = getRandomU32(&Seed);
+      // Releasing small blocks is expensive, set a higher threshold to avoid
+      // frequent page releases.
+      if (isSmallBlock(getSizeByClassId(I)))
+        Region->TryReleaseThreshold = PageSize * SmallerBlockReleasePageDelta;
+      else
+        Region->TryReleaseThreshold = PageSize;
+      Region->ReleaseInfo.LastReleaseAtNs = Time;
+    }
+    shuffle(RegionInfoArray, NumClasses, &Seed);
+
+    setOption(Option::ReleaseInterval, static_cast<s32>(ReleaseToOsInterval));
   }

   void unmapTestOnly() NO_THREAD_SAFETY_ANALYSIS {
@@ -440,6 +447,8 @@ template <typename Config> class SizeClassAllocator64 {
     uptr MappedUser GUARDED_BY(Mutex) = 0;
     // Bytes allocated for user memory.
     uptr AllocatedUser GUARDED_BY(Mutex) = 0;
+    // The minimum size of pushed blocks to trigger page release.
+    uptr TryReleaseThreshold GUARDED_BY(Mutex) = 0;
     MapPlatformData Data GUARDED_BY(Mutex) = {};
     ReleaseToOsInfo ReleaseInfo GUARDED_BY(Mutex) = {};
     bool Exhausted GUARDED_BY(Mutex) = false;
@@ -486,6 +495,11 @@ template <typename Config> class SizeClassAllocator64 {
     return Base + (CompactPtrGroupBase << CompactPtrScale);
   }

+  ALWAYS_INLINE static bool isSmallBlock(uptr BlockSize) {
+    const uptr PageSize = getPageSizeCached();
+    return BlockSize < PageSize / 16U;
+  }
+
   // Push the blocks to their batch group. The layout will be like,
   //
   //   FreeList - > BG -> BG -> BG
@@ -823,14 +837,15 @@ template <typename Config> class SizeClassAllocator64 {
       return 0; // Nothing new to release.

     const bool CheckDensity =
-        BlockSize < PageSize / 16U && ReleaseType != ReleaseToOS::ForceAll;
+        isSmallBlock(BlockSize) && ReleaseType != ReleaseToOS::ForceAll;
     // Releasing smaller blocks is expensive, so we want to make sure that a
     // significant amount of bytes are free, and that there has been a good
     // amount of batches pushed to the freelist before attempting to release.
     if (CheckDensity) {
       if (ReleaseType == ReleaseToOS::Normal &&
-          BytesPushed < Region->AllocatedUser / 16U)
+          BytesPushed < Region->TryReleaseThreshold) {
         return 0;
+      }
     }

     if (ReleaseType == ReleaseToOS::Normal) {
@@ -865,11 +880,18 @@ template <typename Config> class SizeClassAllocator64 {
     // of groups.
     uptr NumberOfBatchGroups = Region->FreeList.size();

+    // We are examining each group and will take the minimum distance to the
+    // release threshold as the next Region::TryReleaseThreshold(). Note that
+    // if the size of free blocks has reached the release threshold, the
+    // distance to the next release will be PageSize *
+    // SmallerBlockReleasePageDelta. See the comment on
+    // `SmallerBlockReleasePageDelta` for more details.
+    uptr MinDistToThreshold = GroupSize;
+
     for (BatchGroup *BG = Region->FreeList.front(), *Prev = nullptr;
          BG != nullptr;) {
       const uptr PushedBytesDelta =
-          BG->PushedBlocks - BG->PushedBlocksAtLastCheckpoint;
-      if (PushedBytesDelta * BlockSize < PageSize) {
+          (BG->PushedBlocks - BG->PushedBlocksAtLastCheckpoint) * BlockSize;
+      if (PushedBytesDelta < PageSize) {
         Prev = BG;
         BG = BG->Next;
         continue;
@@ -913,16 +935,38 @@ template <typename Config> class SizeClassAllocator64 {
       // that this heuristic only applies when all the spaces in a BatchGroup
       // are allocated.
       if (CheckDensity) {
-        const bool HighDensity = (BytesInBG * 100U) / AllocatedGroupSize >=
-                                 (100U - 1U - BlockSize / 16U);
+        const uptr ReleaseThreshold =
+            (AllocatedGroupSize * (100 - 1U - BlockSize / 16U)) / 100U;
+        const bool HighDensity = BytesInBG >= ReleaseThreshold;
         const bool MayHaveReleasedAll = NumBlocks >= (GroupSize / BlockSize);
         // If all blocks in the group are released, we will do range marking
         // which is fast. Otherwise, we will wait until we have accumulated
         // a certain amount of free memory.
         const bool ReachReleaseDelta =
-            MayHaveReleasedAll ? true
-                               : PushedBytesDelta * BlockSize >=
-                                     PageSize * SmallerBlockReleasePageDelta;
+            MayHaveReleasedAll
+                ? true
+                : PushedBytesDelta >= PageSize * SmallerBlockReleasePageDelta;
+
+        if (!HighDensity) {
+          DCHECK_LE(BytesInBG, ReleaseThreshold);
+          // The following is the usage of a memory group,
+          //
+          //     BytesInBG             ReleaseThreshold
+          //  /             \                 v
+          //  +---+---------------------------+-----+
+          //  |   |         |                 |     |
+          //  +---+---------------------------+-----+
+          //       \        /                       ^
+          //    PushedBytesDelta                 GroupEnd
+          MinDistToThreshold =
+              Min(MinDistToThreshold,
+                  ReleaseThreshold - BytesInBG + PushedBytesDelta);
+        } else {
+          // If the group reaches high density in this round, the next release
+          // attempt will be based on SmallerBlockReleasePageDelta.
+          MinDistToThreshold =
+              Min(MinDistToThreshold, PageSize * SmallerBlockReleasePageDelta);
+        }

         if (!HighDensity || !ReachReleaseDelta) {
           Prev = BG;
@@ -976,6 +1020,16 @@ template <typename Config> class SizeClassAllocator64 {
       GroupToRelease.push_back(Cur);
     }

+    // Only small blocks have the adaptive `TryReleaseThreshold`.
+    if (isSmallBlock(BlockSize)) {
+      // If MinDistToThreshold was never updated, each memory group may have
+      // pushed less than a page size in total. In that case, just set the
+      // threshold back to normal.
+      if (MinDistToThreshold == GroupSize)
+        MinDistToThreshold = PageSize * SmallerBlockReleasePageDelta;
+      Region->TryReleaseThreshold = MinDistToThreshold;
+    }
+
     if (GroupToRelease.empty())
       return 0;

From fe27495be2040007c7b20844a9371b06156ab405 Mon Sep 17 00:00:00 2001
From: Teresa Johnson
Date: Thu, 29 Dec 2022 12:11:38 -0800
Subject: [PATCH 350/691] [MemProf] Context disambiguation cloning pass [patch 1b/3]

Adds support for building the graph in ThinLTO from MemProf summaries.
Follow-on patches will contain the support for cloning on the graph and
in the IR.

Depends on D140908.
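Editorial note, referring back to the scudo change in patch 349 above (this sketch is not part of either patch): a toy model of the adaptive try-release threshold, where ToyRegion and its fields are hypothetical stand-ins for the real SizeClassAllocator64 state. The point is only the early exit: if the last group scan found we were still MinDistToThreshold bytes short of any group's release threshold, a later attempt that has accumulated fewer pushed bytes than that cannot release anything, so the per-group loop can be skipped.

#include <cstdint>
#include <iostream>

// Toy region state; the real fields live in scudo's SizeClassAllocator64.
struct ToyRegion {
  uint64_t TryReleaseThreshold = 4096; // start at one page
  uint64_t BytesPushedSinceCheck = 0;  // freed bytes since the last attempt
};

// Returns true if a full per-group scan is worthwhile.
static bool shouldScanGroups(const ToyRegion &R) {
  // Early exit: we cannot have crossed any group's release threshold yet.
  return R.BytesPushedSinceCheck >= R.TryReleaseThreshold;
}

// Called after a scan that computed the smallest remaining gap to a release
// threshold across all groups; that gap becomes the next early-exit bound.
static void recordScanResult(ToyRegion &R, uint64_t MinDistToThreshold) {
  R.TryReleaseThreshold = MinDistToThreshold;
  R.BytesPushedSinceCheck = 0;
}

int main() {
  ToyRegion R;
  R.BytesPushedSinceCheck = 64 * 1024;
  if (shouldScanGroups(R))
    recordScanResult(R, 128 * 1024);    // scan says: still 128 KB short
  R.BytesPushedSinceCheck = 100 * 1024; // less than 128 KB pushed since
  std::cout << std::boolalpha << shouldScanGroups(R) << "\n"; // prints false
  return 0;
}
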
Differential Revision: https://reviews.llvm.org/D145836 --- llvm/include/llvm/IR/ModuleSummaryIndex.h | 10 + .../IPO/MemProfContextDisambiguation.h | 7 + llvm/lib/LTO/LTO.cpp | 16 +- .../IPO/MemProfContextDisambiguation.cpp | 221 +++++++++- llvm/test/ThinLTO/X86/memprof-basic.ll | 157 +++++++ .../X86/memprof-duplicate-context-ids.ll | 229 ++++++++++ .../X86/memprof-duplicate-context-ids2.ll | 390 ++++++++++++++++++ llvm/test/ThinLTO/X86/memprof-indirectcall.ll | 266 ++++++++++++ llvm/test/ThinLTO/X86/memprof-inlined.ll | 186 +++++++++ llvm/test/ThinLTO/X86/memprof-inlined2.ll | 124 ++++++ 10 files changed, 1599 insertions(+), 7 deletions(-) create mode 100644 llvm/test/ThinLTO/X86/memprof-basic.ll create mode 100644 llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll create mode 100644 llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll create mode 100644 llvm/test/ThinLTO/X86/memprof-indirectcall.ll create mode 100644 llvm/test/ThinLTO/X86/memprof-inlined.ll create mode 100644 llvm/test/ThinLTO/X86/memprof-inlined2.ll diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 18853102799b4..0c178ccef3bbb 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -988,12 +988,22 @@ class FunctionSummary : public GlobalValueSummary { return {}; } + CallsitesTy &mutableCallsites() { + assert(Callsites); + return *Callsites; + } + ArrayRef allocs() const { if (Allocs) return *Allocs; return {}; } + AllocsTy &mutableAllocs() { + assert(Allocs); + return *Allocs; + } + friend struct GraphTraits; }; diff --git a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h index 56e56ed67f7df..475ea48cca932 100644 --- a/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h +++ b/llvm/include/llvm/Transforms/IPO/MemProfContextDisambiguation.h @@ -19,9 +19,12 @@ #include "llvm/ADT/StringSet.h" #include "llvm/IR/GlobalValue.h" #include "llvm/IR/PassManager.h" +#include namespace llvm { +class GlobalValueSummary; class Module; +class ModuleSummaryIndex; class MemProfContextDisambiguation : public PassInfoMixin { @@ -32,6 +35,10 @@ class MemProfContextDisambiguation MemProfContextDisambiguation() {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + + void run(ModuleSummaryIndex &Index, + function_ref + isPrevailing); }; } // end namespace llvm diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp index 1f273a8e5025f..ee6b8c3aa234d 100644 --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -51,6 +51,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h" #include "llvm/Transforms/IPO/WholeProgramDevirt.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/SplitModule.h" @@ -75,6 +76,9 @@ cl::opt EnableLTOInternalization( cl::desc("Enable global value internalization in LTO")); } +/// Enable MemProf context disambiguation for thin link. +extern cl::opt EnableMemProfContextDisambiguation; + // Computes a unique hash for the Module considering the current list of // export/import and other global analysis results. // The hash is produced in \p Key. 
@@ -1539,6 +1543,14 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
   runWholeProgramDevirtOnIndex(ThinLTO.CombinedIndex, ExportedGUIDs,
                                LocalWPDTargetsMap);

+  auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
+    return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
+  };
+  if (EnableMemProfContextDisambiguation) {
+    MemProfContextDisambiguation ContextDisambiguation;
+    ContextDisambiguation.run(ThinLTO.CombinedIndex, isPrevailing);
+  }
+
   if (Conf.OptLevel > 0)
     ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
                              ImportLists, ExportLists);
@@ -1580,10 +1592,6 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
   updateIndexWPDForExports(ThinLTO.CombinedIndex, isExported,
                            LocalWPDTargetsMap);

-  auto isPrevailing = [&](GlobalValue::GUID GUID,
-                          const GlobalValueSummary *S) {
-    return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
-  };
   thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported,
                                       isPrevailing);

diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 5a6625743eecf..b2fcea1ec8694 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -14,9 +14,9 @@
 // subsequently annotated with an attribute for later transformation.
 //
 // The transformations can be performed either directly on IR (regular LTO), or
-// (eventually) on a ThinLTO index (later applied to the IR during the ThinLTO
-// backend). Both types of LTO operate on a the same base graph representation,
-// which uses CRTP to support either IR or Index formats.
+// on a ThinLTO index (and later applied to the IR during the ThinLTO backend).
+// Both types of LTO operate on the same base graph representation, which
+// uses CRTP to support either IR or Index formats.
 //
 //===----------------------------------------------------------------------===//

@@ -28,9 +28,11 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/MemoryProfileInfo.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/FileSystem.h"
@@ -458,6 +460,56 @@ class ModuleCallsiteContextGraph
   const Module &Mod;
 };

+/// Represents a call in the summary index graph, which can either be an
+/// allocation or an interior callsite node in an allocation's context.
+/// Holds a pointer to the corresponding data structure in the index.
+struct IndexCall : public PointerUnion<CallsiteInfo *, AllocInfo *> {
+  IndexCall() : PointerUnion() {}
+  IndexCall(std::nullptr_t) : IndexCall() {}
+  IndexCall(CallsiteInfo *StackNode) : PointerUnion(StackNode) {}
+  IndexCall(AllocInfo *AllocNode) : PointerUnion(AllocNode) {}
+
+  IndexCall *operator->() { return this; }
+
+  void print(raw_ostream &OS) const {
+    if (auto *AI = dyn_cast<AllocInfo *>())
+      OS << *AI;
+    else {
+      auto *CI = dyn_cast<CallsiteInfo *>();
+      assert(CI);
+      OS << *CI;
+    }
+  }
+};
+
+/// CRTP derived class for graphs built from summary index (ThinLTO).
+class IndexCallsiteContextGraph
+    : public CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
+                                  IndexCall> {
+public:
+  IndexCallsiteContextGraph(
+      ModuleSummaryIndex &Index,
+      function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+          isPrevailing);
+
+private:
+  friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
+                              IndexCall>;
+
+  uint64_t getStackId(uint64_t IdOrIndex) const;
+  bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func);
+  uint64_t getLastStackId(IndexCall &Call);
+  std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
+  std::string getLabel(const FunctionSummary *Func, const IndexCall &Call,
+                       unsigned CloneNo) const;
+
+  // Saves mapping from function summaries containing memprof records back to
+  // their VI, for use in checking and debugging.
+  std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
+
+  const ModuleSummaryIndex &Index;
+};
+
 namespace {

 struct FieldSeparator {
@@ -475,6 +527,20 @@ raw_ostream &operator<<(raw_ostream &OS, FieldSeparator &FS) {
   return OS << FS.Sep;
 }

+// Map the uint8_t alloc types (which may contain NotCold|Cold) to the alloc
+// type we should actually use on the corresponding allocation.
+// If we can't clone a node that has NotCold+Cold alloc type, we will fall
+// back to using NotCold. So don't bother cloning to distinguish NotCold+Cold
+// from NotCold.
+AllocationType allocTypeToUse(uint8_t AllocTypes) {
+  assert(AllocTypes != (uint8_t)AllocationType::None);
+  if (AllocTypes ==
+      ((uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold))
+    return AllocationType::NotCold;
+  else
+    return (AllocationType)AllocTypes;
+}
+
 } // end anonymous namespace

 template <typename DerivedCCG, typename FuncTy, typename CallTy>
@@ -1118,6 +1184,20 @@ uint64_t ModuleCallsiteContextGraph::getLastStackId(Instruction *Call) {
   return CallsiteContext.back();
 }

+uint64_t IndexCallsiteContextGraph::getLastStackId(IndexCall &Call) {
+  assert(Call.is<CallsiteInfo *>());
+  CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator>
+      CallsiteContext(Call.dyn_cast<CallsiteInfo *>());
+  // Need to convert index into stack id.
+  return Index.getStackIdAtIndex(CallsiteContext.back());
+}
+
+static std::string getMemProfFuncName(Twine Base, unsigned CloneNo) {
+  if (!CloneNo)
+    return Base.str();
+  return (Base + ".memprof."
+ Twine(CloneNo)).str(); +} + std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, const Instruction *Call, unsigned CloneNo) const { @@ -1126,6 +1206,22 @@ std::string ModuleCallsiteContextGraph::getLabel(const Function *Func, .str(); } +std::string IndexCallsiteContextGraph::getLabel(const FunctionSummary *Func, + const IndexCall &Call, + unsigned CloneNo) const { + auto VI = FSToVIMap.find(Func); + assert(VI != FSToVIMap.end()); + if (Call.is()) + return (VI->second.name() + " -> alloc").str(); + else { + auto *Callsite = Call.dyn_cast(); + return (VI->second.name() + " -> " + + getMemProfFuncName(Callsite->Callee.name(), + Callsite->Clones[CloneNo])) + .str(); + } +} + std::vector ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( Instruction *Call) { @@ -1135,6 +1231,16 @@ ModuleCallsiteContextGraph::getStackIdsWithContextNodesForCall( CallsiteContext); } +std::vector +IndexCallsiteContextGraph::getStackIdsWithContextNodesForCall(IndexCall &Call) { + assert(Call.is()); + CallStack::const_iterator> + CallsiteContext(Call.dyn_cast()); + return getStackIdsWithContextNodes::const_iterator>( + CallsiteContext); +} + template template std::vector @@ -1207,6 +1313,84 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(Module &M) : Mod(M) { Call.call()->setMetadata(LLVMContext::MD_callsite, nullptr); } +IndexCallsiteContextGraph::IndexCallsiteContextGraph( + ModuleSummaryIndex &Index, + function_ref + isPrevailing) + : Index(Index) { + for (auto &I : Index) { + auto VI = Index.getValueInfo(I); + for (auto &S : VI.getSummaryList()) { + // We should only add the prevailing nodes. Otherwise we may try to clone + // in a weak copy that won't be linked (and may be different than the + // prevailing version). + // We only keep the memprof summary on the prevailing copy now when + // building the combined index, as a space optimization, however don't + // rely on this optimization. The linker doesn't resolve local linkage + // values so don't check whether those are prevailing. + if (!GlobalValue::isLocalLinkage(S->linkage()) && + !isPrevailing(VI.getGUID(), S.get())) + continue; + auto *FS = dyn_cast(S.get()); + if (!FS) + continue; + std::vector CallsWithMetadata; + if (!FS->allocs().empty()) { + for (auto &AN : FS->mutableAllocs()) { + // This can happen because of recursion elimination handling that + // currently exists in ModuleSummaryAnalysis. Skip these for now. + // We still added them to the summary because we need to be able to + // correlate properly in applyImport in the backends. + if (AN.MIBs.empty()) + continue; + CallsWithMetadata.push_back({&AN}); + auto *AllocNode = addAllocNode({&AN}, FS); + // Pass an empty CallStack to the CallsiteContext (second) + // parameter, since for ThinLTO we already collapsed out the inlined + // stack ids on the allocation call during ModuleSummaryAnalysis. + CallStack::const_iterator> + EmptyContext; + // Now add all of the MIBs and their stack nodes. + for (auto &MIB : AN.MIBs) { + CallStack::const_iterator> + StackContext(&MIB); + addStackNodesForMIB::const_iterator>( + AllocNode, StackContext, EmptyContext, MIB.AllocType); + } + assert(AllocNode->AllocTypes != (uint8_t)AllocationType::None); + // Initialize version 0 on the summary alloc node to the current alloc + // type, unless it has both types in which case make it default, so + // that in the case where we aren't able to clone the original version + // always ends up with the default allocation behavior. 
+ AN.Versions[0] = (uint8_t)allocTypeToUse(AllocNode->AllocTypes); + } + } + // For callsite metadata, add to list for this function for later use. + if (!FS->callsites().empty()) + for (auto &SN : FS->mutableCallsites()) + CallsWithMetadata.push_back({&SN}); + + if (!CallsWithMetadata.empty()) + FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata}); + + if (!FS->allocs().empty() || !FS->callsites().empty()) + FSToVIMap[FS] = VI; + } + } + + if (DumpCCG) { + dbgs() << "CCG before updating call stack chains:\n"; + dbgs() << *this; + } + + if (ExportToDot) + exportToDot("prestackupdate"); + + updateStackNodes(); + + handleCallsitesWithMultipleTargets(); +} + template void CallsiteContextGraph::handleCallsitesWithMultipleTargets() { @@ -1251,6 +1435,12 @@ uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { return IdOrIndex; } +uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const { + // In the Index case this is an index into the stack id list in the summary + // index, convert it to an Id. + return Index.getStackIdAtIndex(IdOrIndex); +} + bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, const Function *Func) { auto *CB = dyn_cast(Call); @@ -1264,6 +1454,23 @@ bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call, return Alias && Alias->getAliasee() == Func; } +bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call, + const FunctionSummary *Func) { + ValueInfo Callee = Call.dyn_cast()->Callee; + // If there is no summary list then this is a call to an externally defined + // symbol. + AliasSummary *Alias = + Callee.getSummaryList().empty() + ? nullptr + : dyn_cast(Callee.getSummaryList()[0].get()); + assert(FSToVIMap.count(Func)); + return Callee == FSToVIMap[Func] || + // If callee is an alias, check the aliasee, since only function + // summary base objects will contain the stack node summaries and thus + // get a context node. + (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]); +} + static std::string getAllocTypeString(uint8_t AllocTypes) { if (!AllocTypes) return "None"; @@ -1581,3 +1788,11 @@ PreservedAnalyses MemProfContextDisambiguation::run(Module &M, return PreservedAnalyses::all(); return PreservedAnalyses::none(); } + +void MemProfContextDisambiguation::run( + ModuleSummaryIndex &Index, + function_ref + isPrevailing) { + IndexCallsiteContextGraph CCG(Index, isPrevailing); + CCG.process(); +} diff --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll new file mode 100644 index 0000000000000..d8c78d270f277 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-basic.ll @@ -0,0 +1,157 @@ +;; Test callsite context graph generation for simple call graph with +;; two memprof contexts and no inlining. +;; +;; Original code looks like: +;; +;; char *bar() { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
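+;;
+;; Editorial note, not part of the original test: the StackIds printed in the
+;; DUMP output below (e.g. "AllocType 1 StackIds: 2, 3, 0") are indices into
+;; the summary's stack id list, not the raw hashes from the !callsite
+;; metadata; the thin link maps an index back to its id via
+;; Index.getStackIdAtIndex(). Here index 0 appears to correspond to
+;; 8632435727821051414 (main's first call to foo, the notcold context) and
+;; index 1 to -3421689549917153178 (the second, cold call).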
+ +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + + +source_filename = "memprof-basic.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @main() { +entry: + %call = call ptr @_Z3foov(), !callsite !0 + %call1 = call ptr @_Z3foov(), !callsite !1 + ret i32 0 +} + +declare void @_ZdaPv() + +declare i32 @sleep() + +define internal ptr @_Z3barv() { +entry: + %call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z3bazv() { +entry: + %call = call ptr @_Z3barv(), !callsite !8 + ret ptr null +} + +define internal ptr @_Z3foov() { +entry: + %call = call ptr @_Z3bazv(), !callsite !9 + ret ptr null +} + +; uselistorder directives +uselistorder ptr @_Z3foov, { 1, 0 } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{!3, !5} +!3 = !{!4, !"notcold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!5 = !{!6, !"cold"} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!7 = !{i64 9086428284934609951} +!8 = !{i64 -5964873800580613432} +!9 = !{i64 2732490490862098848} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 2, 3, 0 +; DUMP: AllocType 2 StackIds: 2, 3, 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[BAZ]] +; DUMP: Callee: 9832687305761716512 (_Z3barv) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +; DUMP: Node [[FOO]] +; DUMP: Callee: 5878270615442837395 (_Z3bazv) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +; DUMP: Node [[MAIN1]] +; DUMP: Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: Callee: 6731117468105397038 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: 
[[MAIN2]] AllocTypes: Cold ContextIds: 2
+; DUMP: CallerEdges:
+
+
+; DOT: digraph "postbuild" {
+; DOT: label="postbuild";
+; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> alloc}"];
+; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
+; DOT: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
+; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
+; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
+; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
+; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
+; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
+; DOT: }
diff --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
new file mode 100644
index 0000000000000..772b319e0715e
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
@@ -0,0 +1,229 @@
+;; Test callsite context graph generation for a call graph with MIBs
+;; that have pruned contexts that partially match multiple inlined
+;; callsite contexts, requiring duplication of context ids and nodes
+;; while matching callsite nodes onto the graph.
+;;
+;; Original code looks like:
+;;
+;; char *D() {
+;;   return new char[10];
+;; }
+;;
+;; char *F() {
+;;   return D();
+;; }
+;;
+;; char *C() {
+;;   return D();
+;; }
+;;
+;; char *B() {
+;;   return C();
+;; }
+;;
+;; char *E() {
+;;   return C();
+;; }
+;; int main(int argc, char **argv) {
+;;   char *x = B(); // cold
+;;   char *y = E(); // cold
+;;   char *z = F(); // default
+;;   memset(x, 0, 10);
+;;   memset(y, 0, 10);
+;;   memset(z, 0, 10);
+;;   delete[] z;
+;;   sleep(10);
+;;   delete[] x;
+;;   delete[] y;
+;;   return 0;
+;; }
+;;
+;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; memory freed after sleep(10) results in cold lifetimes.
+;;
+;; The code below was created by forcing inlining of C into both B and E.
+;; Since both allocation contexts via C are cold, the matched memprof
+;; metadata has the context pruned above C's callsite. This requires
+;; matching the stack node for C to callsites where it was inlined (i.e.
+;; the callsites in B and E that have callsite metadata that includes C's).
+;; It also requires duplication of that node in the graph as well as the
+;; duplication of the context ids along that path through the graph,
+;; so that we can represent the duplicated (via inlining) C callsite.
+;;
+;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
+ +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST + + +source_filename = "duplicate-context-ids.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define internal ptr @_Z1Dv() { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z1Fv() { +entry: + %call = call ptr @_Z1Dv(), !callsite !6 + ret ptr null +} + +define internal ptr @_Z1Cv() { +entry: + %call = call ptr @_Z1Dv(), !callsite !7 + ret ptr null +} + +define internal ptr @_Z1Bv() { +entry: + %call.i = call ptr @_Z1Dv(), !callsite !8 + ret ptr null +} + +define internal ptr @_Z1Ev() { +entry: + %call.i = call ptr @_Z1Dv(), !callsite !9 + ret ptr null +} + +declare i32 @main() + +declare void @_ZdaPv() + +declare i32 @sleep() + +!0 = !{!1, !3} +!1 = !{!2, !"cold"} +!2 = !{i64 6541423618768552252, i64 -6270142974039008131} +!3 = !{!4, !"notcold"} +!4 = !{i64 6541423618768552252, i64 -4903163940066524832} +!5 = !{i64 6541423618768552252} +!6 = !{i64 -4903163940066524832} +!7 = !{i64 -6270142974039008131} +!8 = !{i64 -6270142974039008131, i64 -184525619819294889} +!9 = !{i64 -6270142974039008131, i64 1905834578520680781} + + +;; After adding only the alloc node memprof metadata, we only have 2 contexts. + +; DUMP: CCG before updating call stack chains: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 2 StackIds: 0 +; DUMP: AllocType 1 StackIds: 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 +; DUMP: Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2 + +; DUMP: Node [[C]] +; DUMP: null Call +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[F]] +; DUMP: null Call +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: CallerEdges: + +;; After updating for callsite metadata, we should have generated context ids 3 and 4, +;; along with 2 new nodes for those callsites. All have the same allocation type +;; behavior as the original C node. 
+ +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 2 StackIds: 0 +; DUMP: AllocType 1 StackIds: 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3 +; DUMP: Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1 + +; DUMP: Node [[F]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[C2]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[B]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 2 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[E]] +; DUMP: Callee: 4881081444663423788 (_Z1Dv) Clones: 0 StackIds: 0, 3 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1 +; DUMP: CallerEdges: + + +; DOTPRE: digraph "prestackupdate" { +; DOTPRE: label="prestackupdate"; +; DOTPRE: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"]; +; DOTPRE: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"]; +; DOTPRE: Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"]; +; DOTPRE: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"]; +; DOTPRE: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"]; +; DOTPRE: } + + +; DOTPOST:digraph "postbuild" { +; DOTPOST: label="postbuild"; +; DOTPOST: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> alloc}"]; +; DOTPOST: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"]; +; DOTPOST: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"]; +; DOTPOST: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"]; +; DOTPOST: Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"]; +; DOTPOST: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"]; +; DOTPOST: Node[[B]] -> Node[[D]][tooltip="ContextIds: 
4",fillcolor="cyan"]; +; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"]; +; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"]; +; DOTPOST:} diff --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll new file mode 100644 index 0000000000000..af7dece9421a9 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll @@ -0,0 +1,390 @@ +;; Test callsite context graph generation for call graph with with MIBs +;; that have pruned contexts that partially match multiple inlined +;; callsite contexts, requiring duplication of context ids and nodes +;; while matching callsite nodes onto the graph. This test requires more +;; complex duplication due to multiple contexts for different allocations +;; that share some of the same callsite nodes. +;; +;; Original code looks like: +;; +;; char *D(bool Call1) { +;; if (Call1) +;; return new char[10]; +;; else +;; return new char[10]; +;; } +;; +;; char *C(bool Call1) { +;; return D(Call1); +;; } +;; +;; char *B(bool Call1) { +;; if (Call1) +;; return C(true); +;; else +;; return C(false); +;; } +;; +;; char *A(bool Call1) { +;; return B(Call1); +;; } +;; +;; char *A1() { +;; return A(true); +;; } +;; +;; char *A2() { +;; return A(true); +;; } +;; +;; char *A3() { +;; return A(false); +;; } +;; +;; char *A4() { +;; return A(false); +;; } +;; +;; char *E() { +;; return B(true); +;; } +;; +;; char *F() { +;; return B(false); +;; } +;; +;; int main(int argc, char **argv) { +;; char *a1 = A1(); // cold +;; char *a2 = A2(); // cold +;; char *e = E(); // default +;; char *a3 = A3(); // default +;; char *a4 = A4(); // default +;; char *f = F(); // cold +;; memset(a1, 0, 10); +;; memset(a2, 0, 10); +;; memset(e, 0, 10); +;; memset(a3, 0, 10); +;; memset(a4, 0, 10); +;; memset(f, 0, 10); +;; delete[] a3; +;; delete[] a4; +;; delete[] e; +;; sleep(10); +;; delete[] a1; +;; delete[] a2; +;; delete[] f; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The code below was created by forcing inlining of A into its callers, +;; without any other inlining or optimizations. Since both allocation contexts +;; via A for each allocation in D have the same allocation type (cold via +;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second +;; new in D, the contexts for those respective allocations are pruned above A. +;; The allocations via E and F are to ensure we don't prune above B. +;; +;; The matching onto the inlined A[1234]->A sequences will require duplication +;; of the context id assigned to the context from A for each allocation in D. +;; This test ensures that we do this correctly in the presence of callsites +;; shared by the different duplicated context ids (i.e. callsite in C). +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
+ +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Z1Db,plx \ +; RUN: -r=%t.o,_Z1Cb,plx \ +; RUN: -r=%t.o,_Z1Bb,plx \ +; RUN: -r=%t.o,_Z1Ab,plx \ +; RUN: -r=%t.o,_Z2A1v,plx \ +; RUN: -r=%t.o,_Z2A2v,plx \ +; RUN: -r=%t.o,_Z2A3v,plx \ +; RUN: -r=%t.o,_Z2A4v,plx \ +; RUN: -r=%t.o,_Z1Ev,plx \ +; RUN: -r=%t.o,_Z1Fv,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define ptr @_Z1Db(i1 %Call1) { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + br label %return + +if.else: ; No predecessors! + %call1 = call ptr @_Znam(i64 0), !memprof !6, !callsite !11 + br label %return + +return: ; preds = %if.else, %entry + ret ptr null +} + +declare ptr @_Znam(i64) + +define ptr @_Z1Cb(i1 %Call1) { +entry: + %call = call ptr @_Z1Db(i1 false), !callsite !12 + ret ptr null +} + +define ptr @_Z1Bb(i1 %Call1) { +entry: + %call = call ptr @_Z1Cb(i1 false), !callsite !13 + br label %return + +if.else: ; No predecessors! + %call1 = call ptr @_Z1Cb(i1 false), !callsite !14 + br label %return + +return: ; preds = %if.else, %entry + ret ptr null +} + +define ptr @_Z1Ab() { +entry: + %call = call ptr @_Z1Bb(i1 false), !callsite !15 + ret ptr null +} + +define ptr @_Z2A1v() { +entry: + %call.i = call ptr @_Z1Bb(i1 false), !callsite !16 + ret ptr null +} + +define ptr @_Z2A2v() { +entry: + %call.i = call ptr @_Z1Bb(i1 false), !callsite !17 + ret ptr null +} + +define ptr @_Z2A3v() { +entry: + %call.i = call ptr @_Z1Bb(i1 false), !callsite !18 + ret ptr null +} + +define ptr @_Z2A4v() { +entry: + %call.i = call ptr @_Z1Bb(i1 false), !callsite !19 + ret ptr null +} + +define ptr @_Z1Ev() { +entry: + %call = call ptr @_Z1Bb(i1 false), !callsite !20 + ret ptr null +} + +define ptr @_Z1Fv() { +entry: + %call = call ptr @_Z1Bb(i1 false), !callsite !21 + ret ptr null +} + +declare i32 @main() + +declare void @_ZdaPv() + +declare i32 @sleep() + +; uselistorder directives +uselistorder ptr @_Znam, { 1, 0 } + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781} +!3 = !{!4, !"cold"} +!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978} +!5 = !{i64 4854880825882961848} +!6 = !{!7, !9} +!7 = !{!8, !"notcold"} +!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978} +!9 = !{!10, !"cold"} +!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832} +!11 = !{i64 -8775068539491628272} +!12 = !{i64 -904694911315397047} +!13 = !{i64 6532298921261778285} +!14 = !{i64 7859682663773658275} +!15 = !{i64 -6528110295079665978} +!16 = !{i64 -6528110295079665978, i64 5747919905719679568} +!17 = !{i64 -6528110295079665978, i64 -5753238080028016843} +!18 = !{i64 -6528110295079665978, i64 1794685869326395337} +!19 = !{i64 -6528110295079665978, i64 5462047985461644151} +!20 = !{i64 1905834578520680781} +!21 = !{i64 -4903163940066524832} + + +;; After adding only the alloc node memprof metadata, we only have 4 contexts (we 
only +;; match the interesting parts of the pre-update graph here). + +; DUMP: CCG before updating call stack chains: +; DUMP: Callsite Context Graph: + +; DUMP: Node [[D1:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 1, 2 +; DUMP: AllocType 2 StackIds: 0, 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 + +; DUMP: Node [[C:0x[a-z0-9]+]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 + +; DUMP: Node [[D2]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 4, 3 +; DUMP: AllocType 2 StackIds: 0, 4, 5 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 + + +;; After updating for callsite metadata, we should have duplicated the context +;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A, +;; and used those on new nodes for those callers. Note that while in reality +;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4, +;; due to the pruning we have lost this information and thus end up duplicating +;; both of A's contexts to all of the new nodes (which could result in some +;; unnecessary cloning). + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[D1]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 1, 2 +; DUMP: AllocType 2 StackIds: 0, 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 5 7 9 11 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 + +; DUMP: Node [[C]] +; DUMP: Callee: 11485875876353461977 (_Z1Db) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 +; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 +; DUMP: Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 + +; DUMP: Node [[B1]] +; DUMP: Callee: 15062806102884567440 (_Z1Cb) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 5 7 9 11 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5 +; DUMP: Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7 +; DUMP: Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9 +; DUMP: Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11 +; DUMP: Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +; DUMP: Node [[E]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to
Caller: [[E]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[D2]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 4, 3 +; DUMP: AllocType 2 StackIds: 0, 4, 5 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 6 8 10 12 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 + +; DUMP: Node [[B2]] +; DUMP: Callee: 15062806102884567440 (_Z1Cb) Clones: 0 StackIds: 4 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 6 8 10 12 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 +; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 +; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 +; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 +; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 + +; DUMP: Node [[F]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 5 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[A2]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 5 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5 +; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6 +; DUMP: CallerEdges: + +; DUMP: Node [[A3]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 8 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 7 8 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7 +; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8 +; DUMP: CallerEdges: + +; DUMP: Node [[A1]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 9 10 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9 +; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10 +; DUMP: CallerEdges: + +; DUMP: Node [[A4]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 9 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 11 12 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11 +; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12 +; DUMP: CallerEdges: + +; DUMP: Node [[A]] +; DUMP: Callee: 9116113196563097487 (_Z1Bb) Clones: 0 StackIds: 3, 6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 2 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: diff --git a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll new file mode 100644 index 
0000000000000..30c8bd27f37b7 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll @@ -0,0 +1,266 @@ +;; Tests callsite context graph generation for call graph containing indirect +;; calls. Currently this should result in conservative behavior, such that the +;; indirect call receives a null call in its graph node, to prevent subsequent +;; cloning. +;; +;; Original code looks like: +;; +;; char *foo() { +;; return new char[10]; +;; } +;; class A { +;; public: +;; virtual char *x() { return foo(); } +;; }; +;; class B : public A { +;; public: +;; char *x() final { return foo(); } +;; }; +;; char *bar(A *a) { +;; return a->x(); +;; } +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; B b; +;; char *z = bar(&b); +;; char *w = bar(&b); +;; A a; +;; char *r = bar(&a); +;; char *s = bar(&a); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; memset(z, 0, 10); +;; memset(w, 0, 10); +;; memset(r, 0, 10); +;; memset(s, 0, 10); +;; delete[] x; +;; delete[] w; +;; delete[] r; +;; sleep(10); +;; delete[] y; +;; delete[] z; +;; delete[] s; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; Compiled without optimization to prevent inlining and devirtualization. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,_ZTVN10__cxxabiv120__si_class_type_infoE, \ +; RUN: -r=%t.o,_ZTVN10__cxxabiv117__class_type_infoE, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. 
\ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + + +source_filename = "indirectcall.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_ZTVN10__cxxabiv120__si_class_type_infoE = external global ptr +@_ZTVN10__cxxabiv117__class_type_infoE = external global ptr + +define internal ptr @_Z3barP1A(ptr %a) { +entry: + ret ptr null +} + +define i32 @main() { +entry: + %call = call ptr @_Z3foov(), !callsite !0 + %call1 = call ptr @_Z3foov(), !callsite !1 + %call2 = call ptr @_Z3barP1A(ptr null), !callsite !2 + %call3 = call ptr @_Z3barP1A(ptr null), !callsite !3 + %call4 = call ptr @_Z3barP1A(ptr null), !callsite !4 + %call5 = call ptr @_Z3barP1A(ptr null), !callsite !5 + ret i32 0 +} + +declare void @_ZdaPv() + +declare i32 @sleep() + +define internal ptr @_ZN1A1xEv() { +entry: + %call = call ptr @_Z3foov(), !callsite !6 + ret ptr null +} + +define internal ptr @_ZN1B1xEv() { +entry: + %call = call ptr @_Z3foov(), !callsite !7 + ret ptr null +} + +define internal ptr @_Z3foov() { +entry: + %call = call ptr @_Znam(i64 0), !memprof !8, !callsite !21 + ret ptr null +} + +declare ptr @_Znam(i64) + +; uselistorder directives +uselistorder ptr @_Z3foov, { 3, 2, 1, 0 } + +!0 = !{i64 8632435727821051414} +!1 = !{i64 -3421689549917153178} +!2 = !{i64 6792096022461663180} +!3 = !{i64 -2709642582978494015} +!4 = !{i64 748269490701775343} +!5 = !{i64 -5747251260480066785} +!6 = !{i64 8256774051149711748} +!7 = !{i64 -4831879094954754638} +!8 = !{!9, !11, !13, !15, !17, !19} +!9 = !{!10, !"notcold"} +!10 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 748269490701775343} +!11 = !{!12, !"cold"} +!12 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 -5747251260480066785} +!13 = !{!14, !"notcold"} +!14 = !{i64 2732490490862098848, i64 8632435727821051414} +!15 = !{!16, !"cold"} +!16 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 6792096022461663180} +!17 = !{!18, !"notcold"} +!18 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 -2709642582978494015} +!19 = !{!20, !"cold"} +!20 = !{i64 2732490490862098848, i64 -3421689549917153178} +!21 = !{i64 2732490490862098848} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[FOO:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 6, 8, 4 +; DUMP: AllocType 2 StackIds: 6, 8, 5 +; DUMP: AllocType 1 StackIds: 0 +; DUMP: AllocType 2 StackIds: 7, 8, 2 +; DUMP: AllocType 1 StackIds: 7, 8, 3 +; DUMP: AllocType 2 StackIds: 1 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 3 4 5 6 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 6 + +; DUMP: Node [[AX]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 6 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 
1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +;; Bar contains an indirect call with multiple targets. Its call should be null. +; DUMP: Node [[BAR]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 4 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 5 + +; DUMP: Node [[MAIN3]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 4 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN4]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 5 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN1]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 0 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[BX]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 7 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 4 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5 + +; DUMP: Node [[MAIN5]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN6]] +; DUMP: Callee: 4095956691517954349 (_Z3barP1A) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 5 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: Callee: 12914368124089294956 (_Z3foov) Clones: 0 StackIds: 1 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 6 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6 +; DUMP: CallerEdges: + + +; DOT: digraph "postbuild" { +; DOT: label="postbuild"; +; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5 6",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> alloc}"]; +; DOT: Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"]; +; DOT: Node[[AX]] ->
Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"]; +; DOT: Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOT: Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOT: Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"]; +; DOT: Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOT: Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"]; +; DOT: Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"]; +; DOT: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"]; +; DOT: } diff --git a/llvm/test/ThinLTO/X86/memprof-inlined.ll b/llvm/test/ThinLTO/X86/memprof-inlined.ll new file mode 100644 index 0000000000000..89cd878e99fb4 --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-inlined.ll @@ -0,0 +1,186 @@ +;; Test callsite context graph generation for call graph with two memprof +;; contexts and partial inlining, requiring generation of a new fused node to +;; represent the inlined sequence while matching callsite nodes onto the graph. +;; +;; Original code looks like: +;; +;; char *bar() { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; The code below was created by forcing inlining of baz into foo, and +;; bar into baz. Due to the inlining of bar we will initially have two +;; allocation nodes in the graph. 
This tests that we correctly match +;; foo (with baz inlined) onto the graph nodes first, and generate a new +;; fused node for it. We should then not match baz (with bar inlined) as that +;; is not reached by the MIB contexts (since all calls from main will look +;; like main -> foo(+baz) -> bar after the inlining reflected in this IR). +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + +; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT + + +source_filename = "inlined.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define internal ptr @_Z3barv() { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + ret ptr null +} + +declare ptr @_Znam(i64) + +define internal ptr @_Z3bazv() { +entry: + %call.i = call ptr @_Znam(i64 0), !memprof !0, !callsite !6 + ret ptr null +} + +define internal ptr @_Z3foov() { +entry: + %call.i = call ptr @_Z3barv(), !callsite !7 + ret ptr null +} + +define i32 @main() { +entry: + %call = call ptr @_Z3foov(), !callsite !8 + %call1 = call ptr @_Z3foov(), !callsite !9 + ret i32 0 +} + +declare void @_ZdaPv() + +declare i32 @sleep() + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 9086428284934609951} +!6 = !{i64 9086428284934609951, i64 -5964873800580613432} +!7 = !{i64 -5964873800580613432, i64 2732490490862098848} +!8 = !{i64 8632435727821051414} +!9 = !{i64 -3421689549917153178} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: + +; DUMP: Node [[BAZ:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 1, 2 +; DUMP: AllocType 2 StackIds: 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 + +;; This is leftover from the MIB on the alloc inlined into baz. It is not +;; matched with any call, since there is no such node in the IR. Due to the +;; null call it will not participate in any context transformations. 
+; DUMP: Node [[FOO2]] +; DUMP: null Call +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 1 2 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +; DUMP: Node [[MAIN1]] +; DUMP: Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 2 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 3 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[FOO:0x[a-z0-9]+]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: CallerEdges: + +; DUMP: Node [[MAIN2]] +; DUMP: Callee: 2229562716906371625 (_Z3foov) Clones: 0 StackIds: 3 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 +; DUMP: CallerEdges: + +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 1, 2 +; DUMP: AllocType 2 StackIds: 0, 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 3 4 + +;; This is the node synthesized for the call to bar in foo that was created +;; by inlining baz into foo. +; DUMP: Node [[FOO]] +; DUMP: Callee: 16064618363798697104 (_Z3barv) Clones: 0 StackIds: 0, 1 (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 3 4 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 3 4 +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3 +; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4 + + +; DOT: digraph "postbuild" { +; DOT: label="postbuild"; +; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3bazv -\> alloc}"]; +; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"]; +; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"]; +; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"]; +; DOT: Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 3",fillcolor="brown1"]; +; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"]; +; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"]; +; DOT: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 4",fillcolor="cyan"]; +; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 
Alloc2\n_Z3barv -\> alloc}"]; +; DOT: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"]; +; DOT: Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"]; +; DOT: } diff --git a/llvm/test/ThinLTO/X86/memprof-inlined2.ll b/llvm/test/ThinLTO/X86/memprof-inlined2.ll new file mode 100644 index 0000000000000..1ffae8cd59cef --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof-inlined2.ll @@ -0,0 +1,124 @@ +;; Test callsite context graph generation for call graph with two memprof +;; contexts and multiple levels of inlining, requiring generation of new +;; fused nodes to represent the inlined sequence while matching callsite +;; nodes onto the graph. In particular this tests the case where a function +;; has inlined a callee containing an inlined callee. +;; +;; Original code looks like: +;; +;; char *bar() __attribute__((noinline)) { +;; return new char[10]; +;; } +;; +;; char *baz() { +;; return bar(); +;; } +;; +;; char *foo() { +;; return baz(); +;; } +;; +;; int main(int argc, char **argv) { +;; char *x = foo(); +;; char *y = foo(); +;; memset(x, 0, 10); +;; memset(y, 0, 10); +;; delete[] x; +;; sleep(10); +;; delete[] y; +;; return 0; +;; } +;; +;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the +;; memory freed after sleep(10) results in cold lifetimes. +;; +;; Both foo and baz are inlined into main, at both foo callsites. +;; We should update the graph for new fused nodes for both of those inlined +;; callsites to bar. +;; +;; Note that baz and bar are both dead due to the inlining, but have been left +;; in the input IR to ensure that the MIB call chain is matched to the longer +;; inline sequences from main. +;; +;; The IR was then reduced using llvm-reduce with the expected FileCheck input. 
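+;;
+;; (Editorial sketch, not additional test input: concretely, the two calls to
+;; bar in main below carry three-deep callsite metadata (!6 and !7) whose
+;; stack ids line up with the suffixes of bar's MIB contexts !2 and !4, so
+;; matching them synthesizes one fused node per inlined main->foo->baz chain.)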
+ +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -r=%t.o,main,plx \ +; RUN: -r=%t.o,_Z3barv,plx \ +; RUN: -r=%t.o,_Z3bazv,plx \ +; RUN: -r=%t.o,_Z3foov,plx \ +; RUN: -r=%t.o,_ZdaPv, \ +; RUN: -r=%t.o,sleep, \ +; RUN: -r=%t.o,_Znam, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \ +; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP + + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define ptr @_Z3barv() { +entry: + %call = call ptr @_Znam(i64 0), !memprof !0, !callsite !5 + ret ptr null +} + +declare ptr @_Znam(i64) + +declare ptr @_Z3bazv() + +declare ptr @_Z3foov() + +define i32 @main() { +delete.end5: + %call.i.i = call ptr @_Z3barv(), !callsite !6 + %call.i.i8 = call ptr @_Z3barv(), !callsite !7 + ret i32 0 +} + +declare void @_ZdaPv() + +declare i32 @sleep() + +!0 = !{!1, !3} +!1 = !{!2, !"notcold"} +!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!3 = !{!4, !"cold"} +!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} +!5 = !{i64 9086428284934609951} +!6 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414} +!7 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178} + + +; DUMP: CCG before cloning: +; DUMP: Callsite Context Graph: +; DUMP: Node [[BAR:0x[a-z0-9]+]] +; DUMP: Versions: 1 MIB: +; DUMP: AllocType 1 StackIds: 0, 1, 2 +; DUMP: AllocType 2 StackIds: 0, 1, 3 +; DUMP: (clone 0) +; DUMP: AllocTypes: NotColdCold +; DUMP: ContextIds: 1 2 +; DUMP: CalleeEdges: +; DUMP: CallerEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1 +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2 + +;; This is the node synthesized for the first inlined call chain of main->foo->baz +; DUMP: Node [[MAIN1]] +; DUMP: Callee: 17377440600225628772 (_Z3barv) Clones: 0 StackIds: 0, 1, 2 (clone 0) +; DUMP: AllocTypes: NotCold +; DUMP: ContextIds: 1 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1 +; DUMP: CallerEdges: + +;; This is the node synthesized for the second inlined call chain of main->foo->baz +; DUMP: Node [[MAIN2]] +; DUMP: Callee: 17377440600225628772 (_Z3barv) Clones: 0 StackIds: 0, 1, 3 (clone 0) +; DUMP: AllocTypes: Cold +; DUMP: ContextIds: 2 +; DUMP: CalleeEdges: +; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2 +; DUMP: CallerEdges: From 201fdef40dd6ec193d18d39638454a3c972f1fec Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Mon, 20 Mar 2023 18:38:04 -0700 Subject: [PATCH 351/691] libclang: Pass Clang install directory to driver via argv[0]. Various driver features, such as the sysroot path detection for Android targets, rely on being able to find the Clang install directory (look for callers of `getDriver().getInstalledDir()`). However, the install directory isn't currently being plumbed through to the driver, which is conventionally done via the argv[0] passed to the Driver constructor. It looks like D14695 attempted to fix this by adding another API that allows specifying the argv[0]. 
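For illustration only (an editorial sketch, not part of this change, and with a hypothetical install path): a caller of that API supplies argv[0] itself, along these lines:

  // Hypothetical prefix; only the directory of argv[0] matters to the driver.
  const char *args[] = {"/opt/llvm/bin/clang", "-I/opt/llvm/include", "test.c"};
  CXIndex idx = clang_createIndex(/*excludeDeclarationsFromPCH=*/0,
                                  /*displayDiagnostics=*/0);
  CXTranslationUnit tu = NULL;
  enum CXErrorCode err = clang_parseTranslationUnit2FullArgv(
      idx, /*source_filename=*/NULL, args, 3, /*unsaved_files=*/NULL, 0,
      CXTranslationUnit_None, &tu);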
However, rather than requiring every user of libclang to switch to this API for correct behavior, let's have the other existing APIs work by default, by using the existing logic in libclang for finding the install directory. Differential Revision: https://reviews.llvm.org/D146497 --- clang/docs/ReleaseNotes.rst | 8 ++++++++ clang/include/clang-c/Index.h | 9 +++++++-- clang/test/Index/record-completion-invocation.c | 2 +- clang/test/Index/record-parsing-invocation.c | 4 ++-- clang/tools/libclang/CIndex.cpp | 11 ++++++++++- 5 files changed, 28 insertions(+), 6 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 005bf99a62457..94e0f10a31743 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -368,6 +368,14 @@ libclang has an evaluable bit width. Fixes undefined behavior when called on a bit-field whose width depends on a template parameter. +- ``clang_parseTranslationUnit`` and ``clang_parseTranslationUnit2`` have been + changed to automatically locate the Clang installation directory relative to + the location of the libclang binary and use it for system headers installed + alongside the Clang installation. It is no longer necessary to manually + locate such system headers or use the ``clang_parseTranslationUnit2FullArgv`` + function for this purpose if libclang has been installed in the default + location. + Static Analyzer --------------- - Fix incorrect alignment attribute on the this parameter of certain diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index c7d32e6a152ae..8275f2941a41c 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -899,8 +899,13 @@ CINDEX_LINKAGE enum CXErrorCode clang_parseTranslationUnit2( /** * Same as clang_parseTranslationUnit2 but requires a full command line - * for \c command_line_args including argv[0]. This is useful if the standard - * library paths are relative to the binary. + * for \c command_line_args including argv[0]. + * + * This is useful if the driver uses paths relative to the binary and either + * you are targeting libclang versions older than Clang 17, or libclang is + * installed to a non-standard location. Clang 17 and newer will automatically + * use the correct argv[0] if libclang is installed in the lib directory + * parallel to the bin directory where the clang binary is installed.
*/ CINDEX_LINKAGE enum CXErrorCode clang_parseTranslationUnit2FullArgv( CXIndex CIdx, const char *source_filename, diff --git a/clang/test/Index/record-completion-invocation.c b/clang/test/Index/record-completion-invocation.c index 4b667134fa2d4..75eb9083908ae 100644 --- a/clang/test/Index/record-completion-invocation.c +++ b/clang/test/Index/record-completion-invocation.c @@ -9,4 +9,4 @@ // RUN: env LIBCLANG_DISABLE_CRASH_RECOVERY=1 CINDEXTEST_INVOCATION_EMISSION_PATH=%t not --crash c-index-test -code-completion-at=%s:10:1 "-remap-file=%s,%S/Inputs/record-parsing-invocation-remap.c" %s // RUN: cat %t/libclang-* | FileCheck %s -// CHECK: {"toolchain":"{{.*}}","libclang.operation":"complete","libclang.opts":1,"args":["clang","-fno-spell-checking","{{.*}}record-completion-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"],"invocation-args":["-code-completion-at={{.*}}record-completion-invocation.c:10:1"],"unsaved_file_hashes":[{"name":"{{.*}}record-completion-invocation.c","md5":"aee23773de90e665992b48209351d70e"}]} +// CHECK: {"toolchain":"{{.*}}","libclang.operation":"complete","libclang.opts":1,"args":["{{.*}}bin{{.*}}clang","-fno-spell-checking","{{.*}}record-completion-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"],"invocation-args":["-code-completion-at={{.*}}record-completion-invocation.c:10:1"],"unsaved_file_hashes":[{"name":"{{.*}}record-completion-invocation.c","md5":"aee23773de90e665992b48209351d70e"}]} diff --git a/clang/test/Index/record-parsing-invocation.c b/clang/test/Index/record-parsing-invocation.c index e0c4cdb05fb00..f370f014fb1cc 100644 --- a/clang/test/Index/record-parsing-invocation.c +++ b/clang/test/Index/record-parsing-invocation.c @@ -25,5 +25,5 @@ # pragma clang __debug parser_crash #endif -// CHECK: {"toolchain":"{{.*}}","libclang.operation":"parse","libclang.opts":1,"args":["clang","-fno-spell-checking","{{.*}}record-parsing-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"]} -// CHECK-UNSAVED: {"toolchain":"{{.*}}","libclang.operation":"parse","libclang.opts":1,"args":["clang","-fno-spell-checking","{{.*}}record-parsing-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"],"unsaved_file_hashes":[{"name":"{{.*}}record-parsing-invocation.c","md5":"aee23773de90e665992b48209351d70e"}]} +// CHECK: {"toolchain":"{{.*}}","libclang.operation":"parse","libclang.opts":1,"args":["{{.*}}bin{{.*}}clang","-fno-spell-checking","{{.*}}record-parsing-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"]} +// CHECK-UNSAVED: {"toolchain":"{{.*}}","libclang.operation":"parse","libclang.opts":1,"args":["{{.*}}bin{{.*}}clang","-fno-spell-checking","{{.*}}record-parsing-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"],"unsaved_file_hashes":[{"name":"{{.*}}record-parsing-invocation.c","md5":"aee23773de90e665992b48209351d70e"}]} diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 30416e46ce173..2aa12667d37e9 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -4013,8 +4013,17 @@ enum CXErrorCode clang_parseTranslationUnit2( struct CXUnsavedFile *unsaved_files, unsigned num_unsaved_files, unsigned options, CXTranslationUnit *out_TU) { noteBottomOfStack(); + + if (!CIdx) + return CXError_InvalidArguments; + + SmallString<64> ClangPath( + static_cast(CIdx)->getClangToolchainPath()); + 
llvm::sys::path::append(ClangPath, "bin"); + llvm::sys::path::append(ClangPath, "clang"); + SmallVector Args; - Args.push_back("clang"); + Args.push_back(ClangPath.c_str()); Args.append(command_line_args, command_line_args + num_command_line_args); return clang_parseTranslationUnit2FullArgv( CIdx, source_filename, Args.data(), Args.size(), unsaved_files, From e7596a99fca6d1df14275f5293e447a4d87af06a Mon Sep 17 00:00:00 2001 From: MalavikaSamak Date: Wed, 22 Mar 2023 15:31:00 -0700 Subject: [PATCH 352/691] [-Wunsafe-buffer-usage] Add Fixable for simple pointer dereference This patch introduces PointerDereferenceGadget, a FixableGadget that emits fixits to handle cases where a pointer that is identified as unsafe is dereferenced. The current implementation only handles cases where the strategy is to change the type of the raw pointer to std::span. The fixit for this strategy is to fetch the first element from the corresponding span instance. For example for the code below, the PointerDereferenceGadget emits a fixit for S3 (S1, S2 are to be handled by other gadgets): S1: int *ptr = new int[10]; S2: int val1 = ptr[k]; // Unsafe operation S3: int val2 = *ptr; => Fixit: int val2 = ptr[0]; Differential revision: https://reviews.llvm.org/D143206 --- .../Analyses/UnsafeBufferUsageGadgets.def | 1 + clang/lib/Analysis/UnsafeBufferUsage.cpp | 70 +++++++++++++++++++ ...safe-buffer-usage-fixits-pointer-deref.cpp | 55 +++++++++++++++ 3 files changed, 126 insertions(+) create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-deref.cpp diff --git a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def index 89f7c1ed2ba24..a8485682c1d1f 100644 --- a/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def +++ b/clang/include/clang/Analysis/Analyses/UnsafeBufferUsageGadgets.def @@ -32,6 +32,7 @@ WARNING_GADGET(PointerArithmetic) WARNING_GADGET(UnsafeBufferUsageAttr) FIXABLE_GADGET(ULCArraySubscript) FIXABLE_GADGET(DerefSimplePtrArithFixable) +FIXABLE_GADGET(PointerDereference) #undef FIXABLE_GADGET #undef WARNING_GADGET diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 04e11d0471a7d..95e4c8388bc44 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -463,6 +463,45 @@ class ULCArraySubscriptGadget : public FixableGadget { return {}; } }; + +class PointerDereferenceGadget : public FixableGadget { + static constexpr const char *const BaseDeclRefExprTag = "BaseDRE"; + static constexpr const char *const OperatorTag = "op"; + + const DeclRefExpr *BaseDeclRefExpr = nullptr; + const UnaryOperator *Op = nullptr; + +public: + PointerDereferenceGadget(const MatchFinder::MatchResult &Result) + : FixableGadget(Kind::PointerDereference), + BaseDeclRefExpr( + Result.Nodes.getNodeAs(BaseDeclRefExprTag)), + Op(Result.Nodes.getNodeAs(OperatorTag)) {} + + static bool classof(const Gadget *G) { + return G->getKind() == Kind::PointerDereference; + } + + static Matcher matcher() { + auto Target = + unaryOperator( + hasOperatorName("*"), + has(expr(ignoringParenImpCasts( + declRefExpr(to(varDecl())).bind(BaseDeclRefExprTag))))) + .bind(OperatorTag); + + return expr(isInUnspecifiedLvalueContext(Target)); + } + + DeclUseList getClaimedVarUseSites() const override { + return {BaseDeclRefExpr}; + } + + virtual const Stmt *getBaseStmt() const final { return Op; } + + virtual std::optional getFixits(const Strategy &S) const 
override; +}; + } // namespace namespace { @@ -914,6 +953,37 @@ DerefSimplePtrArithFixableGadget::getFixits(const Strategy &s) const { return std::nullopt; // something wrong or unsupported, give up } +std::optional +PointerDereferenceGadget::getFixits(const Strategy &S) const { + const VarDecl *VD = cast(BaseDeclRefExpr->getDecl()); + switch (S.lookup(VD)) { + case Strategy::Kind::Span: { + ASTContext &Ctx = VD->getASTContext(); + SourceManager &SM = Ctx.getSourceManager(); + // Required changes: *(ptr); => (ptr[0]); and *ptr; => ptr[0] + // Deletes the *operand + CharSourceRange derefRange = clang::CharSourceRange::getCharRange( + Op->getBeginLoc(), Op->getBeginLoc().getLocWithOffset(1)); + // Inserts the [0] + std::optional endOfOperand = + getEndCharLoc(BaseDeclRefExpr, SM, Ctx.getLangOpts()); + if (endOfOperand) { + return FixItList{{FixItHint::CreateRemoval(derefRange), + FixItHint::CreateInsertion( + endOfOperand.value().getLocWithOffset(1), "[0]")}}; + } + } + case Strategy::Kind::Iterator: + case Strategy::Kind::Array: + case Strategy::Kind::Vector: + llvm_unreachable("Strategy not implemented yet!"); + case Strategy::Kind::Wontfix: + llvm_unreachable("Invalid strategy!"); + } + + return std::nullopt; +} + // For a non-null initializer `Init` of `T *` type, this function returns // `FixItHint`s producing a list initializer `{Init, S}` as a part of a fix-it // to output stream. diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-deref.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-deref.cpp new file mode 100644 index 0000000000000..4a02bbdf71182 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-fixits-pointer-deref.cpp @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -std=c++20 -Wunsafe-buffer-usage -fdiagnostics-parseable-fixits %s 2>&1 | FileCheck %s + +void basic_dereference() { + int tmp; + auto p = new int[10]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span p" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:12-[[@LINE-2]]:12}:"{" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:23-[[@LINE-3]]:23}:", 10}" + tmp = p[5]; + int val = *p; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:13-[[@LINE-1]]:14}:"" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:15-[[@LINE-2]]:15}:"[0]" +} + +int return_method() { + auto p = new int[10]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span p" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:12-[[@LINE-2]]:12}:"{" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:23-[[@LINE-3]]:23}:", 10}" + int tmp = p[5]; + return *p; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:10-[[@LINE-1]]:11}:"" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:12-[[@LINE-2]]:12}:"[0]" +} + +void foo(int v) { +} + +void method_invocation() { + auto p = new int[10]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span p" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:12-[[@LINE-2]]:12}:"{" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:23-[[@LINE-3]]:23}:", 10}" + + int tmp = p[5]; + + foo(*p); + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:7-[[@LINE-1]]:8}:"" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:9-[[@LINE-2]]:9}:"[0]" +} + +void binary_operation() { + auto p = new int[10]; + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-1]]:3-[[@LINE-1]]:11}:"std::span p" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:12-[[@LINE-2]]:12}:"{" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-3]]:23-[[@LINE-3]]:23}:", 10}" + + int tmp = p[5]; + + int k = *p + 20; + // CHECK-DAG: 
fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:12}:"" + // CHECK-DAG: fix-it:"{{.*}}":{[[@LINE-2]]:13-[[@LINE-2]]:13}:"[0]" + +} + From 909e5ce47a70181dead332826e93f89b2928f0c0 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Wed, 22 Mar 2023 19:04:09 -0400 Subject: [PATCH 353/691] [mlir][arith] Add `uitofp` support to WIE This includes standard LIT tests and integration tests with the LLVM CPU runner. I plan to use this to implement `sitofp` in D146597. Reviewed By: antiagainst Differential Revision: https://reviews.llvm.org/D146606 --- .../Arith/Transforms/EmulateWideInt.cpp | 69 ++++++++++++++++- .../emulate-wide-int-canonicalization.mlir | 14 ++++ mlir/test/Dialect/Arith/emulate-wide-int.mlir | 56 ++++++++++++++ .../test-wide-int-emulation-uitofp-i32.mlir | 77 +++++++++++++++++++ 4 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 mlir/test/Dialect/Arith/emulate-wide-int-canonicalization.mlir create mode 100644 mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-uitofp-i32.mlir diff --git a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp index db3ddab483b5a..83f01397c4490 100644 --- a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Func/Transforms/FuncConversions.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/IR/TypeUtilities.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/DialectConversion.h" #include "llvm/Support/FormatVariadic.h" @@ -906,6 +907,70 @@ struct ConvertShRSI final : OpConversionPattern<arith::ShRSIOp> { } }; +//===----------------------------------------------------------------------===// +// ConvertUIToFP +//===----------------------------------------------------------------------===// + +struct ConvertUIToFP final : OpConversionPattern<arith::UIToFPOp> { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(arith::UIToFPOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + + Type oldTy = op.getIn().getType(); + auto newTy = + dyn_cast_or_null<VectorType>(getTypeConverter()->convertType(oldTy)); + if (!newTy) + return rewriter.notifyMatchFailure( + loc, llvm::formatv("unsupported type: {0}", oldTy)); + unsigned newBitWidth = newTy.getElementTypeBitWidth(); + + auto [low, hi] = extractLastDimHalves(rewriter, loc, adaptor.getIn()); + Value lowInt = dropTrailingX1Dim(rewriter, loc, low); + Value hiInt = dropTrailingX1Dim(rewriter, loc, hi); + Value zeroCst = + createScalarOrSplatConstant(rewriter, loc, hiInt.getType(), 0); + + // The final result has the following form: + // if (hi == 0) return uitofp(low) + // else return uitofp(low) + uitofp(hi) * 2^BW + // + // where `BW` is the bitwidth of the narrowed integer type. We emit a + // select to make it easier to fold away the `hi` part calculation when it + // is known to be zero. + // + // Note 1: The emulation is precise only for input values that have an exact + // integer representation in the result floating point type, and may lead to + // loss of precision otherwise. + // + // Note 2: We do not strictly need the `hi == 0` case, but it makes + // constant folding easier.
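+ // (Editorial worked example, not in the original patch: emulating i32
+ // with i16 gives BW = 16, so an input of 65541 = 0x10005 splits into
+ // hi = 1 and low = 5, and the emulation computes
+ // uitofp(5) + uitofp(1) * 2^16 = 5.0 + 65536.0 = 65541.0; for an input
+ // such as 1337, hi == 0 and the select collapses to uitofp(1337).)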
+ Value hiEqZero = rewriter.create( + loc, arith::CmpIPredicate::eq, hiInt, zeroCst); + + Type resultTy = op.getType(); + Type resultElemTy = getElementTypeOrSelf(resultTy); + Value lowFp = rewriter.create(loc, resultTy, lowInt); + Value hiFp = rewriter.create(loc, resultTy, hiInt); + + int64_t pow2Int = int64_t(1) << newBitWidth; + Attribute pow2Attr = + rewriter.getFloatAttr(resultElemTy, static_cast(pow2Int)); + if (auto vecTy = dyn_cast(resultTy)) + pow2Attr = SplatElementsAttr::get(vecTy, pow2Attr); + + Value pow2Val = rewriter.create(loc, resultTy, pow2Attr); + + Value hiVal = rewriter.create(loc, hiFp, pow2Val); + Value result = rewriter.create(loc, lowFp, hiVal); + + rewriter.replaceOpWithNewOp(op, hiEqZero, lowFp, result); + return success(); + } +}; + //===----------------------------------------------------------------------===// // ConvertTruncI //===----------------------------------------------------------------------===// @@ -1080,6 +1145,6 @@ void arith::populateArithWideIntEmulationPatterns( ConvertIndexCastIntToIndex, ConvertIndexCastIntToIndex, ConvertIndexCastIndexToInt, - ConvertIndexCastIndexToInt>( - typeConverter, patterns.getContext()); + ConvertIndexCastIndexToInt, + ConvertUIToFP>(typeConverter, patterns.getContext()); } diff --git a/mlir/test/Dialect/Arith/emulate-wide-int-canonicalization.mlir b/mlir/test/Dialect/Arith/emulate-wide-int-canonicalization.mlir new file mode 100644 index 0000000000000..0c95ab8284afa --- /dev/null +++ b/mlir/test/Dialect/Arith/emulate-wide-int-canonicalization.mlir @@ -0,0 +1,14 @@ +// RUN: mlir-opt --arith-emulate-wide-int="widest-int-supported=32" --canonicalize %s | FileCheck %s + +// Check that we can fold away the 'hi' part calculation when it is know to be zero. +// +// CHECK-LABEL: func @uitofp_i16_ext_f64 +// CHECK-SAME: ([[ARG:%.+]]: i16) -> f64 +// CHECK-NEXT: [[EXT:%.+]] = arith.extui [[ARG]] : i16 to i32 +// CHECK-NEXT: [[FP:%.+]] = arith.uitofp [[EXT]] : i32 to f64 +// CHECK-NEXT: return [[FP]] : f64 +func.func @uitofp_i16_ext_f64(%a : i16) -> f64 { + %ext = arith.extui %a : i16 to i64 + %r = arith.uitofp %ext : i64 to f64 + return %r : f64 +} diff --git a/mlir/test/Dialect/Arith/emulate-wide-int.mlir b/mlir/test/Dialect/Arith/emulate-wide-int.mlir index 80edc6f2ad001..55b4e7f89b0ac 100644 --- a/mlir/test/Dialect/Arith/emulate-wide-int.mlir +++ b/mlir/test/Dialect/Arith/emulate-wide-int.mlir @@ -908,3 +908,59 @@ func.func @xori_vector_a_b(%a : vector<3xi64>, %b : vector<3xi64>) -> vector<3xi %x = arith.xori %a, %b : vector<3xi64> return %x : vector<3xi64> } + +// CHECK-LABEL: func @uitofp_i64_f64 +// CHECK-SAME: ([[ARG:%.+]]: vector<2xi32>) -> f64 +// CHECK-NEXT: [[LOW:%.+]] = vector.extract [[ARG]][0] : vector<2xi32> +// CHECK-NEXT: [[HI:%.+]] = vector.extract [[ARG]][1] : vector<2xi32> +// CHECK-NEXT: [[CST0:%.+]] = arith.constant 0 : i32 +// CHECK-NEXT: [[HIEQ0:%.+]] = arith.cmpi eq, [[HI]], [[CST0]] : i32 +// CHECK-NEXT: [[LOWFP:%.+]] = arith.uitofp [[LOW]] : i32 to f64 +// CHECK-NEXT: [[HIFP:%.+]] = arith.uitofp [[HI]] : i32 to f64 +// CHECK-NEXT: [[POW:%.+]] = arith.constant 0x41F0000000000000 : f64 +// CHECK-NEXT: [[RESHI:%.+]] = arith.mulf [[HIFP]], [[POW]] : f64 +// CHECK-NEXT: [[RES:%.+]] = arith.addf [[LOWFP]], [[RESHI]] : f64 +// CHECK-NEXT: [[SEL:%.+]] = arith.select [[HIEQ0]], [[LOWFP]], [[RES]] : f64 +// CHECK-NEXT: return [[SEL]] : f64 +func.func @uitofp_i64_f64(%a : i64) -> f64 { + %r = arith.uitofp %a : i64 to f64 + return %r : f64 +} + +// CHECK-LABEL: func @uitofp_i64_f64_vector +// CHECK-SAME: 
([[ARG:%.+]]: vector<3x2xi32>) -> vector<3xf64> +// CHECK-NEXT: [[EXTLOW:%.+]] = vector.extract_strided_slice [[ARG]] {offsets = [0, 0], sizes = [3, 1], strides = [1, 1]} : vector<3x2xi32> to vector<3x1xi32> +// CHECK-NEXT: [[EXTHI:%.+]] = vector.extract_strided_slice [[ARG]] {offsets = [0, 1], sizes = [3, 1], strides = [1, 1]} : vector<3x2xi32> to vector<3x1xi32> +// CHECK-NEXT: [[LOW:%.+]] = vector.shape_cast [[EXTLOW]] : vector<3x1xi32> to vector<3xi32> +// CHECK-NEXT: [[HI:%.+]] = vector.shape_cast [[EXTHI]] : vector<3x1xi32> to vector<3xi32> +// CHECK-NEXT: [[CST0:%.+]] = arith.constant dense<0> : vector<3xi32> +// CHECK-NEXT: [[HIEQ0:%.+]] = arith.cmpi eq, [[HI]], [[CST0]] : vector<3xi32> +// CHECK-NEXT: [[LOWFP:%.+]] = arith.uitofp [[LOW]] : vector<3xi32> to vector<3xf64> +// CHECK-NEXT: [[HIFP:%.+]] = arith.uitofp [[HI]] : vector<3xi32> to vector<3xf64> +// CHECK-NEXT: [[POW:%.+]] = arith.constant dense<0x41F0000000000000> : vector<3xf64> +// CHECK-NEXT: [[RESHI:%.+]] = arith.mulf [[HIFP]], [[POW]] : vector<3xf64> +// CHECK-NEXT: [[RES:%.+]] = arith.addf [[LOWFP]], [[RESHI]] : vector<3xf64> +// CHECK-NEXT: [[SEL:%.+]] = arith.select [[HIEQ0]], [[LOWFP]], [[RES]] : vector<3xi1>, vector<3xf64> +// CHECK-NEXT: return [[SEL]] : vector<3xf64> +func.func @uitofp_i64_f64_vector(%a : vector<3xi64>) -> vector<3xf64> { + %r = arith.uitofp %a : vector<3xi64> to vector<3xf64> + return %r : vector<3xf64> +} + +// CHECK-LABEL: func @uitofp_i64_f16 +// CHECK-SAME: ([[ARG:%.+]]: vector<2xi32>) -> f16 +// CHECK-NEXT: [[LOW:%.+]] = vector.extract [[ARG]][0] : vector<2xi32> +// CHECK-NEXT: [[HI:%.+]] = vector.extract [[ARG]][1] : vector<2xi32> +// CHECK-NEXT: [[CST0:%.+]] = arith.constant 0 : i32 +// CHECK-NEXT: [[HIEQ0:%.+]] = arith.cmpi eq, [[HI]], [[CST0]] : i32 +// CHECK-NEXT: [[LOWFP:%.+]] = arith.uitofp [[LOW]] : i32 to f16 +// CHECK-NEXT: [[HIFP:%.+]] = arith.uitofp [[HI]] : i32 to f16 +// CHECK-NEXT: [[POW:%.+]] = arith.constant 0x7C00 : f16 +// CHECK-NEXT: [[RESHI:%.+]] = arith.mulf [[HIFP]], [[POW]] : f16 +// CHECK-NEXT: [[RES:%.+]] = arith.addf [[LOWFP]], [[RESHI]] : f16 +// CHECK-NEXT: [[SEL:%.+]] = arith.select [[HIEQ0]], [[LOWFP]], [[RES]] : f16 +// CHECK-NEXT: return [[SEL]] : f16 +func.func @uitofp_i64_f16(%a : i64) -> f16 { + %r = arith.uitofp %a : i64 to f16 + return %r : f16 +} diff --git a/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-uitofp-i32.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-uitofp-i32.mlir new file mode 100644 index 0000000000000..c3d7db0de6d20 --- /dev/null +++ b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-uitofp-i32.mlir @@ -0,0 +1,77 @@ +// Check that the wide integer `arith.uitofp` emulation produces the same result as wide +// `arith.uitofp`. Emulate i32 ops with i16 ops. + +// RUN: mlir-opt %s --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \ +// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils | \ +// RUN: FileCheck %s --match-full-lines + +// RUN: mlir-opt %s --test-arith-emulate-wide-int="widest-int-supported=16" \ +// RUN: --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \ +// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils | \ +// RUN: FileCheck %s --match-full-lines + +// Ops in this function *only* will be emulated using i16 types. 
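+// (Editorial note, not in the original patch: `arith.uitofp` interprets the
+// i32 bit pattern as unsigned, which is why the negative constants below
+// print as large values, e.g. -1 = 0xFFFFFFFF becomes 4294967295 ~ 4.2e+09
+// and -32768 becomes 4294934528 ~ 4.2e+09.)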
+func.func @emulate_uitofp(%arg: i32) -> f32 { + %res = arith.uitofp %arg : i32 to f32 + return %res : f32 +} + +func.func @check_uitofp(%arg : i32) -> () { + %res = func.call @emulate_uitofp(%arg) : (i32) -> (f32) + vector.print %res : f32 + return +} + +func.func @entry() { + %cst0 = arith.constant 0 : i32 + %cst1 = arith.constant 1 : i32 + %cst2 = arith.constant 2 : i32 + %cst7 = arith.constant 7 : i32 + %cst1337 = arith.constant 1337 : i32 + %cst_i16_max = arith.constant 65535 : i32 + %cst_i16_overflow = arith.constant 65536 : i32 + + %cst_n1 = arith.constant -1 : i32 + %cst_n13 = arith.constant -13 : i32 + %cst_n1337 = arith.constant -1337 : i32 + + %cst_i16_min = arith.constant -32768 : i32 + + %cst_f32_int_max = arith.constant 16777217 : i32 + %cst_f32_int_min = arith.constant -16777217 : i32 + + // CHECK: 0 + func.call @check_uitofp(%cst0) : (i32) -> () + // CHECK-NEXT: 1 + func.call @check_uitofp(%cst1) : (i32) -> () + // CHECK-NEXT: 2 + func.call @check_uitofp(%cst2) : (i32) -> () + // CHECK-NEXT: 7 + func.call @check_uitofp(%cst7) : (i32) -> () + // CHECK-NEXT: 1337 + func.call @check_uitofp(%cst1337) : (i32) -> () + // CHECK-NEXT: 65535 + func.call @check_uitofp(%cst_i16_max) : (i32) -> () + // CHECK-NEXT: 65536 + func.call @check_uitofp(%cst_i16_overflow) : (i32) -> () + + // CHECK-NEXT: 4.2{{.+}}e+09 + func.call @check_uitofp(%cst_n1) : (i32) -> () + // CHECK-NEXT: 4.2{{.+}}e+09 + func.call @check_uitofp(%cst_n1337) : (i32) -> () + + // CHECK-NEXT: 4.2{{.+}}e+09 + func.call @check_uitofp(%cst_i16_min) : (i32) -> () + // CHECK-NEXT: 4.2{{.+}}e+09 + func.call @check_uitofp(%cst_i16_min) : (i32) -> () + // CHECK-NEXT: 1.6{{.+}}e+07 + func.call @check_uitofp(%cst_f32_int_max) : (i32) -> () + // CHECK-NEXT: 4.2{{.+}}e+09 + func.call @check_uitofp(%cst_f32_int_min) : (i32) -> () + + return +} From abfc358cff0c0cfc8ffbc6c164d97e13a18a1685 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Wed, 22 Mar 2023 19:09:48 -0400 Subject: [PATCH 354/691] [mlir][arith] Add `sitofp` support to WIE This depends on the handling of `uitofp` in D146606. 
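As an editorial sketch of the approach (the pattern itself is in the diff below): the conversion runs on the absolute value, and the result is negated for negative inputs. For example, for an i32 input of -1337, the two's complement negation (-1337 XOR -1) + 1 yields 1337, the `uitofp` path produces 1337.0, and the final negf/select yields -1337.0, matching the integration test below.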
Reviewed By: antiagainst Differential Revision: https://reviews.llvm.org/D146597 --- .../Arith/Transforms/EmulateWideInt.cpp | 49 ++++++++++++- mlir/test/Dialect/Arith/emulate-wide-int.mlir | 43 ++++++++++++ .../test-wide-int-emulation-sitofp-i32.mlir | 68 +++++++++++++++++++ 3 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-sitofp-i32.mlir diff --git a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp index 83f01397c4490..781ea3d3eca63 100644 --- a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp @@ -16,6 +16,7 @@ #include "mlir/IR/TypeUtilities.h" #include "mlir/Support/LogicalResult.h" #include "mlir/Transforms/DialectConversion.h" +#include "llvm/ADT/APInt.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include @@ -907,6 +908,52 @@ struct ConvertShRSI final : OpConversionPattern<arith::ShRSIOp> { } }; +//===----------------------------------------------------------------------===// +// ConvertSIToFP +//===----------------------------------------------------------------------===// + +struct ConvertSIToFP final : OpConversionPattern<arith::SIToFPOp> { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(arith::SIToFPOp op, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + + Value in = op.getIn(); + Type oldTy = in.getType(); + auto newTy = + dyn_cast_or_null<VectorType>(getTypeConverter()->convertType(oldTy)); + if (!newTy) + return rewriter.notifyMatchFailure( + loc, llvm::formatv("unsupported type: {0}", oldTy)); + + unsigned oldBitWidth = getElementTypeOrSelf(oldTy).getIntOrFloatBitWidth(); + Value zeroCst = createScalarOrSplatConstant(rewriter, loc, oldTy, 0); + Value oneCst = createScalarOrSplatConstant(rewriter, loc, oldTy, 1); + Value allOnesCst = createScalarOrSplatConstant( + rewriter, loc, oldTy, APInt::getAllOnes(oldBitWidth)); + + // To avoid operating on very large unsigned numbers, perform the + // conversion on the absolute value. Then, decide whether to negate the + // result or not based on the sign bit. We assume two's complement and + // implement negation by flipping all bits and adding 1. + // Note that this relies on the other conversion patterns to legalize
+ Value isNeg = rewriter.create(loc, arith::CmpIPredicate::slt, + in, zeroCst); + Value bitwiseNeg = rewriter.create(loc, in, allOnesCst); + Value neg = rewriter.create(loc, bitwiseNeg, oneCst); + Value abs = rewriter.create(loc, isNeg, neg, in); + + Value absResult = rewriter.create(loc, op.getType(), abs); + Value negResult = rewriter.create(loc, absResult); + rewriter.replaceOpWithNewOp(op, isNeg, negResult, + absResult); + return success(); + } +}; + //===----------------------------------------------------------------------===// // ConvertUIToFP //===----------------------------------------------------------------------===// @@ -1146,5 +1193,5 @@ void arith::populateArithWideIntEmulationPatterns( ConvertIndexCastIntToIndex, ConvertIndexCastIndexToInt, ConvertIndexCastIndexToInt, - ConvertUIToFP>(typeConverter, patterns.getContext()); + ConvertSIToFP, ConvertUIToFP>(typeConverter, patterns.getContext()); } diff --git a/mlir/test/Dialect/Arith/emulate-wide-int.mlir b/mlir/test/Dialect/Arith/emulate-wide-int.mlir index 55b4e7f89b0ac..9fb5478d7e94f 100644 --- a/mlir/test/Dialect/Arith/emulate-wide-int.mlir +++ b/mlir/test/Dialect/Arith/emulate-wide-int.mlir @@ -964,3 +964,46 @@ func.func @uitofp_i64_f16(%a : i64) -> f16 { %r = arith.uitofp %a : i64 to f16 return %r : f16 } + +// CHECK-LABEL: func @sitofp_i64_f64 +// CHECK-SAME: ([[ARG:%.+]]: vector<2xi32>) -> f64 +// CHECK: [[VONES:%.+]] = arith.constant dense<-1> : vector<2xi32> +// CHECK: [[ONES1:%.+]] = vector.extract [[VONES]][0] : vector<2xi32> +// CHECK-NEXT: [[ONES2:%.+]] = vector.extract [[VONES]][1] : vector<2xi32> +// CHECK: arith.xori {{%.+}}, [[ONES1]] : i32 +// CHECK-NEXT: arith.xori {{%.+}}, [[ONES2]] : i32 +// CHECK: [[CST0:%.+]] = arith.constant 0 : i32 +// CHECK: [[HIEQ0:%.+]] = arith.cmpi eq, [[HI:%.+]], [[CST0]] : i32 +// CHECK-NEXT: [[LOWFP:%.+]] = arith.uitofp [[LOW:%.+]] : i32 to f64 +// CHECK-NEXT: [[HIFP:%.+]] = arith.uitofp [[HI]] : i32 to f64 +// CHECK-NEXT: [[POW:%.+]] = arith.constant 0x41F0000000000000 : f64 +// CHECK-NEXT: [[RESHI:%.+]] = arith.mulf [[HIFP]], [[POW]] : f64 +// CHECK-NEXT: [[RES:%.+]] = arith.addf [[LOWFP]], [[RESHI]] : f64 +// CHECK-NEXT: [[SEL:%.+]] = arith.select [[HIEQ0]], [[LOWFP]], [[RES]] : f64 +// CHECK-NEXT: [[NEG:%.+]] = arith.negf [[SEL]] : f64 +// CHECK-NEXT: [[FINAL:%.+]] = arith.select %{{.+}}, [[NEG]], [[SEL]] : f64 +// CHECK-NEXT: return [[FINAL]] : f64 +func.func @sitofp_i64_f64(%a : i64) -> f64 { + %r = arith.sitofp %a : i64 to f64 + return %r : f64 +} + +// CHECK-LABEL: func @sitofp_i64_f64_vector +// CHECK-SAME: ([[ARG:%.+]]: vector<3x2xi32>) -> vector<3xf64> +// CHECK: [[VONES:%.+]] = arith.constant dense<-1> : vector<3x2xi32> +// CHECK: arith.xori +// CHECK-NEXT: arith.xori +// CHECK: [[HIEQ0:%.+]] = arith.cmpi eq, [[HI:%.+]], [[CST0:%.+]] : vector<3xi32> +// CHECK-NEXT: [[LOWFP:%.+]] = arith.uitofp [[LOW:%.+]] : vector<3xi32> to vector<3xf64> +// CHECK-NEXT: [[HIFP:%.+]] = arith.uitofp [[HI:%.+]] : vector<3xi32> to vector<3xf64> +// CHECK-NEXT: [[POW:%.+]] = arith.constant dense<0x41F0000000000000> : vector<3xf64> +// CHECK-NEXT: [[RESHI:%.+]] = arith.mulf [[HIFP]], [[POW]] : vector<3xf64> +// CHECK-NEXT: [[RES:%.+]] = arith.addf [[LOWFP]], [[RESHI]] : vector<3xf64> +// CHECK-NEXT: [[SEL:%.+]] = arith.select [[HIEQ0]], [[LOWFP]], [[RES]] : vector<3xi1>, vector<3xf64> +// CHECK-NEXT: [[NEG:%.+]] = arith.negf [[SEL]] : vector<3xf64> +// CHECK-NEXT: [[FINAL:%.+]] = arith.select %{{.+}}, [[NEG]], [[SEL]] : vector<3xi1>, vector<3xf64> +// CHECK-NEXT: return [[FINAL]] : 
vector<3xf64> +func.func @sitofp_i64_f64_vector(%a : vector<3xi64>) -> vector<3xf64> { + %r = arith.sitofp %a : vector<3xi64> to vector<3xf64> + return %r : vector<3xf64> +} diff --git a/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-sitofp-i32.mlir b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-sitofp-i32.mlir new file mode 100644 index 0000000000000..3fc008705f111 --- /dev/null +++ b/mlir/test/Integration/Dialect/Arith/CPU/test-wide-int-emulation-sitofp-i32.mlir @@ -0,0 +1,68 @@ +// Check that the wide integer `arith.sitofp` emulation produces the same result as wide +// `arith.sitofp`. Emulate i32 ops with i16 ops. + +// RUN: mlir-opt %s --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \ +// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils | \ +// RUN: FileCheck %s --match-full-lines + +// RUN: mlir-opt %s --test-arith-emulate-wide-int="widest-int-supported=16" \ +// RUN: --convert-scf-to-cf --convert-cf-to-llvm --convert-vector-to-llvm \ +// RUN: --convert-func-to-llvm --convert-arith-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: --shared-libs=%mlir_c_runner_utils | \ +// RUN: FileCheck %s --match-full-lines + +// Ops in this function *only* will be emulated using i16 types. +func.func @emulate_sitofp(%arg: i32) -> f32 { + %res = arith.sitofp %arg : i32 to f32 + return %res : f32 +} + +func.func @check_sitofp(%arg : i32) -> () { + %res = func.call @emulate_sitofp(%arg) : (i32) -> (f32) + vector.print %res : f32 + return +} + +func.func @entry() { + %cst0 = arith.constant 0 : i32 + %cst1 = arith.constant 1 : i32 + %cst2 = arith.constant 2 : i32 + %cst7 = arith.constant 7 : i32 + %cst1337 = arith.constant 1337 : i32 + + %cst_n1 = arith.constant -1 : i32 + %cst_n13 = arith.constant -13 : i32 + %cst_n1337 = arith.constant -1337 : i32 + + %cst_i16_min = arith.constant -32768 : i32 + + %cst_f32_int_max = arith.constant 16777217 : i32 + %cst_f32_int_min = arith.constant -16777217 : i32 + + // CHECK: 0 + func.call @check_sitofp(%cst0) : (i32) -> () + // CHECK-NEXT: 1 + func.call @check_sitofp(%cst1) : (i32) -> () + // CHECK-NEXT: 2 + func.call @check_sitofp(%cst2) : (i32) -> () + // CHECK-NEXT: 7 + func.call @check_sitofp(%cst7) : (i32) -> () + // CHECK-NEXT: 1337 + func.call @check_sitofp(%cst1337) : (i32) -> () + // CHECK-NEXT: -1 + func.call @check_sitofp(%cst_n1) : (i32) -> () + // CHECK-NEXT: -1337 + func.call @check_sitofp(%cst_n1337) : (i32) -> () + + // CHECK-NEXT: -32768 + func.call @check_sitofp(%cst_i16_min) : (i32) -> () + // CHECK-NEXT: 1.6{{.+}}e+07 + func.call @check_sitofp(%cst_f32_int_max) : (i32) -> () + // CHECK-NEXT: -1.6{{.+}}e+07 + func.call @check_sitofp(%cst_f32_int_min) : (i32) -> () + + return +} From c81f14e5898c37c13e5b22485cf37cc124caf0c1 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Wed, 22 Mar 2023 19:15:12 -0400 Subject: [PATCH 355/691] [mlir][arith] Fix typos in WIE. NFC. 
---
 .../Dialect/Arith/Transforms/WideIntEmulationConverter.h | 2 +-
 mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp     | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Arith/Transforms/WideIntEmulationConverter.h b/mlir/include/mlir/Dialect/Arith/Transforms/WideIntEmulationConverter.h
index ea0ab14e9b8f1..5dbbfedcc70ee 100644
--- a/mlir/include/mlir/Dialect/Arith/Transforms/WideIntEmulationConverter.h
+++ b/mlir/include/mlir/Dialect/Arith/Transforms/WideIntEmulationConverter.h
@@ -16,7 +16,7 @@ namespace mlir::arith {
 /// two halves and thus turning into supported ones, i.e., i2*N --> iN, where N
 /// is the widest integer bitwidth supported by the target.
 /// Currently, we only handle power-of-two integer types and support conversions
-/// of integers twice as wide as the maxium supported by the target. Wide
+/// of integers twice as wide as the maximum supported by the target. Wide
 /// integers are represented as vectors, e.g., i64 --> vector<2xi32>, where the
 /// first element is the low half of the original integer, and the second
 /// element the high half.
diff --git a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
index 781ea3d3eca63..96a58459a37b9 100644
--- a/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
+++ b/mlir/lib/Dialect/Arith/Transforms/EmulateWideInt.cpp
@@ -43,7 +43,7 @@ static std::pair<APInt, APInt> getHalves(const APInt &value,
   return {std::move(low), std::move(high)};
 }
 
-/// Returns the type with the last (innermost) dimention reduced to x1.
+/// Returns the type with the last (innermost) dimension reduced to x1.
 /// Scalarizes 1D vector inputs to match how we extract/insert vector values,
 /// e.g.:
 ///   - vector<3x2xi16> --> vector<3x1xi16>
@@ -128,7 +128,7 @@ static Value dropTrailingX1Dim(ConversionPatternRewriter &rewriter,
   if (!vecTy)
     return input;
 
-  // Shape cast to drop the last x1 dimention.
+  // Shape cast to drop the last x1 dimension.
   ArrayRef<int64_t> shape = vecTy.getShape();
   assert(shape.size() >= 2 && "Expected vector with at least two dims");
   assert(shape.back() == 1 && "Expected the last vector dim to be x1");
@@ -177,13 +177,13 @@ static Value insertLastDimSlice(ConversionPatternRewriter &rewriter,
 /// dimension.
 /// When all `resultComponents` are scalars, the result type is `vector<NxT>`;
 /// when `resultComponents` are `vector<...x1xT>`s, the result type is
-/// `vector<...xNxT>`, where `N` is the number of `resultComponenets`.
+/// `vector<...xNxT>`, where `N` is the number of `resultComponents`.
 static Value constructResultVector(ConversionPatternRewriter &rewriter,
                                    Location loc, VectorType resultType,
                                    ValueRange resultComponents) {
   llvm::ArrayRef<int64_t> resultShape = resultType.getShape();
   (void)resultShape;
-  assert(!resultShape.empty() && "Result expected to have dimentions");
+  assert(!resultShape.empty() && "Result expected to have dimensions");
   assert(resultShape.back() == static_cast<int64_t>(resultComponents.size()) &&
          "Wrong number of result components");
 

From 0c0387c7a5e979d2dbf791404c7398856895f8fb Mon Sep 17 00:00:00 2001
From: Pavel Kopyl
Date: Thu, 16 Mar 2023 21:53:14 +0100
Subject: [PATCH 356/691] [NVPTX] Port GenericToNVVM to the new PM.
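
The transform itself is unchanged; the port follows the usual recipe of
splitting the logic out of the pass class and wrapping it once for each pass
manager. A condensed sketch of that shape, with placeholder names (Impl,
LegacyWrapper, NewWrapper) rather than the ones used in this patch:

  #include "llvm/IR/Module.h"
  #include "llvm/IR/PassManager.h"
  #include "llvm/Pass.h"
  using namespace llvm;

  // Pass-manager-agnostic implementation; both wrappers delegate here.
  class Impl {
  public:
    bool runOnModule(Module &M) { return false; /* transform goes here */ }
  };

  // Legacy pass manager wrapper.
  struct LegacyWrapper : ModulePass {
    static char ID;
    LegacyWrapper() : ModulePass(ID) {}
    bool runOnModule(Module &M) override { return Impl().runOnModule(M); }
  };
  char LegacyWrapper::ID = 0;

  // New pass manager wrapper: translate the changed/unchanged result into
  // the PreservedAnalyses set that the new PM expects.
  struct NewWrapper : PassInfoMixin<NewWrapper> {
    PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
      return Impl().runOnModule(M) ? PreservedAnalyses::none()
                                   : PreservedAnalyses::all();
    }
  };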
Differential Revision: https://reviews.llvm.org/D146345
---
 llvm/lib/Target/NVPTX/NVPTX.h                 |  6 ++-
 llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp  | 52 ++++++++++++-------
 llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp  | 16 ++++--
 llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll |  1 +
 4 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index 95184420f6087..521a7843b1142 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -38,7 +38,7 @@ enum CondCodes {
 FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
                                  llvm::CodeGenOpt::Level OptLevel);
 ModulePass *createNVPTXAssignValidGlobalNamesPass();
-ModulePass *createGenericToNVVMPass();
+ModulePass *createGenericToNVVMLegacyPass();
 FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
 FunctionPass *createNVVMReflectPass(unsigned int SmVersion);
 MachineFunctionPass *createNVPTXPrologEpilogPass();
@@ -67,6 +67,10 @@ struct NVVMReflectPass : PassInfoMixin<NVVMReflectPass> {
   unsigned SmVersion;
 };
 
+struct GenericToNVVMPass : PassInfoMixin<GenericToNVVMPass> {
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
 namespace NVPTX {
 enum DrvInterface {
   NVCL,
diff --git a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index d892023c6cb7f..4f03e474edb47 100644
--- a/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -29,19 +29,13 @@
 using namespace llvm;
 
 namespace llvm {
-void initializeGenericToNVVMPass(PassRegistry &);
+void initializeGenericToNVVMLegacyPassPass(PassRegistry &);
 }
 
 namespace {
-class GenericToNVVM : public ModulePass {
+class GenericToNVVM {
 public:
-  static char ID;
-
-  GenericToNVVM() : ModulePass(ID) {}
-
-  bool runOnModule(Module &M) override;
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {}
+  bool runOnModule(Module &M);
 
 private:
   Value *remapConstant(Module *M, Function *F, Constant *C,
@@ -59,15 +53,6 @@ class GenericToNVVM : public ModulePass {
 };
 } // end namespace
 
-char GenericToNVVM::ID = 0;
-
-ModulePass *llvm::createGenericToNVVMPass() { return new GenericToNVVM(); }
-
-INITIALIZE_PASS(
-    GenericToNVVM, "generic-to-nvvm",
-    "Ensure that the global variables are in the global address space", false,
-    false)
-
 bool GenericToNVVM::runOnModule(Module &M) {
   // Create a clone of each global variable that has the default address space.
   // The clone is created with the global address space specifier, and the pair
@@ -293,3 +278,34 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
     llvm_unreachable("GenericToNVVM encountered an unsupported ConstantExpr");
   }
 }
+
+namespace {
+class GenericToNVVMLegacyPass : public ModulePass {
+public:
+  static char ID;
+
+  GenericToNVVMLegacyPass() : ModulePass(ID) {}
+
+  bool runOnModule(Module &M) override;
+};
+} // namespace
+
+char GenericToNVVMLegacyPass::ID = 0;
+
+ModulePass *llvm::createGenericToNVVMLegacyPass() {
+  return new GenericToNVVMLegacyPass();
+}
+
+INITIALIZE_PASS(
+    GenericToNVVMLegacyPass, "generic-to-nvvm",
+    "Ensure that the global variables are in the global address space", false,
+    false)
+
+bool GenericToNVVMLegacyPass::runOnModule(Module &M) {
+  return GenericToNVVM().runOnModule(M);
+}
+
+PreservedAnalyses GenericToNVVMPass::run(Module &M, ModuleAnalysisManager &AM) {
+  return GenericToNVVM().runOnModule(M) ? PreservedAnalyses::none()
+                                        : PreservedAnalyses::all();
+}
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index 5ff9b4df6d7f9..3d1e4fcde90aa 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -64,7 +64,7 @@ static cl::opt<bool> UseShortPointersOpt(
 
 namespace llvm {
 
-void initializeGenericToNVVMPass(PassRegistry&);
+void initializeGenericToNVVMLegacyPassPass(PassRegistry &);
 void initializeNVPTXAllocaHoistingPass(PassRegistry &);
 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
 void initializeNVPTXAtomicLowerPass(PassRegistry &);
@@ -89,7 +89,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
   // but it's very NVPTX-specific.
   initializeNVVMReflectPass(PR);
   initializeNVVMIntrRangePass(PR);
-  initializeGenericToNVVMPass(PR);
+  initializeGenericToNVVMLegacyPassPass(PR);
   initializeNVPTXAllocaHoistingPass(PR);
   initializeNVPTXAssignValidGlobalNamesPass(PR);
   initializeNVPTXAtomicLowerPass(PR);
@@ -246,6 +246,16 @@ void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
         return false;
       });
 
+  PB.registerPipelineParsingCallback(
+      [](StringRef PassName, ModulePassManager &PM,
+         ArrayRef<PassBuilder::PipelineElement>) {
+        if (PassName == "generic-to-nvvm") {
+          PM.addPass(GenericToNVVMPass());
+          return true;
+        }
+        return false;
+      });
+
   PB.registerPipelineStartEPCallback(
       [this](ModulePassManager &PM, OptimizationLevel Level) {
         FunctionPassManager FPM;
@@ -348,7 +358,7 @@ void NVPTXPassConfig::addIRPasses() {
   if (getOptLevel() != CodeGenOpt::None)
     addPass(createNVPTXImageOptimizerPass());
   addPass(createNVPTXAssignValidGlobalNamesPass());
-  addPass(createGenericToNVVMPass());
+  addPass(createGenericToNVVMLegacyPass());
 
   // NVPTXLowerArgs is required for correctness and should be run right
   // before the address space inference passes.
diff --git a/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll b/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll
index 51344b474d29e..daed7c1c98f0b 100644
--- a/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll
+++ b/llvm/test/CodeGen/NVPTX/generic-to-nvvm-ir.ll
@@ -1,6 +1,7 @@
 ; Verify functionality of NVPTXGenericToNVVM.cpp pass.
 ;
 ; RUN: opt < %s -march nvptx64 -S -generic-to-nvvm | FileCheck %s
+; RUN: opt < %s -march nvptx64 -S -passes='generic-to-nvvm' | FileCheck %s
 
 target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
 target triple = "nvptx64-nvidia-cuda"

From fd47ab05e5abd98254d2bba012d81dbb00217812 Mon Sep 17 00:00:00 2001
From: Amy Huang
Date: Wed, 22 Mar 2023 15:55:18 -0700
Subject: [PATCH 357/691] Add "REQUIRES: asserts" to test that uses --debug-only flag

---
 bolt/test/X86/section-end-sym.s | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/test/X86/section-end-sym.s b/bolt/test/X86/section-end-sym.s
index a9bca5604ec16..38517bf7e0719 100644
--- a/bolt/test/X86/section-end-sym.s
+++ b/bolt/test/X86/section-end-sym.s
@@ -1,7 +1,7 @@
 ## Check that BOLT doesn't consider end-of-section symbols (e.g., _etext) as
 ## functions.
-# REQUIRES: system-linux
+# REQUIRES: system-linux, asserts
 
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
 # RUN: ld.lld %t.o -o %t.exe -q

From d859275e7701c10b7dfe8b2be27b8eae4d97a7bd Mon Sep 17 00:00:00 2001
From: Kai Sasaki
Date: Thu, 23 Mar 2023 09:50:40 +0900
Subject: [PATCH 358/691] [mlir] Fix typo for unknown operation

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D146607
---
 mlir/lib/IR/AsmPrinter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index dd3112516fc51..75448955f3123 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -1369,7 +1369,7 @@ void SSANameState::printValueID(Value value, bool printResultNo,
 void SSANameState::printOperationID(Operation *op, raw_ostream &stream) const {
   auto it = operationIDs.find(op);
   if (it == operationIDs.end()) {
-    stream << "<<UNKOWN OPERATION>>";
+    stream << "<<UNKNOWN OPERATION>>";
   } else {
     stream << '%' << it->second;
   }

From 9855fe4568770947abf6c465c513dfd4a6c6dca6 Mon Sep 17 00:00:00 2001
From: Ben Shi
Date: Tue, 14 Mar 2023 17:27:47 +0800
Subject: [PATCH 359/691] [RISCV][NFC] Add more tests for SLP vectorization (binops on load/store)

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D146025
---
 .../SLPVectorizer/RISCV/load-binop-store.ll   | 386 ++++++++++++++++++
 1 file changed, 386 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll
new file mode 100644
index 0000000000000..92b0f83c84b9e
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/load-binop-store.ll
@@ -0,0 +1,386 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v \
+; RUN: -riscv-v-vector-bits-min=-1 -riscv-v-slp-max-vf=0 -S | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 -mattr=+v -S | FileCheck %s --check-prefixes=DEFAULT
+
+define void @vec_add(ptr %dest, ptr %p) {
+; CHECK-LABEL: @vec_add(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i16> [[TMP0]], <i16 1, i16 1>
+; CHECK-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+; DEFAULT-LABEL: @vec_add(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4
+; DEFAULT-NEXT:    [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; DEFAULT-NEXT:    [[E1:%.*]] = load i16, ptr [[INC]], align 2
+; DEFAULT-NEXT:    [[A0:%.*]] = add i16 [[E0]], 1
+; DEFAULT-NEXT:    [[A1:%.*]] = add i16 [[E1]], 1
+; DEFAULT-NEXT:    store i16 [[A0]], ptr [[DEST:%.*]], align 4
+; DEFAULT-NEXT:    [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1
+; DEFAULT-NEXT:    store i16 [[A1]], ptr [[INC2]], align 2
+; DEFAULT-NEXT:    ret void
+;
+entry:
+  %e0 = load i16, ptr %p, align 4
+  %inc = getelementptr inbounds i16, ptr %p, i64 1
+  %e1 = load i16, ptr %inc, align 2
+
+  %a0 = add i16 %e0, 1
+  %a1 = add i16 %e1, 1
+
+  store i16 %a0, ptr %dest, align 4
+  %inc2 = getelementptr inbounds i16, ptr %dest, i64 1
+  store i16 %a1, ptr %inc2, align 2
+  ret void
+}
+
+define void @vec_sub(ptr %dest, ptr %p) {
+; CHECK-LABEL: @vec_sub(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i16> [[TMP0]], <i16 17, i16 17>
+; CHECK-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+; DEFAULT-LABEL: @vec_sub(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4
+; DEFAULT-NEXT:    [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; DEFAULT-NEXT:    [[E1:%.*]] = load i16, ptr [[INC]], align 2
+; DEFAULT-NEXT:    [[A0:%.*]] = sub i16 [[E0]], 17
+; DEFAULT-NEXT:    [[A1:%.*]] = sub i16 [[E1]], 17
+; DEFAULT-NEXT:    store i16 [[A0]], ptr [[DEST:%.*]], align 4
+; DEFAULT-NEXT:    [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1
+; DEFAULT-NEXT:    store i16 [[A1]], ptr [[INC2]], align 2
+; DEFAULT-NEXT:    ret void
+;
+entry:
+  %e0 = load i16, ptr %p, align 4
+  %inc = getelementptr inbounds i16, ptr %p, i64 1
+  %e1 = load i16, ptr %inc, align 2
+
+  %a0 = sub i16 %e0, 17
+  %a1 = sub i16 %e1, 17
+
+  store i16 %a0, ptr %dest, align 4
+  %inc2 = getelementptr inbounds i16, ptr %dest, i64 1
+  store i16 %a1, ptr %inc2, align 2
+  ret void
+}
+
+define void @vec_rsub(ptr %dest, ptr %p) {
+; CHECK-LABEL: @vec_rsub(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = sub <2 x i16> <i16 29, i16 29>, [[TMP0]]
+; CHECK-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+; DEFAULT-LABEL: @vec_rsub(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4
+; DEFAULT-NEXT:    [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; DEFAULT-NEXT:    [[E1:%.*]] = load i16, ptr [[INC]], align 2
+; DEFAULT-NEXT:    [[A0:%.*]] = sub i16 29, [[E0]]
+; DEFAULT-NEXT:    [[A1:%.*]] = sub i16 29, [[E1]]
+; DEFAULT-NEXT:    store i16 [[A0]], ptr [[DEST:%.*]], align 4
+; DEFAULT-NEXT:    [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1
+; DEFAULT-NEXT:    store i16 [[A1]], ptr [[INC2]], align 2
+; DEFAULT-NEXT:    ret void
+;
+entry:
+  %e0 = load i16, ptr %p, align 4
+  %inc = getelementptr inbounds i16, ptr %p, i64 1
+  %e1 = load i16, ptr %inc, align 2
+
+  %a0 = sub i16 29, %e0
+  %a1 = sub i16 29, %e1
+
+  store i16 %a0, ptr %dest, align 4
+  %inc2 = getelementptr inbounds i16, ptr %dest, i64 1
+  store i16 %a1, ptr %inc2, align 2
+  ret void
+}
+
+define void @vec_mul(ptr %dest, ptr %p) {
+; CHECK-LABEL: @vec_mul(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i16> [[TMP0]], <i16 7, i16 7>
+; CHECK-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+; DEFAULT-LABEL: @vec_mul(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4
+; DEFAULT-NEXT:    [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; DEFAULT-NEXT:    [[E1:%.*]] = load i16, ptr [[INC]], align 2
+; DEFAULT-NEXT:    [[A0:%.*]] = mul i16 [[E0]], 7
+; DEFAULT-NEXT:    [[A1:%.*]] = mul i16 [[E1]], 7
+; DEFAULT-NEXT:    store i16 [[A0]], ptr [[DEST:%.*]], align 4
+; DEFAULT-NEXT:    [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1
+; DEFAULT-NEXT:    store i16 [[A1]], ptr [[INC2]], align 2
+; DEFAULT-NEXT:    ret void
+;
+entry:
+  %e0 = load i16, ptr %p, align 4
+  %inc = getelementptr inbounds i16, ptr %p, i64 1
+  %e1 = load i16, ptr %inc, align 2
+
+  %a0 = mul i16 %e0, 7
+  %a1 = mul i16 %e1, 7
+
+  store i16 %a0, ptr %dest, align 4
+  %inc2 = getelementptr inbounds i16, ptr %dest, i64 1
+  store i16 %a1, ptr %inc2, align 2
+  ret void
+}
+
+define void @vec_sdiv(ptr %dest, ptr %p) {
+; CHECK-LABEL: @vec_sdiv(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = sdiv <2 x i16> [[TMP0]], <i16 7, i16 7>
+; CHECK-NEXT:    store <2 x i16> [[TMP1]], ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+; DEFAULT-LABEL: @vec_sdiv(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4
+; DEFAULT-NEXT:    [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; DEFAULT-NEXT:    [[E1:%.*]] = load i16, ptr [[INC]], align 2
+; DEFAULT-NEXT:    [[A0:%.*]] = sdiv i16 [[E0]], 7
+; DEFAULT-NEXT:    [[A1:%.*]] = sdiv i16 [[E1]], 7
+; DEFAULT-NEXT:    store i16 [[A0]], ptr [[DEST:%.*]], align 4
+; DEFAULT-NEXT:    [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1
+; DEFAULT-NEXT:    store i16 [[A1]], ptr [[INC2]], align 2
+; DEFAULT-NEXT:    ret void
+;
+entry:
+  %e0 = load i16, ptr %p, align 4
+  %inc = getelementptr inbounds i16, ptr %p, i64 1
+  %e1 = load i16, ptr %inc, align 2
+
+  %a0 = sdiv i16 %e0, 7
+  %a1 = sdiv i16 %e1, 7
+
+  store i16 %a0, ptr %dest, align 4
+  %inc2 = getelementptr inbounds i16, ptr %dest, i64 1
+  store i16 %a1, ptr %inc2, align 2
+  ret void
+}
+
+define void @vec_and(ptr %dest, ptr %p, ptr %q) {
+; CHECK-LABEL: @vec_and(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i16> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+; DEFAULT-LABEL: @vec_and(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4
+; DEFAULT-NEXT:    [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; DEFAULT-NEXT:    [[E1:%.*]] = load i16, ptr [[INC]], align 2
+; DEFAULT-NEXT:    [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4
+; DEFAULT-NEXT:    [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1
+; DEFAULT-NEXT:    [[F1:%.*]] = load i16, ptr [[INQ]], align 2
+; DEFAULT-NEXT:    [[A0:%.*]] = and i16 [[E0]], [[F0]]
+; DEFAULT-NEXT:    [[A1:%.*]] = and i16 [[E1]], [[F1]]
+; DEFAULT-NEXT:    store i16 [[A0]], ptr [[DEST:%.*]], align 4
+; DEFAULT-NEXT:    [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1
+; DEFAULT-NEXT:    store i16 [[A1]], ptr [[INC2]], align 2
+; DEFAULT-NEXT:    ret void
+;
+entry:
+  %e0 = load i16, ptr %p, align 4
+  %inc = getelementptr inbounds i16, ptr %p, i64 1
+  %e1 = load i16, ptr %inc, align 2
+
+  %f0 = load i16, ptr %q, align 4
+  %inq = getelementptr inbounds i16, ptr %q, i64 1
+  %f1 = load i16, ptr %inq, align 2
+
+  %a0 = and i16 %e0, %f0
+  %a1 = and i16 %e1, %f1
+
+  store i16 %a0, ptr %dest, align 4
+  %inc2 = getelementptr inbounds i16, ptr %dest, i64 1
+  store i16 %a1, ptr %inc2, align 2
+  ret void
+}
+
+define void @vec_or(ptr %dest, ptr %p, ptr %q) {
+; CHECK-LABEL: @vec_or(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i16> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+; DEFAULT-LABEL: @vec_or(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4
+; DEFAULT-NEXT:    [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1
+; DEFAULT-NEXT:    [[E1:%.*]] = load i16, ptr [[INC]], align 2
+; DEFAULT-NEXT:    [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4
+; DEFAULT-NEXT:    [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1
+; DEFAULT-NEXT:    [[F1:%.*]] = load i16, ptr [[INQ]], align 2
+; 
DEFAULT-NEXT: [[A0:%.*]] = or i16 [[E0]], [[F0]] +; DEFAULT-NEXT: [[A1:%.*]] = or i16 [[E1]], [[F1]] +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %f0 = load i16, ptr %q, align 4 + %inq = getelementptr inbounds i16, ptr %q, i64 1 + %f1 = load i16, ptr %inq, align 2 + + %a0 = or i16 %e0, %f0 + %a1 = or i16 %e1, %f1 + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +define void @vec_sll(ptr %dest, ptr %p, ptr %q) { +; CHECK-LABEL: @vec_sll( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i16> [[TMP0]], [[TMP1]] +; CHECK-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_sll( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 +; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = shl i16 [[E0]], [[F0]] +; DEFAULT-NEXT: [[A1:%.*]] = shl i16 [[E1]], [[F1]] +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %f0 = load i16, ptr %q, align 4 + %inq = getelementptr inbounds i16, ptr %q, i64 1 + %f1 = load i16, ptr %inq, align 2 + + %a0 = shl i16 %e0, %f0 + %a1 = shl i16 %e1, %f1 + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +declare i16 @llvm.smin.i16(i16, i16) +define void @vec_smin(ptr %dest, ptr %p, ptr %q) { +; CHECK-LABEL: @vec_smin( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +; CHECK-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_smin( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 +; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = tail call i16 @llvm.smin.i16(i16 [[E0]], i16 [[F0]]) +; DEFAULT-NEXT: [[A1:%.*]] = tail call i16 @llvm.smin.i16(i16 [[E1]], i16 [[F1]]) +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 
4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %f0 = load i16, ptr %q, align 4 + %inq = getelementptr inbounds i16, ptr %q, i64 1 + %f1 = load i16, ptr %inq, align 2 + + %a0 = tail call i16 @llvm.smin.i16(i16 %e0, i16 %f0) + %a1 = tail call i16 @llvm.smin.i16(i16 %e1, i16 %f1) + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} + +declare i16 @llvm.umax.i16(i16, i16) +define void @vec_umax(ptr %dest, ptr %p, ptr %q) { +; CHECK-LABEL: @vec_umax( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr [[P:%.*]], align 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i16>, ptr [[Q:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +; CHECK-NEXT: store <2 x i16> [[TMP2]], ptr [[DEST:%.*]], align 4 +; CHECK-NEXT: ret void +; +; DEFAULT-LABEL: @vec_umax( +; DEFAULT-NEXT: entry: +; DEFAULT-NEXT: [[E0:%.*]] = load i16, ptr [[P:%.*]], align 4 +; DEFAULT-NEXT: [[INC:%.*]] = getelementptr inbounds i16, ptr [[P]], i64 1 +; DEFAULT-NEXT: [[E1:%.*]] = load i16, ptr [[INC]], align 2 +; DEFAULT-NEXT: [[F0:%.*]] = load i16, ptr [[Q:%.*]], align 4 +; DEFAULT-NEXT: [[INQ:%.*]] = getelementptr inbounds i16, ptr [[Q]], i64 1 +; DEFAULT-NEXT: [[F1:%.*]] = load i16, ptr [[INQ]], align 2 +; DEFAULT-NEXT: [[A0:%.*]] = tail call i16 @llvm.umax.i16(i16 [[E0]], i16 [[F0]]) +; DEFAULT-NEXT: [[A1:%.*]] = tail call i16 @llvm.umax.i16(i16 [[E1]], i16 [[F1]]) +; DEFAULT-NEXT: store i16 [[A0]], ptr [[DEST:%.*]], align 4 +; DEFAULT-NEXT: [[INC2:%.*]] = getelementptr inbounds i16, ptr [[DEST]], i64 1 +; DEFAULT-NEXT: store i16 [[A1]], ptr [[INC2]], align 2 +; DEFAULT-NEXT: ret void +; +entry: + %e0 = load i16, ptr %p, align 4 + %inc = getelementptr inbounds i16, ptr %p, i64 1 + %e1 = load i16, ptr %inc, align 2 + + %f0 = load i16, ptr %q, align 4 + %inq = getelementptr inbounds i16, ptr %q, i64 1 + %f1 = load i16, ptr %inq, align 2 + + %a0 = tail call i16 @llvm.umax.i16(i16 %e0, i16 %f0) + %a1 = tail call i16 @llvm.umax.i16(i16 %e1, i16 %f1) + + store i16 %a0, ptr %dest, align 4 + %inc2 = getelementptr inbounds i16, ptr %dest, i64 1 + store i16 %a1, ptr %inc2, align 2 + ret void +} From ae63b1a5767b89fe5af140365f9e3ccf74feb1f0 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 22 Mar 2023 19:58:08 -0500 Subject: [PATCH 360/691] [libc] Adjust NVPTX startup code Summary: The startup code needs to include the environment pointer so we add this to the arguments. Also we need to ensure that the `crt1.o` file is made with `-fgpu-rdc` set so we can actually use it without undefined reference errors. --- libc/startup/gpu/nvptx/CMakeLists.txt | 7 ++++++- libc/startup/gpu/nvptx/start.cpp | 7 ++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/libc/startup/gpu/nvptx/CMakeLists.txt b/libc/startup/gpu/nvptx/CMakeLists.txt index f7f58ec702bf2..96ab7540cedb1 100644 --- a/libc/startup/gpu/nvptx/CMakeLists.txt +++ b/libc/startup/gpu/nvptx/CMakeLists.txt @@ -8,6 +8,7 @@ add_startup_object( -nogpulib # Do not include any GPU vendor libraries. -nostdinc -x cuda # Use the CUDA toolchain to emit the `_start` kernel. + -fgpu-rdc # Emit relocatable device code from CUDA. 
--offload-device-only
   --offload-arch=${LIBC_GPU_TARGET_ARCHITECTURE}
   NO_GPU_BUNDLE # Compile this file directly without special GPU handling.
@@ -15,4 +16,8 @@ add_startup_object(
 get_fq_target_name(crt1 fq_name)
 
 # Ensure that clang uses the correct linker for this object type.
-target_link_libraries(${fq_name} PUBLIC "--target=${LIBC_GPU_TARGET_TRIPLE}")
+target_link_libraries(${fq_name}
+  PUBLIC
+  "-march=${LIBC_GPU_TARGET_ARCHITECTURE}"
+  "--target=${LIBC_GPU_TARGET_TRIPLE}"
+)
diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp
index 61569423c7b55..cf4077c3d9edd 100644
--- a/libc/startup/gpu/nvptx/start.cpp
+++ b/libc/startup/gpu/nvptx/start.cpp
@@ -6,10 +6,11 @@
 //
 //===----------------------------------------------------------------------===//
 
-extern "C" __attribute__((device)) int main(int argc, char **argv);
+extern "C" __attribute__((device)) int main(int argc, char **argv, char **envp);
 
 // TODO: We shouldn't need to use the CUDA language to emit a kernel for NVPTX.
 extern "C" [[gnu::visibility("protected")]] __attribute__((global)) void
-_start(int argc, char **argv, int *ret) {
-  __atomic_fetch_or(ret, main(argc, argv), __ATOMIC_RELAXED);
+_start(int argc, char **argv, char **envp, int *ret, void *in, void *out,
+       void *buffer) {
+  __atomic_fetch_or(ret, main(argc, argv, envp), __ATOMIC_RELAXED);
 }

From 3d7383d7f44a4b9760f6566e62950968e90e0a1c Mon Sep 17 00:00:00 2001
From: Kai Sasaki
Date: Thu, 23 Mar 2023 09:59:29 +0900
Subject: [PATCH 361/691] [mlir][affine] Prevent vectorizer test from crashing
 without any map

If the vectorizer test pass does not get any affine map, it should output
nothing instead of crashing.

Issue: https://github.com/llvm/llvm-project/issues/61534

Reviewed By: nicolasvasilache, dcaballe

Differential Revision: https://reviews.llvm.org/D146601
---
 mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir | 8 ++++++++
 mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp   | 3 +++
 2 files changed, 11 insertions(+)

diff --git a/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir b/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir
index 3b7820cec67a2..b53fc55fdac91 100644
--- a/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir
+++ b/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir
@@ -159,3 +159,11 @@ func.func @multi_symbols() {
   "test_affine_map"() { affine_map = affine_map<(d0, d1)[s0, s1] -> (d0 + 1 + s1, d1 - 1 - s0)> } : () -> ()
   return
 }
+
+// -----
+
+// CHECK-LABEL: @no_affine_maps
+func.func @no_affine_maps() {
+  // CHECK: return
+  return
+}
diff --git a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
index 61428bbf7091f..b31dd3f7d866f 100644
--- a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
@@ -215,6 +215,9 @@ void VectorizerTestPass::testComposeMaps(llvm::raw_ostream &outs) {
             .getValue();
     maps.push_back(map);
   }
+  if (maps.empty())
+    // Nothing to compose
+    return;
   AffineMap res;
   for (auto m : maps) {
     res = res ? res.compose(m) : m;

From 89a1af749166627704cdf9d676455d32616c2c06 Mon Sep 17 00:00:00 2001
From: Matthias Gehre
Date: Thu, 23 Mar 2023 01:08:43 +0000
Subject: [PATCH 362/691] [mlir][tosa] TosaToLinalg: Lower TOSA.Cast via
 RoundEven according to TOSA spec 0.60.0

TOSA now specifies rounding of ties to even in section 1.8.2.,
"Main Inference Profile"

Reviewed By: eric-k256, rsuderman

Differential Revision: https://reviews.llvm.org/D146617
---
 mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp    | 12 +-----------
 .../test/Conversion/TosaToLinalg/tosa-to-linalg.mlir |  7 +------
 2 files changed, 2 insertions(+), 17 deletions(-)

diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
index 271a09539e46e..be24f5ee5feb4 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
@@ -471,11 +471,6 @@ createLinalgBodyCalculationForElementwiseOp(Operation *op, ValueRange args,
     }
 
     if (arith::FPToSIOp::areCastCompatible(srcTy, dstTy)) {
-      auto zero = rewriter.create<arith::ConstantOp>(
-          loc, rewriter.getF32FloatAttr(0.0f));
-      auto half = rewriter.create<arith::ConstantOp>(
-          loc, rewriter.getF32FloatAttr(0.5f));
-
       auto intMin = rewriter.create<arith::ConstantOp>(
           loc, rewriter.getF32FloatAttr(
                    APInt::getSignedMinValue(dstTy.getIntOrFloatBitWidth())
@@ -486,12 +481,7 @@ createLinalgBodyCalculationForElementwiseOp(Operation *op, ValueRange args,
                    APInt::getSignedMaxValue(dstTy.getIntOrFloatBitWidth())
                        .getSExtValue()));
 
-      auto added = rewriter.create<arith::AddFOp>(loc, args[0], half);
-      auto subbed = rewriter.create<arith::SubFOp>(loc, args[0], half);
-      auto negative = rewriter.create<arith::CmpFOp>(
-          loc, arith::CmpFPredicate::OLT, args[0], zero);
-      auto rounded =
-          rewriter.create<arith::SelectOp>(loc, negative, subbed, added);
+      auto rounded = rewriter.create<math::RoundEvenOp>(loc, args[0]);
 
       auto clamped = clampFloatHelper(loc, rounded, intMin, intMax, rewriter);
 
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
index 133999eff1ec3..476131b262fb9 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -237,14 +237,9 @@ func.func @test_simple_f32(%arg0: tensor<1xf32>) -> () {
   %19 = "tosa.sigmoid"(%0) : (tensor<1xf32>) -> tensor<1xf32>
 
   // CHECK: linalg.generic
-  // CHECK: arith.constant 0.000000e+00
-  // CHECK: arith.constant 5.000000e-01
   // CHECK: arith.constant -2.14748365E+9
   // CHECK: arith.constant 2.14748365E+9
-  // CHECK: arith.addf
-  // CHECK: arith.subf
-  // CHECK: arith.cmpf olt
-  // CHECK: select
+  // CHECK: math.roundeven
   // CHECK: arith.minf
   // CHECK: arith.maxf
   // CHECK: arith.fptosi

From 25557aa38a0dab76f5b7a4518942f69d879693c0 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu
Date: Thu, 23 Mar 2023 11:21:35 +0800
Subject: [PATCH 363/691] Recommit [Modules] Remove unnecessary check when
 generating name lookup table in ASTWriter

Close https://github.com/llvm/llvm-project/issues/61065.

We already avoid writing names that come from the external AST. But the
current check often produces false positives: we may have already marked the
declarations as external, while
DeclContext::hasNeedToReconcileExternalVisibleStorage is false after
reconciling.

Tested with libcxx's modular build.

This patch can improve compilation time by 8% on an internal workload.

See the discussion in
https://reviews.llvm.org/rG1e0709167f5edd330889f51bb203c458bdb5e359
for the details behind recommitting.
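
Schematically, the change reduces the writer's skip test to the "entirely
external" predicate alone. A small self-contained model of that predicate
(`Decl` here is a hypothetical stand-in for the real AST node, not the Clang
class):

  #include <algorithm>
  #include <cassert>
  #include <vector>

  struct Decl { bool isFromASTFile; }; // only the bit the predicate needs

  // Mirrors isLookupResultEntirelyExternal: true iff every decl in the
  // lookup result comes from an external AST, in which case the name needs
  // no entry in this module's lookup table.
  static bool isEntirelyExternal(const std::vector<Decl> &lookupResult) {
    return std::all_of(lookupResult.begin(), lookupResult.end(),
                       [](const Decl &d) { return d.isFromASTFile; });
  }

  int main() {
    assert(isEntirelyExternal({{true}, {true}}));   // skip: nothing to write
    assert(!isEntirelyExternal({{true}, {false}})); // keep: has a local decl
  }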
--- clang/include/clang/Serialization/ASTWriter.h | 1 - clang/lib/Serialization/ASTWriter.cpp | 9 +-- clang/test/Modules/pr61065.cppm | 55 +++++++++++++++++++ 3 files changed, 56 insertions(+), 9 deletions(-) create mode 100644 clang/test/Modules/pr61065.cppm diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 09ee1744e8945..d31fa38b93825 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -514,7 +514,6 @@ class ASTWriter : public ASTDeserializationListener, void WriteTypeAbbrevs(); void WriteType(QualType T); - bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC); bool isLookupResultEntirelyExternal(StoredDeclsList &Result, DeclContext *DC); void GenerateNameLookupTable(const DeclContext *DC, diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index e8f390bc5b1dd..94160409c5f53 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3849,12 +3849,6 @@ class ASTDeclContextNameLookupTrait { } // namespace -bool ASTWriter::isLookupResultExternal(StoredDeclsList &Result, - DeclContext *DC) { - return Result.hasExternalDecls() && - DC->hasNeedToReconcileExternalVisibleStorage(); -} - bool ASTWriter::isLookupResultEntirelyExternal(StoredDeclsList &Result, DeclContext *DC) { for (auto *D : Result.getLookupResult()) @@ -3897,8 +3891,7 @@ ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC, // don't need to write an entry for the name at all. If we can't // write out a lookup set without performing more deserialization, // just skip this entry. - if (isLookupResultExternal(Result, DC) && - isLookupResultEntirelyExternal(Result, DC)) + if (isLookupResultEntirelyExternal(Result, DC)) continue; // We also skip empty results. 
If any of the results could be external and diff --git a/clang/test/Modules/pr61065.cppm b/clang/test/Modules/pr61065.cppm new file mode 100644 index 0000000000000..44fa3679974ad --- /dev/null +++ b/clang/test/Modules/pr61065.cppm @@ -0,0 +1,55 @@ +// From https://github.com/llvm/llvm-project/issues/61065 +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-module-interface -o %t/c.pcm \ +// RUN: -fprebuilt-module-path=%t +// RUN: %clang_cc1 -std=c++20 %t/d.cpp -fsyntax-only -verify -fprebuilt-module-path=%t + +//--- a.cppm +export module a; + +struct base { + base(int) {} +}; + +export struct a : base { + using base::base; +}; + +//--- b.cppm +export module b; + +import a; + +a b() { + return a(1); +} + +//--- c.cppm +export module c; + +import a; +import b; + +struct noncopyable { + noncopyable(noncopyable const &) = delete; + noncopyable() = default; +}; + +export struct c { + noncopyable c0; + a c1 = 43; + c() = default; +}; + +//--- d.cpp +// expected-no-diagnostics +import c; +void d() { + c _; +} From 45a0433b39ffbd7cee9cc8a92f2300324b3548e0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 22 Mar 2023 21:02:00 -0700 Subject: [PATCH 364/691] [-Wunsafe-buffer-usage] Add [[fallthrough]] after D143206 --- clang/lib/Analysis/UnsafeBufferUsage.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Analysis/UnsafeBufferUsage.cpp b/clang/lib/Analysis/UnsafeBufferUsage.cpp index 95e4c8388bc44..4a8358af68ec5 100644 --- a/clang/lib/Analysis/UnsafeBufferUsage.cpp +++ b/clang/lib/Analysis/UnsafeBufferUsage.cpp @@ -973,6 +973,7 @@ PointerDereferenceGadget::getFixits(const Strategy &S) const { endOfOperand.value().getLocWithOffset(1), "[0]")}}; } } + [[fallthrough]]; case Strategy::Kind::Iterator: case Strategy::Kind::Array: case Strategy::Kind::Vector: From 1c420cd4e31f68fedca83b4d3a857a5519f4ce03 Mon Sep 17 00:00:00 2001 From: Jun Zhang Date: Thu, 23 Mar 2023 12:48:59 +0800 Subject: [PATCH 365/691] Precommit test for #61120 Signed-off-by: Jun Zhang --- llvm/test/CodeGen/X86/setcc-combine.ll | 560 +++++++++++++++++++++++++ 1 file changed, 560 insertions(+) diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll index c2ee78989ba16..f9542a3268ca8 100644 --- a/llvm/test/CodeGen/X86/setcc-combine.ll +++ b/llvm/test/CodeGen/X86/setcc-combine.ll @@ -499,3 +499,563 @@ define double @ogt_no_zero(double %x) { %r = select i1 %cmp, double %x, double %neg ret double %r } + +define i64 @cmp_sgt_not(i64 %a, i64 %b) { +; CHECK-LABEL: cmp_sgt_not: +; CHECK: # %bb.0: +; CHECK-NEXT: notq %rdi +; CHECK-NEXT: notq %rsi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: setg %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %nb = xor i64 %b, -1 + %c = icmp sgt i64 %na, %nb + %r = sext i1 %c to i64 + ret i64 %r +} + +define i64 @cmp_sgt_not_with_constant(i64 %a) { +; CHECK-LABEL: cmp_sgt_not_with_constant: +; CHECK: # %bb.0: +; CHECK-NEXT: notq %rdi +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: cmpq $43, %rdi +; CHECK-NEXT: setge %al +; CHECK-NEXT: negq %rax +; CHECK-NEXT: retq + %na = xor i64 %a, -1 + %c = icmp sgt i64 %na, 42 + %r = sext i1 %c to i64 + ret i64 %r +} + +define <4 x i32> @cmp_sgt_not_with_vec(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: 
cmp_sgt_not_with_vec:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm2, %xmm0
+; CHECK-NEXT:    pxor %xmm2, %xmm1
+; CHECK-NEXT:    pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %c = icmp sgt <4 x i32> %na, %nb
+  %r = sext <4 x i1> %c to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define i64 @cmp_ugt_not(i64 %a, i64 %b) {
+; CHECK-LABEL: cmp_ugt_not:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    addq %rsi, %rdi
+; CHECK-NEXT:    sbbq %rax, %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %nb = xor i64 %b, -1
+  %c = icmp ugt i64 %na, %nb
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define i64 @cmp_ugt_not_with_constant(i64 %a) {
+; CHECK-LABEL: cmp_ugt_not_with_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $43, %rdi
+; CHECK-NEXT:    adcq $-1, %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %c = icmp ugt i64 %na, 42
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define <4 x i32> @cmp_ugt_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: cmp_ugt_not_with_vec:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
+; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: cmp_ugt_not_with_vec:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    pxor %xmm2, %xmm1
+; SSE41-NEXT:    pminud %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    retq
+  %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %c = icmp ugt <4 x i32> %na, %nb
+  %r = sext <4 x i1> %c to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define i64 @cmp_sge_not(i64 %a, i64 %b) {
+; CHECK-LABEL: cmp_sge_not:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    notq %rsi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    setge %al
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %nb = xor i64 %b, -1
+  %c = icmp sge i64 %na, %nb
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define i64 @cmp_sge_not_with_constant(i64 %a) {
+; CHECK-LABEL: cmp_sge_not_with_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $42, %rdi
+; CHECK-NEXT:    setge %al
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %c = icmp sge i64 %na, 42
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define <4 x i32> @cmp_sge_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: cmp_sge_not_with_vec:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
+; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: cmp_sge_not_with_vec:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm2
+; SSE41-NEXT:    pmaxud %xmm0, %xmm2
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT:    retq
+  %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %c = icmp uge <4 x i32> %na, %nb
+  %r = sext <4 x i1> %c to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define i64 @cmp_uge_not(i64 %a, i64 %b) {
+; CHECK-LABEL: cmp_uge_not:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    notq %rsi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    adcq $-1, %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %nb = xor i64 %b, -1
+  %c = icmp uge i64 %na, %nb
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define i64 @cmp_uge_not_with_constant(i64 %a) {
+; CHECK-LABEL: cmp_uge_not_with_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $42, %rdi
+; CHECK-NEXT:    adcq $-1, %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %c = icmp uge i64 %na, 42
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define <4 x i32> @cmp_uge_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: cmp_uge_not_with_vec:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
+; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT:    pxor %xmm3, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: cmp_uge_not_with_vec:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm2
+; SSE41-NEXT:    pmaxud %xmm0, %xmm2
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT:    retq
+  %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %c = icmp uge <4 x i32> %na, %nb
+  %r = sext <4 x i1> %c to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define i64 @cmp_sle_not(i64 %a, i64 %b) {
+; CHECK-LABEL: cmp_sle_not:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    notq %rsi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    setle %al
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %nb = xor i64 %b, -1
+  %c = icmp sle i64 %na, %nb
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define i64 @cmp_sle_not_with_constant(i64 %a) {
+; CHECK-LABEL: cmp_sle_not_with_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $43, %rdi
+; CHECK-NEXT:    setl %al
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %c = icmp sle i64 %na, 42
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define <4 x i32> @cmp_sle_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: cmp_sle_not_with_vec:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm2, %xmm0
+; CHECK-NEXT:    pxor %xmm2, %xmm1
+; CHECK-NEXT:    pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT:    pxor %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %c = icmp sle <4 x i32> %na, %nb
+  %r = sext <4 x i1> %c to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define i64 @cmp_slt_not(i64 %a, i64 %b) {
+; CHECK-LABEL: cmp_slt_not:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    notq %rsi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    setl %al
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %nb = xor i64 %b, -1
+  %c = icmp slt i64 %na, %nb
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define i64 @cmp_slt_not_with_constant(i64 %a) {
+; CHECK-LABEL: cmp_slt_not_with_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $42, %rdi
+; CHECK-NEXT:    setl %al
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %c = icmp slt i64 %na, 42
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define <4 x i32> @cmp_slt_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: cmp_slt_not_with_vec:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT:    pxor %xmm2, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm2
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT:    movdqa %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %c = icmp slt <4 x i32> %na, %nb
+  %r = sext <4 x i1> %c to <4 x i32>
+  ret <4 x i32> %r
+}
+
+
+define i64 @cmp_ult_not(i64 %a, i64 %b) {
+; CHECK-LABEL: cmp_ult_not:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rsi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    addq %rdi, %rsi
+; CHECK-NEXT:    sbbq %rax, %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %nb = xor i64 %b, -1
+  %c = icmp ult i64 %na, %nb
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define i64 @cmp_ult_not_with_constant(i64 %a) {
+; CHECK-LABEL: cmp_ult_not_with_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    addq $42, %rdi
+; CHECK-NEXT:    sbbq %rax, %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %c = icmp ult i64 %na, 42
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define <4 x i32> @cmp_ult_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: cmp_ult_not_with_vec:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
+; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: cmp_ult_not_with_vec:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    pxor %xmm2, %xmm1
+; SSE41-NEXT:    pmaxud %xmm0, %xmm1
+; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    retq
+  %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %c = icmp ult <4 x i32> %na, %nb
+  %r = sext <4 x i1> %c to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define i64 @cmp_ule_not(i64 %a, i64 %b) {
+; CHECK-LABEL: cmp_ule_not:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    notq %rsi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq %rdi, %rsi
+; CHECK-NEXT:    adcq $-1, %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %nb = xor i64 %b, -1
+  %c = icmp ule i64 %na, %nb
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define i64 @cmp_ule_not_with_constant(i64 %a) {
+; CHECK-LABEL: cmp_ule_not_with_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $43, %rdi
+; CHECK-NEXT:    sbbq %rax, %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %c = icmp ule i64 %na, 42
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define <4 x i32> @cmp_ule_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
+; SSE2-LABEL: cmp_ule_not_with_vec:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647]
+; SSE2-NEXT:    pxor %xmm3, %xmm1
+; SSE2-NEXT:    pxor %xmm3, %xmm0
+; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: cmp_ule_not_with_vec:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    pxor %xmm1, %xmm2
+; SSE41-NEXT:    pminud %xmm0, %xmm2
+; SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT:    retq
+  %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %c = icmp ule <4 x i32> %na, %nb
+  %r = sext <4 x i1> %c to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define i64 @cmp_eq_not(i64 %a, i64 %b) {
+; CHECK-LABEL: cmp_eq_not:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %nb = xor i64 %b, -1
+  %c = icmp eq i64 %na, %nb
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define i64 @cmp_eq_not_with_constant(i64 %a) {
+; CHECK-LABEL: cmp_eq_not_with_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $-43, %rdi
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %c = icmp eq i64 %na, 42
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define <4 x i32> @cmp_eq_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: cmp_eq_not_with_vec:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %c = icmp eq <4 x i32> %na, %nb
+  %r = sext <4 x i1> %c to <4 x i32>
+  ret <4 x i32> %r
+}
+define i64 @cmp_ne_not(i64 %a, i64 %b) {
+; CHECK-LABEL: cmp_ne_not:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %nb = xor i64 %b, -1
+  %c = icmp ne i64 %na, %nb
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define i64 @cmp_ne_not_with_constant(i64 %a) {
+; CHECK-LABEL: cmp_ne_not_with_constant:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $-43, %rdi
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    negq %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %c = icmp ne i64 %na, 42
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define <4 x i32> @cmp_ne_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: cmp_ne_not_with_vec:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %c = icmp ne <4 x i32> %na, %nb
+  %r = sext <4 x i1> %c to <4 x i32>
+  ret <4 x i32> %r
+}
+
+define i64 @cmp_uge_not_commute(i64 %b, i64 %a) {
+; CHECK-LABEL: cmp_uge_not_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    notq %rsi
+; CHECK-NEXT:    notq %rdi
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq %rdi, %rsi
+; CHECK-NEXT:    adcq $-1, %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %nb = xor i64 %b, -1
+  %c = icmp uge i64 %na, %nb
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define i64 @cmp_ult_not_with_constant_commute(i64 %a) {
+; CHECK-LABEL: cmp_ult_not_with_constant_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq $43, %rdi
+; CHECK-NEXT:    adcq $-1, %rax
+; CHECK-NEXT:    retq
+  %na = xor i64 %a, -1
+  %c = icmp ult i64 42, %a
+  %r = sext i1 %c to i64
+  ret i64 %r
+}
+
+define <2 x i64> @cmp_uge_not_with_vec2xi64(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: cmp_uge_not_with_vec2xi64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movdqa {{.*#+}} xmm2 = [9223372034707292159,9223372034707292159]
+; CHECK-NEXT:    pxor %xmm2, %xmm0
+; CHECK-NEXT:    pxor %xmm2, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm2
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm2
+; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm1
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; CHECK-NEXT:    pand %xmm3, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; CHECK-NEXT:    por %xmm0, %xmm1
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %na = xor <2 x i64> %a, <i64 -1, i64 -1>
+  %nb = xor <2 x i64> %b, <i64 -1, i64 -1>
+  %c = icmp uge <2 x i64> %na, %nb
+  %r = sext <2 x i1> %c to <2 x i64>
+  ret <2 x i64> %r
+}

From b3e12beb44dc36e9ed0f5e9cb3fb1eef0823894e Mon Sep 17 00:00:00 2001
From: Jun Zhang
Date: Thu, 23 Mar 2023 12:49:05 +0800
Subject: [PATCH 366/691] [TLI] Fold ~X >/< ~Y --> Y >/< X

Fixes: https://github.com/llvm/llvm-project/issues/61120

Signed-off-by: Jun Zhang

Differential Revision: https://reviews.llvm.org/D146512
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp |  17 ++
 llvm/test/CodeGen/X86/cmov.ll               |   4 +-
 llvm/test/CodeGen/X86/setcc-combine.ll      | 178 +++++++-----------
 3 files changed, 86 insertions(+), 113 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c82f9ce64ea5a..9ef3c15cfe374 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -4974,6 +4974,23 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       return DAG.getSetCC(dl, VT, N0, N1, NewCond);
   }
 
+  // ~X > ~Y --> Y > X
+  // ~X < ~Y --> Y < X
+  // ~X < C --> X > ~C
+  // ~X > C --> X < ~C
+  if ((isSignedIntSetCC(Cond) || isUnsignedIntSetCC(Cond)) &&
+      N0.getValueType().isInteger()) {
+    if (isBitwiseNot(N0)) {
+      if (isBitwiseNot(N1))
+        return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond);
+
+      if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+        SDValue Not = DAG.getNOT(dl, N1, OpVT);
+        return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond);
+      }
+    }
+  }
+
   if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
       N0.getValueType().isInteger()) {
     if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll
index 94df5fa6d96fc..dbe85eced6a59 100644
--- a/llvm/test/CodeGen/X86/cmov.ll
+++ b/llvm/test/CodeGen/X86/cmov.ll
@@ -213,10 +213,10 @@ define i64 @test8(i64 %0, i64 %1, i64 %2) {
 define i32 @smin(i32 %x) {
 ; CHECK-LABEL: smin:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    notl %edi
 ; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    notl %edi
 ; CHECK-NEXT:    movl $-1, %eax
-; CHECK-NEXT:    cmovsl %edi, %eax
+; CHECK-NEXT:    cmovnsl %edi, %eax
 ; CHECK-NEXT:    retq
   %not_x = xor i32 %x, -1
   %1 = icmp slt i32 %not_x, -1
diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll
index f9542a3268ca8..780a769bc9e2b 100644
--- a/llvm/test/CodeGen/X86/setcc-combine.ll
+++ b/llvm/test/CodeGen/X86/setcc-combine.ll
@@ -503,10 +503,8 @@ define double @ogt_no_zero(double %x) {
 define i64 @cmp_sgt_not(i64 %a, i64 %b) {
 ; CHECK-LABEL: cmp_sgt_not:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    notq %rdi
-; CHECK-NEXT:    notq %rsi
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmpq %rdi, %rsi
 ; CHECK-NEXT:    setg %al
 ; CHECK-NEXT:    negq %rax
 ; CHECK-NEXT:    retq
@@ -520,10 +518,9 @@ define i64 @cmp_sgt_not(i64 %a, i64 %b) {
 define i64 @cmp_sgt_not_with_constant(i64 %a) {
 ; CHECK-LABEL: cmp_sgt_not_with_constant:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    notq %rdi
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq $43, %rdi
-; CHECK-NEXT:    setge %al
+; CHECK-NEXT:    cmpq $-43, %rdi
+; CHECK-NEXT:    setl %al
 ; CHECK-NEXT:    negq %rax
 ; CHECK-NEXT:    retq
   %na = xor i64 %a, -1
@@ -535,10 +532,8 @@ define i64 @cmp_sgt_not_with_constant(i64 %a) {
 define <4 x i32> @cmp_sgt_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: cmp_sgt_not_with_vec:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT:    pxor %xmm2, %xmm0
-; CHECK-NEXT:    pxor %xmm2, %xmm1
-; CHECK-NEXT:    pcmpgtd %xmm1, %xmm0
+; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
   %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -565,10 +560,9 @@ define i64 @cmp_ugt_not(i64 %a, i64 %b) {
 define i64 @cmp_ugt_not_with_constant(i64 %a) {
 ; CHECK-LABEL: cmp_ugt_not_with_constant:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    notq %rdi
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq $43, %rdi
-; CHECK-NEXT:    adcq $-1, %rax
+; CHECK-NEXT:    cmpq $-43, %rdi
+; CHECK-NEXT:    sbbq %rax, %rax
 ; CHECK-NEXT:    retq
   %na = xor i64 %a, -1
@@ -579,20 +573,19 @@ define i64 @cmp_ugt_not_with_constant(i64 %a) {
 define <4 x i32> @cmp_ugt_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
 ; SSE2-LABEL: cmp_ugt_not_with_vec:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
-; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm2
+; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT:    movdqa %xmm2, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: cmp_ugt_not_with_vec:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT:    pxor %xmm2, %xmm0
-; SSE41-NEXT:    pxor %xmm2, %xmm1
-; SSE41-NEXT:    pminud %xmm0, %xmm1
+; SSE41-NEXT:    pminud %xmm1, %xmm0
 ; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
-; SSE41-NEXT:    pxor %xmm2, %xmm0
+; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE41-NEXT:    pxor %xmm1, %xmm0
 ; SSE41-NEXT:    retq
   %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
   %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -604,10 +597,8 @@ define <4 x i32> @cmp_ugt_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
 define i64 @cmp_sge_not(i64 %a, i64 %b) {
 ; CHECK-LABEL: cmp_sge_not:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    notq %rdi
-; CHECK-NEXT:    notq %rsi
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmpq %rdi, %rsi
 ; CHECK-NEXT:    setge %al
 ; CHECK-NEXT:    negq %rax
 ; CHECK-NEXT:    retq
@@ -621,10 +612,9 @@ define i64 @cmp_sge_not(i64 %a, i64 %b) {
 define i64 @cmp_sge_not_with_constant(i64 %a) {
 ; CHECK-LABEL: cmp_sge_not_with_constant:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    notq %rdi
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq $42, %rdi
-; CHECK-NEXT:    setge %al
+; CHECK-NEXT:    cmpq $-42, %rdi
+; CHECK-NEXT:    setl %al
 ; CHECK-NEXT:    negq %rax
 ; CHECK-NEXT:    retq
   %na = xor i64 %a, -1
@@ -636,22 +626,18 @@ define i64 @cmp_sge_not_with_constant(i64 %a) {
 define <4 x i32> @cmp_sge_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
 ; SSE2-LABEL: cmp_sge_not_with_vec:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    pcmpeqd %xmm3, %xmm3
-; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
+; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT:    pxor %xmm2, %xmm1
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    pxor %xmm1, %xmm2
-; SSE2-NEXT:    pcmpgtd %xmm0, %xmm2
-; SSE2-NEXT:    pxor %xmm3, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: cmp_sge_not_with_vec:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pcmpeqd %xmm2, %xmm2
-; SSE41-NEXT:    pxor %xmm2, %xmm0
-; SSE41-NEXT:    pxor %xmm1, %xmm2
-; SSE41-NEXT:    pmaxud %xmm0, %xmm2
-; SSE41-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE41-NEXT:    pmaxud %xmm1, %xmm0
+; SSE41-NEXT:    pcmpeqd %xmm1, %xmm0
 ; SSE41-NEXT:    retq
   %na = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
   %nb = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -663,10 +649,8 @@ define <4 x i32> @cmp_sge_not_with_vec(<4 x i32> %a, <4 x i32> %b) {
 define i64 @cmp_uge_not(i64 %a, i64 %b) {
 ; CHECK-LABEL: cmp_uge_not:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    notq %rdi
-; CHECK-NEXT:    notq %rsi
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpq %rsi, %rdi
+; CHECK-NEXT:    cmpq %rdi, %rsi
 ; CHECK-NEXT:    adcq $-1, %rax
 ; CHECK-NEXT:    retq
   %na = xor i64 %a, -1
@@ -679,10 +663,9 @@ define i64
@cmp_uge_not_with_constant(i64 %a) { ; CHECK-LABEL: cmp_uge_not_with_constant: ; CHECK: # %bb.0: -; CHECK-NEXT: notq %rdi ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpq $42, %rdi -; CHECK-NEXT: adcq $-1, %rax +; CHECK-NEXT: cmpq $-42, %rdi +; CHECK-NEXT: sbbq %rax, %rax ; CHECK-NEXT: retq %na = xor i64 %a, -1 %c = icmp uge i64 %na, 42 @@ -693,22 +676,18 @@ define i64 @cmp_uge_not_with_constant(i64 %a) { define <4 x i32> @cmp_uge_not_with_vec(<4 x i32> %a, <4 x i32> %b) { ; SSE2-LABEL: cmp_uge_not_with_vec: ; SSE2: # %bb.0: -; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647] +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 -; SSE2-NEXT: pxor %xmm3, %xmm2 -; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: cmp_uge_not_with_vec: ; SSE41: # %bb.0: -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm2 -; SSE41-NEXT: pmaxud %xmm0, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE41-NEXT: pmaxud %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE41-NEXT: retq %na = xor <4 x i32> %a, %nb = xor <4 x i32> %b, @@ -720,10 +699,8 @@ define <4 x i32> @cmp_uge_not_with_vec(<4 x i32> %a, <4 x i32> %b) { define i64 @cmp_sle_not(i64 %a, i64 %b) { ; CHECK-LABEL: cmp_sle_not: ; CHECK: # %bb.0: -; CHECK-NEXT: notq %rdi -; CHECK-NEXT: notq %rsi ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmpq %rdi, %rsi ; CHECK-NEXT: setle %al ; CHECK-NEXT: negq %rax ; CHECK-NEXT: retq @@ -737,10 +714,9 @@ define i64 @cmp_sle_not(i64 %a, i64 %b) { define i64 @cmp_sle_not_with_constant(i64 %a) { ; CHECK-LABEL: cmp_sle_not_with_constant: ; CHECK: # %bb.0: -; CHECK-NEXT: notq %rdi ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpq $43, %rdi -; CHECK-NEXT: setl %al +; CHECK-NEXT: cmpq $-43, %rdi +; CHECK-NEXT: setge %al ; CHECK-NEXT: negq %rax ; CHECK-NEXT: retq %na = xor i64 %a, -1 @@ -752,11 +728,9 @@ define i64 @cmp_sle_not_with_constant(i64 %a) { define <4 x i32> @cmp_sle_not_with_vec(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmp_sle_not_with_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-NEXT: pxor %xmm2, %xmm0 -; CHECK-NEXT: pxor %xmm2, %xmm1 -; CHECK-NEXT: pcmpgtd %xmm1, %xmm0 -; CHECK-NEXT: pxor %xmm2, %xmm0 +; CHECK-NEXT: pcmpgtd %xmm0, %xmm1 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retq %na = xor <4 x i32> %a, %nb = xor <4 x i32> %b, @@ -768,10 +742,8 @@ define <4 x i32> @cmp_sle_not_with_vec(<4 x i32> %a, <4 x i32> %b) { define i64 @cmp_slt_not(i64 %a, i64 %b) { ; CHECK-LABEL: cmp_slt_not: ; CHECK: # %bb.0: -; CHECK-NEXT: notq %rdi -; CHECK-NEXT: notq %rsi ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpq %rsi, %rdi +; CHECK-NEXT: cmpq %rdi, %rsi ; CHECK-NEXT: setl %al ; CHECK-NEXT: negq %rax ; CHECK-NEXT: retq @@ -785,10 +757,9 @@ define i64 @cmp_slt_not(i64 %a, i64 %b) { define i64 @cmp_slt_not_with_constant(i64 %a) { ; CHECK-LABEL: cmp_slt_not_with_constant: ; CHECK: # %bb.0: -; CHECK-NEXT: notq %rdi ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpq $42, %rdi -; CHECK-NEXT: setl %al +; CHECK-NEXT: cmpq $-42, %rdi +; CHECK-NEXT: setge %al ; CHECK-NEXT: negq %rax ; CHECK-NEXT: retq %na = xor i64 %a, -1 @@ -800,11 +771,7 @@ define 
i64 @cmp_slt_not_with_constant(i64 %a) { define <4 x i32> @cmp_slt_not_with_vec(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmp_slt_not_with_vec: ; CHECK: # %bb.0: -; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-NEXT: pxor %xmm2, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm2 -; CHECK-NEXT: pcmpgtd %xmm0, %xmm2 -; CHECK-NEXT: movdqa %xmm2, %xmm0 +; CHECK-NEXT: pcmpgtd %xmm1, %xmm0 ; CHECK-NEXT: retq %na = xor <4 x i32> %a, %nb = xor <4 x i32> %b, @@ -845,21 +812,18 @@ define i64 @cmp_ult_not_with_constant(i64 %a) { define <4 x i32> @cmp_ult_not_with_vec(<4 x i32> %a, <4 x i32> %b) { ; SSE2-LABEL: cmp_ult_not_with_vec: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647] +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 -; SSE2-NEXT: movdqa %xmm2, %xmm0 +; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: cmp_ult_not_with_vec: ; SSE41: # %bb.0: -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm1 -; SSE41-NEXT: pmaxud %xmm0, %xmm1 +; SSE41-NEXT: pmaxud %xmm1, %xmm0 ; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE41-NEXT: pxor %xmm2, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq %na = xor <4 x i32> %a, %nb = xor <4 x i32> %b, @@ -871,10 +835,8 @@ define <4 x i32> @cmp_ult_not_with_vec(<4 x i32> %a, <4 x i32> %b) { define i64 @cmp_ule_not(i64 %a, i64 %b) { ; CHECK-LABEL: cmp_ule_not: ; CHECK: # %bb.0: -; CHECK-NEXT: notq %rdi -; CHECK-NEXT: notq %rsi ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpq %rdi, %rsi +; CHECK-NEXT: cmpq %rsi, %rdi ; CHECK-NEXT: adcq $-1, %rax ; CHECK-NEXT: retq %na = xor i64 %a, -1 @@ -887,10 +849,9 @@ define i64 @cmp_ule_not(i64 %a, i64 %b) { define i64 @cmp_ule_not_with_constant(i64 %a) { ; CHECK-LABEL: cmp_ule_not_with_constant: ; CHECK: # %bb.0: -; CHECK-NEXT: notq %rdi ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: cmpq $43, %rdi -; CHECK-NEXT: sbbq %rax, %rax +; CHECK-NEXT: cmpq $-43, %rdi +; CHECK-NEXT: adcq $-1, %rax ; CHECK-NEXT: retq %na = xor i64 %a, -1 %c = icmp ule i64 %na, 42 @@ -901,21 +862,18 @@ define i64 @cmp_ule_not_with_constant(i64 %a) { define <4 x i32> @cmp_ule_not_with_vec(<4 x i32> %a, <4 x i32> %b) { ; SSE2-LABEL: cmp_ule_not_with_vec: ; SSE2: # %bb.0: -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483647,2147483647,2147483647,2147483647] -; SSE2-NEXT: pxor %xmm3, %xmm1 -; SSE2-NEXT: pxor %xmm3, %xmm0 -; SSE2-NEXT: pcmpgtd %xmm1, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: pxor %xmm2, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm2 +; SSE2-NEXT: pcmpgtd %xmm0, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE2-NEXT: pxor %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: cmp_ule_not_with_vec: ; SSE41: # %bb.0: -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: pxor %xmm2, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm2 -; SSE41-NEXT: pminud %xmm0, %xmm2 -; SSE41-NEXT: pcmpeqd %xmm2, %xmm0 +; SSE41-NEXT: pminud %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm1, %xmm0 ; SSE41-NEXT: retq %na = xor <4 x i32> %a, %nb = xor <4 x i32> %b, @@ -1010,10 +968,8 @@ define <4 x i32> @cmp_ne_not_with_vec(<4 x i32> %a, <4 x i32> %b) { define i64 @cmp_uge_not_commute(i64 %b, i64 %a) { ; CHECK-LABEL: cmp_uge_not_commute: ; CHECK: # %bb.0: -; CHECK-NEXT: notq %rsi -; CHECK-NEXT: notq %rdi ; CHECK-NEXT: 
xorl %eax, %eax -; CHECK-NEXT: cmpq %rdi, %rsi +; CHECK-NEXT: cmpq %rsi, %rdi ; CHECK-NEXT: adcq $-1, %rax ; CHECK-NEXT: retq %na = xor i64 %a, -1 @@ -1039,14 +995,14 @@ define i64 @cmp_ult_not_with_constant_commute(i64 %a) { define <2 x i64> @cmp_uge_not_with_vec2xi64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: cmp_uge_not_with_vec2xi64: ; CHECK: # %bb.0: -; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372034707292159,9223372034707292159] -; CHECK-NEXT: pxor %xmm2, %xmm0 +; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456] ; CHECK-NEXT: pxor %xmm2, %xmm1 -; CHECK-NEXT: movdqa %xmm1, %xmm2 -; CHECK-NEXT: pcmpgtd %xmm0, %xmm2 +; CHECK-NEXT: pxor %xmm2, %xmm0 +; CHECK-NEXT: movdqa %xmm0, %xmm2 +; CHECK-NEXT: pcmpgtd %xmm1, %xmm2 ; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2] -; CHECK-NEXT: pcmpeqd %xmm0, %xmm1 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] +; CHECK-NEXT: pcmpeqd %xmm1, %xmm0 +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; CHECK-NEXT: pand %xmm3, %xmm0 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] ; CHECK-NEXT: por %xmm0, %xmm1 From 8d93cbed6e383d5a8b5985f9087cb31ffd5ac7f5 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 22 Mar 2023 22:10:20 -0700 Subject: [PATCH 367/691] [InstCombine] Precommit tests This patch precommits tests for: https://github.com/llvm/llvm-project/issues/60802 --- llvm/test/Transforms/InstCombine/bit_ceil.ll | 146 +++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll index aa98896aac549..98f4cdb6fb834 100644 --- a/llvm/test/Transforms/InstCombine/bit_ceil.ll +++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll @@ -85,6 +85,7 @@ define i32 @bit_ceil_32_plus_1(i32 %x) { ret i32 %sel } +; std::bit_ceil(x + 2) define i32 @bit_ceil_plus_2(i32 %x) { ; CHECK-LABEL: @bit_ceil_plus_2( ; CHECK-NEXT: entry: @@ -152,5 +153,150 @@ entry: ret i32 %sel } +; Commuted select operands should still be recognized. 
+define i32 @bit_ceil_commuted_operands(i32 %x) { +; CHECK-LABEL: @bit_ceil_commuted_operands( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[UGT_INV:%.*]] = icmp ugt i32 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT_INV]], i32 [[SHL]], i32 1 +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %ugt = icmp ule i32 %x, 1 + %sel = select i1 %ugt, i32 1, i32 %shl + ret i32 %sel +} + +; Negative test: wrong select constant +define i32 @bit_ceil_wrong_select_constant(i32 %x) { +; CHECK-LABEL: @bit_ceil_wrong_select_constant( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[UGT_INV:%.*]] = icmp ult i32 [[X]], 2 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT_INV]], i32 2, i32 [[SHL]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %ugt = icmp ugt i32 %x, 1 + %sel = select i1 %ugt, i32 %shl, i32 2 + ret i32 %sel +} + +; Negative test: select condition != false does not guarantee ctlz being either 0 or 32 +define i32 @bit_ceil_32_wrong_cond(i32 %x) { +; CHECK-LABEL: @bit_ceil_32_wrong_cond( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[UGT:%.*]] = icmp ugt i32 [[X]], 2 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %ugt = icmp ugt i32 %x, 2 + %sel = select i1 %ugt, i32 %shl, i32 1 + ret i32 %sel +} + +; Negative test: wrong sub constant +define i32 @bit_ceil_wrong_sub_constant(i32 %x) { +; CHECK-LABEL: @bit_ceil_wrong_sub_constant( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 33, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[UGT:%.*]] = icmp ugt i32 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 33, %ctlz + %shl = shl i32 1, %sub + %ugt = icmp ugt i32 %x, 1 + %sel = select i1 %ugt, i32 %shl, i32 1 + ret i32 %sel +} + +; Negative test: the shl result used twice +define i32 @bit_ceil_32_shl_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_ceil_32_shl_used_twice( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[UGT:%.*]] = icmp ugt 
i32 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: store i32 [[SHL]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %ugt = icmp ugt i32 %x, 1 + %sel = select i1 %ugt, i32 %shl, i32 1 + store i32 %shl, ptr %p, align 4 + ret i32 %sel +} + +; Negative test: the sub result used twice +define i32 @bit_ceil_32_sub_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_ceil_32_sub_used_twice( +; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[UGT:%.*]] = icmp ugt i32 [[X]], 1 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: store i32 [[SUB]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %dec = add i32 %x, -1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %ugt = icmp ugt i32 %x, 1 + %sel = select i1 %ugt, i32 %shl, i32 1 + store i32 %sub, ptr %p, align 4 + ret i32 %sel +} + +; a vector version of @bit_ceil_32 above +define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) { +; CHECK-LABEL: @bit_ceil_v4i32( +; CHECK-NEXT: [[DEC:%.*]] = add <4 x i32> [[X:%.*]], +; CHECK-NEXT: [[CTLZ:%.*]] = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[DEC]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw <4 x i32> , [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> , [[SUB]] +; CHECK-NEXT: [[UGT:%.*]] = icmp ugt <4 x i32> [[X]], +; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[UGT]], <4 x i32> [[SHL]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[SEL]] +; + %dec = add <4 x i32> %x, + %ctlz = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %dec, i1 false) + %sub = sub <4 x i32> , %ctlz + %shl = shl <4 x i32> , %sub + %ugt = icmp ugt <4 x i32> %x, + %sel = select <4 x i1> %ugt, <4 x i32> %shl, <4 x i32> + ret <4 x i32> %sel +} + declare i32 @llvm.ctlz.i32(i32, i1 immarg) declare i64 @llvm.ctlz.i64(i64, i1 immarg) +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) From 0d19e583f1066935de5d0e9c55ee4a4f78649e23 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 22 Mar 2023 22:10:22 -0700 Subject: [PATCH 368/691] [InstCombine] Precommit a test This patch precommits a test for: https://github.com/llvm/llvm-project/issues/61183 --- llvm/test/Transforms/InstCombine/bit_floor.ll | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bit_floor.ll b/llvm/test/Transforms/InstCombine/bit_floor.ll index 0ef7fe3d22e0f..d436e53eb4504 100644 --- a/llvm/test/Transforms/InstCombine/bit_floor.ll +++ b/llvm/test/Transforms/InstCombine/bit_floor.ll @@ -39,5 +39,26 @@ define i64 @bit_floor_64(i64 %x) { ret i64 %sel } +; a vector version of @bit_floor_32 above +define <4 x i32> @bit_floor_v4i32(<4 x i32> %x) { +; CHECK-LABEL: @bit_floor_v4i32( +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq <4 x i32> [[X:%.*]], zeroinitializer +; CHECK-NEXT: [[LSHR:%.*]] = lshr <4 x i32> [[X]], +; CHECK-NEXT: [[CTLZ:%.*]] = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw <4 x i32> , [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> , [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[EQ0]], <4 x i32> 
zeroinitializer, <4 x i32> [[SHL]]
+; CHECK-NEXT:    ret <4 x i32> [[SEL]]
+;
+  %eq0 = icmp eq <4 x i32> %x, <i32 0, i32 0, i32 0, i32 0>
+  %lshr = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+  %ctlz = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %lshr, i1 false)
+  %sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %ctlz
+  %shl = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %sub
+  %sel = select <4 x i1> %eq0, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> %shl
+  ret <4 x i32> %sel
+}
+
 declare i32 @llvm.ctlz.i32(i32, i1 immarg)
 declare i64 @llvm.ctlz.i64(i64, i1 immarg)
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)

From 4524db7316b2db9a999b2894a047799dfb6c5cf6 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 22 Mar 2023 22:10:23 -0700
Subject: [PATCH 369/691] [ARM] Use isNullConstant (NFC)

---
 llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp     | 10 +++-------
 llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp |  5 ++---
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index fbf688de637b5..efacc8b8f3c0e 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -2720,10 +2720,7 @@ void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
   }

   auto OpIsZero = [N](size_t OpNo) {
-    if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo)))
-      if (OpConst->getZExtValue() == 0)
-        return true;
-    return false;
+    return isNullConstant(N->getOperand(OpNo));
   };

   // If the input accumulator value is not zero, select an instruction with
@@ -3990,10 +3987,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     SDValue SmulLoHi = N->getOperand(1);
     SDValue Subc = N->getOperand(2);
-    auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
+    SDValue Zero = Subc.getOperand(0);

-    if (!Zero || Zero->getZExtValue() != 0 ||
-        Subc.getOperand(1) != SmulLoHi.getValue(0) ||
+    if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
         N->getOperand(1) != SmulLoHi.getValue(1) ||
         N->getOperand(2) != Subc.getValue(1))
       break;
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index 913724daf0ad6..c57825949c1ce 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -65,9 +65,8 @@ SDValue ARMSelectionDAGInfo::EmitSpecializedLibcall(
     break;
   case RTLIB::MEMSET:
     AEABILibcall = AEABI_MEMSET;
-    if (ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src))
-      if (ConstantSrc->getZExtValue() == 0)
-        AEABILibcall = AEABI_MEMCLR;
+    if (isNullConstant(Src))
+      AEABILibcall = AEABI_MEMCLR;
     break;
   default:
     return SDValue();

From 7bb6d1b32ea9972277201b7651086ab9faffc557 Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Wed, 22 Mar 2023 22:10:25 -0700
Subject: [PATCH 370/691] [llvm] Skip getAPIntValue (NFC)

ConstantSDNode provides some convenience functions like isZero,
getZExtValue, and isMinSignedValue that are named identically to those
provided by APInt, so we can "skip" getAPIntValue.
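For illustration (a hand-written sketch, not code taken from this diff),
the forwarders make the long and short spellings interchangeable:

  ConstantSDNode *N1C = ...;  // some constant operand
  bool ViaAPInt = N1C->getAPIntValue().isMinSignedValue(); // before
  bool Direct = N1C->isMinSignedValue();                   // after
  assert(ViaAPInt == Direct && "the helper just forwards to the APInt");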
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp    | 4 ++--
 llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 7 +++----
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp    | 4 ++--
 llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp          | 3 +--
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp      | 2 +-
 5 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 4a34d4724ae8f..cc722bcc8c2b3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4105,7 +4105,7 @@ SDValue DAGCombiner::visitSUBO(SDNode *N) {
   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);

   // fold (subo x, c) -> (addo x, -c)
-  if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
+  if (IsSigned && N1C && !N1C->isMinSignedValue()) {
     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
   }
@@ -4585,7 +4585,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
     return DAG.getNegative(N0, DL, VT);

   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
-  if (N1C && N1C->getAPIntValue().isMinSignedValue())
+  if (N1C && N1C->isMinSignedValue())
     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
                          DAG.getConstant(1, DL, VT),
                          DAG.getConstant(0, DL, VT));
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9ef3c15cfe374..b7b67a20bc9e9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3908,8 +3908,7 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
 SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
     EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
     DAGCombinerInfo &DCI, const SDLoc &DL) const {
-  assert(isConstOrConstSplat(N1C) &&
-         isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
+  assert(isConstOrConstSplat(N1C) && isConstOrConstSplat(N1C)->isZero() &&
          "Should be a comparison with 0.");
   assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
          "Valid only for [in]equality comparisons.");
@@ -4738,8 +4737,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       // For example, when high 32-bits of i64 X are known clear:
       // all bits clear: (X | (Y<<32)) == 0  --> (X | Y) == 0
       // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
-      bool CmpZero = N1C->getAPIntValue().isZero();
-      bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
+      bool CmpZero = N1C->isZero();
+      bool CmpNegOne = N1C->isAllOnes();
       if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
         // Match or(lo,shl(hi,bw/2)) pattern.
        auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c55b2e4d8fa14..6214c3e935ec4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2770,7 +2770,7 @@ bool AMDGPUDAGToDAGISel::SelectDotIUVOP3PMods(SDValue In, SDValue &Src) const {
   assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");

   unsigned Mods = SISrcMods::OP_SEL_1;
-  unsigned SrcSign = C->getAPIntValue().getZExtValue();
+  unsigned SrcSign = C->getZExtValue();
   if (SrcSign == 1)
     Mods ^= SISrcMods::NEG;

@@ -2784,7 +2784,7 @@ bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
   assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");

   unsigned Mods = SISrcMods::OP_SEL_1;
-  unsigned SrcVal = C->getAPIntValue().getZExtValue();
+  unsigned SrcVal = C->getZExtValue();
   if (SrcVal == 1)
     Mods |= SISrcMods::OP_SEL_0;

diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 03015a457a0d1..6ea8e200bd4e9 100644
--- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -275,8 +275,7 @@ bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(
   }

   if (ImmNode->getValueType(0) != MVT::i8) {
-    Disp = CurDAG->getTargetConstant(
-        ImmNode->getAPIntValue().getZExtValue(), dl, MVT::i8);
+    Disp = CurDAG->getTargetConstant(ImmNode->getZExtValue(), dl, MVT::i8);
   } else {
     Disp = ImmOp;
   }
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 03a387570e3c6..7670d4d41cd86 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -18149,7 +18149,7 @@ PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
   if (Flags & PPC::MOF_RPlusSImm16) {
     SDValue Op0 = N.getOperand(0);
     SDValue Op1 = N.getOperand(1);
-    int16_t Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();
+    int16_t Imm = cast<ConstantSDNode>(Op1)->getZExtValue();
     if (!Align || isAligned(*Align, Imm)) {
       Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
       Base = Op0;

From fd29a4d24267eef0f11d238cb4a32b07d56d6c5c Mon Sep 17 00:00:00 2001
From: wlei
Date: Wed, 22 Mar 2023 13:13:27 -0700
Subject: [PATCH 371/691] [Pseudo Probe] Use the name from debug info to
 compute GUID in probe desc

This fixes a GUID mismatch while decoding pseudo probes, where a GUID
from the inline tree is not in the GUID2FuncDescMap. It turned out that
the frontend could change the function name, making it different from
the one in debug info (https://reviews.llvm.org/D111009). This change
uses the function name from debug info so that it stays consistent with
the probe name from the inline stack.

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D146657
---
 llvm/include/llvm/IR/MDBuilder.h               |  2 +-
 llvm/lib/IR/MDBuilder.cpp                      |  4 ++--
 llvm/lib/Transforms/IPO/SampleProfileProbe.cpp | 15 +++++++++++----
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h
index bd542bd0d2b2b..39165453de16b 100644
--- a/llvm/include/llvm/IR/MDBuilder.h
+++ b/llvm/include/llvm/IR/MDBuilder.h
@@ -78,7 +78,7 @@ class MDBuilder {
   MDNode *createFunctionSectionPrefix(StringRef Prefix);

   /// Return metadata containing the pseudo probe descriptor for a function.
-  MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, Function *F);
+  MDNode *createPseudoProbeDesc(uint64_t GUID, uint64_t Hash, StringRef FName);

   /// Return metadata containing llvm statistics.
   MDNode *
diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp
index 38ab1d3d10244..2490b3012bdc2 100644
--- a/llvm/lib/IR/MDBuilder.cpp
+++ b/llvm/lib/IR/MDBuilder.cpp
@@ -336,12 +336,12 @@ MDNode *MDBuilder::createIrrLoopHeaderWeight(uint64_t Weight) {
 }

 MDNode *MDBuilder::createPseudoProbeDesc(uint64_t GUID, uint64_t Hash,
-                                         Function *F) {
+                                         StringRef FName) {
   auto *Int64Ty = Type::getInt64Ty(Context);
   SmallVector<Metadata *, 3> Ops(3);
   Ops[0] = createConstant(ConstantInt::get(Int64Ty, GUID));
   Ops[1] = createConstant(ConstantInt::get(Int64Ty, Hash));
-  Ops[2] = createString(F->getName());
+  Ops[2] = createString(FName);
   return MDNode::get(Context, Ops);
 }

diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 7a40ddee81798..ed1d5575db69a 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -286,9 +286,16 @@ uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const {
 void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
   Module *M = F.getParent();
   MDBuilder MDB(F.getContext());
-  // Compute a GUID without considering the function's linkage type. This is
-  // fine since function name is the only key in the profile database.
-  uint64_t Guid = Function::getGUID(F.getName());
+  // Since the GUIDs from the probe desc and the inline stack are computed
+  // separately, we need to make sure their names are consistent, so here we
+  // also use the name from debug info.
+  StringRef FName = F.getName();
+  if (auto *SP = F.getSubprogram()) {
+    FName = SP->getLinkageName();
+    if (FName.empty())
+      FName = SP->getName();
+  }
+  uint64_t Guid = Function::getGUID(FName);

   // Assign an artificial debug line to a probe that doesn't come with a real
   // line. A probe not having a debug line will get an incomplete inline
@@ -371,7 +378,7 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
   //  - FunctionHash.
   //  - FunctionName
   auto Hash = getFunctionHash();
-  auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, &F);
+  auto *MD = MDB.createPseudoProbeDesc(Guid, Hash, FName);
   auto *NMD = M->getNamedMetadata(PseudoProbeDescMetadataName);
   assert(NMD && "llvm.pseudo_probe_desc should be pre-created");
   NMD->addOperand(MD);

From c2df1d8a6d1cab95637a3b40d49a15e535135b33 Mon Sep 17 00:00:00 2001
From: "Wu, Yingcong"
Date: Wed, 22 Mar 2023 21:49:19 -0700
Subject: [PATCH 372/691] [libfuzzer] add test of cov file-id in control file

There is a test for the ft file-id in the control file, but no test for
the cov line. Without such a test, an invalid cov file-id would cause a
crash.
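For reference, a merge control file with coverage records looks roughly
like the sketch below (a hand-written illustration, not output from a
real run); each FT and COV line carries a file-id that must match the
preceding STARTED entry:

  3           # number of input files
  0           # number of files in the first corpus
  ./T1/1
  ./T1/2
  ./T1/3
  STARTED 0 1
  FT 0 11
  COV 0 11
  STARTED 1 2
  FT 1 12
  COV 1 12

A COV record whose file-id does not match the last STARTED entry (such
as "COV 21 13") must make Parse reject the control file rather than
crash.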
Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D145672
---
 compiler-rt/lib/fuzzer/FuzzerMerge.cpp          |  8 ++++++--
 compiler-rt/test/fuzzer/merge-control-file.test | 14 ++++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/lib/fuzzer/FuzzerMerge.cpp b/compiler-rt/lib/fuzzer/FuzzerMerge.cpp
index 24bd11958e807..8c8806e8aafd3 100644
--- a/compiler-rt/lib/fuzzer/FuzzerMerge.cpp
+++ b/compiler-rt/lib/fuzzer/FuzzerMerge.cpp
@@ -77,6 +77,7 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) {
   size_t ExpectedStartMarker = 0;
   const size_t kInvalidStartMarker = -1;
   size_t LastSeenStartMarker = kInvalidStartMarker;
+  bool HaveFtMarker = true;
   std::vector<uint32_t> TmpFeatures;
   std::set<uint32_t> PCs;
   while (std::getline(IS, Line, '\n')) {
@@ -93,12 +94,13 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) {
       LastSeenStartMarker = ExpectedStartMarker;
       assert(ExpectedStartMarker < Files.size());
       ExpectedStartMarker++;
+      HaveFtMarker = false;
     } else if (Marker == "FT") {
       // FT FILE_ID COV1 COV2 COV3 ...
       size_t CurrentFileIdx = N;
       if (CurrentFileIdx != LastSeenStartMarker)
         return false;
-      LastSeenStartMarker = kInvalidStartMarker;
+      HaveFtMarker = true;
       if (ParseCoverage) {
         TmpFeatures.clear();  // use a vector from outer scope to avoid resizes.
         while (ISS1 >> N)
@@ -108,6 +110,8 @@ bool Merger::Parse(std::istream &IS, bool ParseCoverage) {
       }
     } else if (Marker == "COV") {
       size_t CurrentFileIdx = N;
+      if (CurrentFileIdx != LastSeenStartMarker)
+        return false;
       if (ParseCoverage)
         while (ISS1 >> N)
           if (PCs.insert(N).second)
@@ -116,7 +120,7 @@
       return false;
     }
   }
-  if (LastSeenStartMarker != kInvalidStartMarker)
+  if (!HaveFtMarker && LastSeenStartMarker != kInvalidStartMarker)
     LastFailure = Files[LastSeenStartMarker].Name;

   FirstNotProcessedFile = ExpectedStartMarker;
diff --git a/compiler-rt/test/fuzzer/merge-control-file.test b/compiler-rt/test/fuzzer/merge-control-file.test
index ebd2cf5af3baa..c7d666ea471e9 100644
--- a/compiler-rt/test/fuzzer/merge-control-file.test
+++ b/compiler-rt/test/fuzzer/merge-control-file.test
@@ -50,3 +50,17 @@ RUN: echo STARTED 2 2 >> %t/MCF
 RUN: echo FT 2 13 >> %t/MCF
 RUN: %run %t/T.exe -merge=1 %t/T1 %t/T2 -merge_control_file=%t/MCF 2>&1 | FileCheck %s --check-prefix=OK_3
 OK_3: MERGE-OUTER: nothing to do, merge has been completed before
+
+# Test for invalid COV file_id
+RUN: rm -f %t/T1/*; cp %t/T0/* %t/T1
+RUN: echo 3 > %t/MCF; echo 0 >> %t/MCF; echo %t/T1/1 >> %t/MCF; echo %t/T1/2 >> %t/MCF; echo %t/T1/3 >> %t/MCF
+RUN: echo STARTED 0 1 >> %t/MCF
+RUN: echo FT 0 11 >> %t/MCF
+RUN: echo STARTED 1 2 >> %t/MCF
+RUN: echo FT 1 12 >> %t/MCF
+RUN: echo STARTED 2 2 >> %t/MCF
+RUN: echo FT 2 13 >> %t/MCF
+# Invalid file-id 21 here
+RUN: echo COV 21 13 >> %t/MCF
+RUN: %run %t/T.exe -merge=1 %t/T1 %t/T2 -merge_control_file=%t/MCF 2>&1 | FileCheck %s --check-prefix=COV_INVALID
+COV_INVALID: MERGE-OUTER: bad control file, will overwrite it

From 021edda0b20468e20a72b1788721b2b70228bffb Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 22 Mar 2023 23:00:06 -0700
Subject: [PATCH 373/691] [TableGen] Simplify CodeGenHwModes constructor. NFC

Remove the loop that erases the DefaultMode from the Record vector.
Instead, we can skip over it in the loop that creates HwMode objects.
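Schematically, the constructor goes from the erase-then-iterate shape to
a single filtered loop (a simplified sketch, not the verbatim code; the
real change is in the diff below, and addMode stands in for the
Modes/ModeIds bookkeeping):

  // Before: materialize the vector, erase the DefaultMode entry, loop.
  std::vector<Record *> MRs = Records.getAllDerivedDefinitions("HwMode");
  // ... find and erase the DefaultMode element from MRs ...
  for (Record *R : MRs)
    addMode(R);

  // After: one loop that skips the default mode in place.
  for (Record *R : Records.getAllDerivedDefinitions("HwMode")) {
    if (R->getName() == DefaultModeName)
      continue;
    addMode(R);
  }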
---
 llvm/utils/TableGen/CodeGenHwModes.cpp | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/llvm/utils/TableGen/CodeGenHwModes.cpp b/llvm/utils/TableGen/CodeGenHwModes.cpp
index 99a97e89e60c5..d8652dfa121f3 100644
--- a/llvm/utils/TableGen/CodeGenHwModes.cpp
+++ b/llvm/utils/TableGen/CodeGenHwModes.cpp
@@ -65,23 +65,16 @@ void HwModeSelect::dump() const {
 }

 CodeGenHwModes::CodeGenHwModes(RecordKeeper &RK) : Records(RK) {
-  std::vector<Record *> MRs = Records.getAllDerivedDefinitions("HwMode");
-  // The default mode needs a definition in the .td sources for TableGen
-  // to accept references to it. We need to ignore the definition here.
-  for (auto I = MRs.begin(), E = MRs.end(); I != E; ++I) {
-    if ((*I)->getName() != DefaultModeName)
+  for (Record *R : Records.getAllDerivedDefinitions("HwMode")) {
+    // The default mode needs a definition in the .td sources for TableGen
+    // to accept references to it. We need to ignore the definition here.
+    if (R->getName() == DefaultModeName)
       continue;
-    MRs.erase(I);
-    break;
-  }
-
-  for (Record *R : MRs) {
-    Modes.emplace_back(R);
+    Modes.push_back(R);
     ModeIds.insert(std::make_pair(R, Modes.size()));
   }

-  std::vector<Record *> MSs = Records.getAllDerivedDefinitions("HwModeSelect");
-  for (Record *R : MSs) {
+  for (Record *R : Records.getAllDerivedDefinitions("HwModeSelect")) {
     auto P = ModeSelects.emplace(std::make_pair(R, HwModeSelect(R, *this)));
     assert(P.second);
     (void)P;

From b3256047d01f3cc57a617d984612d63b28998de7 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 22 Mar 2023 23:17:15 -0700
Subject: [PATCH 374/691] [TableGen] Change push_back back to emplace_back.

This was a mistake I made in 021edda0b20468e20a72b1788721b2b70228bffb.

---
 llvm/utils/TableGen/CodeGenHwModes.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/TableGen/CodeGenHwModes.cpp b/llvm/utils/TableGen/CodeGenHwModes.cpp
index d8652dfa121f3..2171507f4c63f 100644
--- a/llvm/utils/TableGen/CodeGenHwModes.cpp
+++ b/llvm/utils/TableGen/CodeGenHwModes.cpp
@@ -70,7 +70,7 @@ CodeGenHwModes::CodeGenHwModes(RecordKeeper &RK) : Records(RK) {
     // to accept references to it. We need to ignore the definition here.
     if (R->getName() == DefaultModeName)
       continue;
-    Modes.push_back(R);
+    Modes.emplace_back(R);
     ModeIds.insert(std::make_pair(R, Modes.size()));
   }

From f9f4767af9f3d89792d67ae8c5f65913ff263b89 Mon Sep 17 00:00:00 2001
From: Kazu Hirata
Date: Wed, 22 Mar 2023 23:27:59 -0700
Subject: [PATCH 375/691] [InstCombine] Precommit tests

This patch precommits tests for:
https://github.com/llvm/llvm-project/issues/61183

---
 llvm/test/Transforms/InstCombine/bit_floor.ll | 108 ++++++++++++++++++
 1 file changed, 108 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/bit_floor.ll b/llvm/test/Transforms/InstCombine/bit_floor.ll
index d436e53eb4504..9daa8eee8969c 100644
--- a/llvm/test/Transforms/InstCombine/bit_floor.ll
+++ b/llvm/test/Transforms/InstCombine/bit_floor.ll
@@ -39,6 +39,114 @@ define i64 @bit_floor_64(i64 %x) {
   ret i64 %sel
 }

+; Commuted select operands should still be recognized.
+define i32 @bit_floor_commuted_operands(i32 %x) { +; CHECK-LABEL: @bit_floor_commuted_operands( +; CHECK-NEXT: [[NE0_NOT:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[X]], 1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NE0_NOT]], i32 0, i32 [[SHL]] +; CHECK-NEXT: ret i32 [[SEL]] +; + %ne0 = icmp ne i32 %x, 0 + %lshr = lshr i32 %x, 1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %sel = select i1 %ne0, i32 %shl, i32 0 + ret i32 %sel +} + +; Negative test: lshr used twice +define i32 @bit_floor_lshr_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_floor_lshr_used_twice( +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[X]], 1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[EQ0]], i32 0, i32 [[SHL]] +; CHECK-NEXT: store i32 [[LSHR]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %eq0 = icmp eq i32 %x, 0 + %lshr = lshr i32 %x, 1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %sel = select i1 %eq0, i32 0, i32 %shl + store i32 %lshr, ptr %p, align 4 + ret i32 %sel +} + +; Negative test: ctlz used twice +define i32 @bit_floor_ctlz_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_floor_ctlz_used_twice( +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[X]], 1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[EQ0]], i32 0, i32 [[SHL]] +; CHECK-NEXT: store i32 [[CTLZ]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %eq0 = icmp eq i32 %x, 0 + %lshr = lshr i32 %x, 1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %sel = select i1 %eq0, i32 0, i32 %shl + store i32 %ctlz, ptr %p, align 4 + ret i32 %sel +} + +; Negative test: sub used twice +define i32 @bit_floor_sub_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_floor_sub_used_twice( +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[X]], 1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[EQ0]], i32 0, i32 [[SHL]] +; CHECK-NEXT: store i32 [[SUB]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %eq0 = icmp eq i32 %x, 0 + %lshr = lshr i32 %x, 1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %sel = select i1 %eq0, i32 0, i32 %shl + store i32 %sub, ptr %p, align 4 + ret i32 %sel +} + +; Negative test: shl used twice +define i32 @bit_floor_shl_used_twice(i32 %x, ptr %p) { +; CHECK-LABEL: @bit_floor_shl_used_twice( +; CHECK-NEXT: [[EQ0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[LSHR:%.*]] = 
lshr i32 [[X]], 1 +; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[LSHR]], i1 false), !range [[RNG0]] +; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] +; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[EQ0]], i32 0, i32 [[SHL]] +; CHECK-NEXT: store i32 [[SHL]], ptr [[P:%.*]], align 4 +; CHECK-NEXT: ret i32 [[SEL]] +; + %eq0 = icmp eq i32 %x, 0 + %lshr = lshr i32 %x, 1 + %ctlz = tail call i32 @llvm.ctlz.i32(i32 %lshr, i1 false) + %sub = sub i32 32, %ctlz + %shl = shl i32 1, %sub + %sel = select i1 %eq0, i32 0, i32 %shl + store i32 %shl, ptr %p, align 4 + ret i32 %sel +} + ; a vector version of @bit_floor_32 above define <4 x i32> @bit_floor_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @bit_floor_v4i32( From a35f9c6e0813c92e5efb0aac3e2681c7e1651f7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Degioanni?= Date: Thu, 23 Mar 2023 07:58:37 +0100 Subject: [PATCH 376/691] [mlir][llvm] Add poison constant. This patch introduces the poison constant from LLVM in the LLVM IR dialect. It also adds import and export support for it, along with roundtrip tests. Reviewed By: gysit Differential Revision: https://reviews.llvm.org/D146631 --- mlir/docs/Dialects/LLVM.md | 1 + mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 24 ++++++++++++++++++- mlir/lib/Target/LLVMIR/ModuleImport.cpp | 6 +++++ mlir/test/Dialect/LLVMIR/roundtrip.mlir | 8 +++++-- .../test/Target/LLVMIR/Import/instructions.ll | 3 +++ mlir/test/Target/LLVMIR/llvmir.mlir | 3 +++ 6 files changed, 42 insertions(+), 3 deletions(-) diff --git a/mlir/docs/Dialects/LLVM.md b/mlir/docs/Dialects/LLVM.md index 53d4dfbf686e7..c41d7254a378c 100644 --- a/mlir/docs/Dialects/LLVM.md +++ b/mlir/docs/Dialects/LLVM.md @@ -105,6 +105,7 @@ values for thread-safety and concept parsimony reasons. Instead, regular values are produced by dedicated operations that have the corresponding semantics: [`llvm.mlir.constant`](#llvmmlirconstant-mlirllvmconstantop), [`llvm.mlir.undef`](#llvmmlirundef-mlirllvmundefop), +[`llvm.mlir.poison`](#llvmmlirpoison-mlirllvmpoisonop), [`llvm.mlir.null`](#llvmmlirnull-mlirllvmnullop). Note how these operations are prefixed with `mlir.` to indicate that they don't belong to LLVM IR but are only necessary to model it in MLIR. The values produced by these operations are diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 1bbc32f3d2917..3abe1614f66fa 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -1652,7 +1652,7 @@ def LLVM_UndefOp : LLVM_Op<"mlir.undef", [Pure]>, Unlike LLVM IR, MLIR does not have first-class undefined values. Such values must be created as SSA values using `llvm.mlir.undef`. This operation has no operands or attributes. It creates an undefined value of the specified LLVM - IR dialect type wrapping an LLVM IR structure type. + IR dialect type. Example: @@ -1666,6 +1666,28 @@ def LLVM_UndefOp : LLVM_Op<"mlir.undef", [Pure]>, let assemblyFormat = "attr-dict `:` type($res)"; } +def LLVM_PoisonOp : LLVM_Op<"mlir.poison", [Pure]>, + LLVM_Builder<"$res = llvm::PoisonValue::get($_resultType);"> { + let summary = "Creates a poison value of LLVM dialect type."; + let description = [{ + Unlike LLVM IR, MLIR does not have first-class poison values. Such values + must be created as SSA values using `llvm.mlir.poison`. This operation has + no operands or attributes. It creates a poison value of the specified LLVM + IR dialect type. 
+
+    Example:
+
+    ```mlir
+    // Create a poison value for a structure with a 32-bit integer followed
+    // by a float.
+    %0 = llvm.mlir.poison : !llvm.struct<(i32, f32)>
+    ```
+  }];
+  let results = (outs LLVM_Type:$res);
+  let builders = [LLVM_OneResultOpBuilder];
+  let assemblyFormat = "attr-dict `:` type($res)";
+}
+
 def LLVM_ConstantOp
     : LLVM_Op<"mlir.constant", [Pure, ConstantLike]>,
       LLVM_Builder<[{$res = getLLVMConstant($_resultType, $value, $_location,
diff --git a/mlir/lib/Target/LLVMIR/ModuleImport.cpp b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
index d3ac7dcc17554..707f28d6c3641 100644
--- a/mlir/lib/Target/LLVMIR/ModuleImport.cpp
+++ b/mlir/lib/Target/LLVMIR/ModuleImport.cpp
@@ -1036,6 +1036,12 @@ FailureOr<Value> ModuleImport::convertConstant(llvm::Constant *constant) {
     return builder.create<NullOp>(loc, type).getResult();
   }

+  // Convert poison.
+  if (auto *poisonVal = dyn_cast<llvm::PoisonValue>(constant)) {
+    Type type = convertType(poisonVal->getType());
+    return builder.create<PoisonOp>(loc, type).getResult();
+  }
+
   // Convert undef.
   if (auto *undefVal = dyn_cast<llvm::UndefValue>(constant)) {
     Type type = convertType(undefVal->getType());
diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
index 9147027c9d4b2..c495a36bc0678 100644
--- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir
+++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir
@@ -438,10 +438,14 @@ llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personali
 func.func @useFreezeOp(%arg0: i32) {
   // CHECK: = llvm.freeze %[[ARG0:.*]] : i32
   %0 = llvm.freeze %arg0 : i32
-  // CHECK: %[[x:.*]] = llvm.mlir.undef : i8
+  // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef : i8
   %1 = llvm.mlir.undef : i8
-  // CHECK: = llvm.freeze %[[x]] : i8
+  // CHECK: = llvm.freeze %[[UNDEF]] : i8
   %2 = llvm.freeze %1 : i8
+  // CHECK: %[[POISON:.*]] = llvm.mlir.poison : i8
+  %3 = llvm.mlir.poison : i8
+  // CHECK: = llvm.freeze %[[POISON]] : i8
+  %4 = llvm.freeze %3 : i8
   return
 }

diff --git a/mlir/test/Target/LLVMIR/Import/instructions.ll b/mlir/test/Target/LLVMIR/Import/instructions.ll
index cbdb0ebe295ef..3f5ade4f15735 100644
--- a/mlir/test/Target/LLVMIR/Import/instructions.ll
+++ b/mlir/test/Target/LLVMIR/Import/instructions.ll
@@ -523,10 +523,13 @@ define void @gep_dynamic_idx(ptr %ptr, i32 %idx) {
 ; CHECK-SAME:  %[[ARG1:[a-zA-Z0-9]+]]
 define void @freeze(i32 %arg1) {
   ; CHECK:  %[[UNDEF:[0-9]+]] = llvm.mlir.undef : i64
+  ; CHECK:  %[[POISON:[0-9]+]] = llvm.mlir.poison : i16
   ; CHECK:  llvm.freeze %[[ARG1]] : i32
   ; CHECK:  llvm.freeze %[[UNDEF]] : i64
+  ; CHECK:  llvm.freeze %[[POISON]] : i16
   %1 = freeze i32 %arg1
   %2 = freeze i64 undef
+  %3 = freeze i16 poison
   ret void
 }

diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir
index 46120cb348296..6d340bc57fcd1 100644
--- a/mlir/test/Target/LLVMIR/llvmir.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir.mlir
@@ -1618,6 +1618,9 @@ llvm.func @callFreezeOp(%x : i32) {
   %1 = llvm.mlir.undef : i32
   // CHECK: freeze i32 undef
   %2 = llvm.freeze %1 : i32
+  %3 = llvm.mlir.poison : i32
+  // CHECK: freeze i32 poison
+  %4 = llvm.freeze %3 : i32
   llvm.return
 }

From c5d22f4e1866549f0d8c3aad598c0153c31679e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?=
Date: Thu, 23 Mar 2023 08:22:23 +0100
Subject: [PATCH 377/691] [clang][Sema][NFC] Make local variable const

---
 clang/lib/Sema/SemaDeclAttr.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 229e73618c53c..efa275c0aa12b 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++
b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3759,7 +3759,7 @@ static void handleEnumExtensibilityAttr(Sema &S, Decl *D, /// Handle __attribute__((format_arg((idx)))) attribute based on /// http://gcc.gnu.org/onlinedocs/gcc/Function-Attributes.html static void handleFormatArgAttr(Sema &S, Decl *D, const ParsedAttr &AL) { - Expr *IdxExpr = AL.getArgAsExpr(0); + const Expr *IdxExpr = AL.getArgAsExpr(0); ParamIdx Idx; if (!checkFunctionOrMethodParameterIndex(S, D, AL, 1, IdxExpr, Idx)) return; From 79df1a785d8e45a00906709fc403ba4bd5af4f66 Mon Sep 17 00:00:00 2001 From: Tobias Gysi Date: Thu, 23 Mar 2023 08:13:54 +0100 Subject: [PATCH 378/691] [mlir][llvm] Switch rountrip tests to opaque pointers. The revision switches all remaining LLVM dialect roundtrip tests to opaque pointers. Selected tests are copied to a postfixed test file for the time being. Part of https://discourse.llvm.org/t/rfc-switching-the-llvm-dialect-and-dialect-lowerings-to-opaque-pointers/68179 Reviewed By: zero9178 Differential Revision: https://reviews.llvm.org/D146639 --- .../LLVMIR/roundtrip-typed-pointers.mlir | 73 +++++++ mlir/test/Dialect/LLVMIR/roundtrip.mlir | 187 ++++++++---------- 2 files changed, 150 insertions(+), 110 deletions(-) create mode 100644 mlir/test/Dialect/LLVMIR/roundtrip-typed-pointers.mlir diff --git a/mlir/test/Dialect/LLVMIR/roundtrip-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/roundtrip-typed-pointers.mlir new file mode 100644 index 0000000000000..7cc5a6deee541 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/roundtrip-typed-pointers.mlir @@ -0,0 +1,73 @@ +// RUN: mlir-opt %s | mlir-opt | FileCheck %s + +// CHECK-LABEL: func @ops +// CHECK-SAME: %[[I32:.*]]: i32 +func.func @ops(%arg0: i32) { +// Memory-related operations. +// +// CHECK-NEXT: %[[ALLOCA:.*]] = llvm.alloca %[[I32]] x f64 : (i32) -> !llvm.ptr +// CHECK-NEXT: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][%[[I32]], %[[I32]]] : (!llvm.ptr, i32, i32) -> !llvm.ptr +// CHECK-NEXT: %[[VALUE:.*]] = llvm.load %[[GEP]] : !llvm.ptr +// CHECK-NEXT: llvm.store %[[VALUE]], %[[ALLOCA]] : !llvm.ptr +// CHECK-NEXT: %{{.*}} = llvm.bitcast %[[ALLOCA]] : !llvm.ptr to !llvm.ptr + %13 = llvm.alloca %arg0 x f64 : (i32) -> !llvm.ptr + %14 = llvm.getelementptr %13[%arg0, %arg0] : (!llvm.ptr, i32, i32) -> !llvm.ptr + %15 = llvm.load %14 : !llvm.ptr + llvm.store %15, %13 : !llvm.ptr + %16 = llvm.bitcast %13 : !llvm.ptr to !llvm.ptr + llvm.return +} + +// CHECK-LABEL: @gep +llvm.func @gep(%ptr: !llvm.ptr)>>, %idx: i64, + %ptr2: !llvm.ptr)>>) { + // CHECK: llvm.getelementptr %{{.*}}[%{{.*}}, 1, 0] : (!llvm.ptr)>>, i64) -> !llvm.ptr + llvm.getelementptr %ptr[%idx, 1, 0] : (!llvm.ptr)>>, i64) -> !llvm.ptr + // CHECK: llvm.getelementptr inbounds %{{.*}}[%{{.*}}, 0, %{{.*}}] : (!llvm.ptr)>>, i64, i64) -> !llvm.ptr + llvm.getelementptr inbounds %ptr2[%idx, 0, %idx] : (!llvm.ptr)>>, i64, i64) -> !llvm.ptr + llvm.return +} + +// CHECK-LABEL: @alloca +func.func @alloca(%size : i64) { + // CHECK: llvm.alloca %{{.*}} x i32 : (i64) -> !llvm.ptr + llvm.alloca %size x i32 {alignment = 0} : (i64) -> (!llvm.ptr) + // CHECK: llvm.alloca inalloca %{{.*}} x i32 {alignment = 8 : i64} : (i64) -> !llvm.ptr + llvm.alloca inalloca %size x i32 {alignment = 8} : (i64) -> (!llvm.ptr) + llvm.return +} + +// CHECK-LABEL: @null +func.func @null() { + // CHECK: llvm.mlir.null : !llvm.ptr + %0 = llvm.mlir.null : !llvm.ptr + // CHECK: llvm.mlir.null : !llvm.ptr>)>>, i64)>> + %1 = llvm.mlir.null : !llvm.ptr>)>>, i64)>> + llvm.return +} + +// CHECK-LABEL: llvm.func @vararg_func +llvm.func 
@vararg_func(%arg0: i32, ...) { + // CHECK: %{{.*}} = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %{{.*}} = llvm.mlir.constant(1 : i32) : i32 + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA0:.+]] = llvm.alloca %{{.*}} x !llvm.struct<"struct.va_list", (ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr)>> + // CHECK: %[[CAST0:.+]] = llvm.bitcast %[[ALLOCA0]] : !llvm.ptr)>> to !llvm.ptr + %2 = llvm.alloca %1 x !llvm.struct<"struct.va_list", (ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr)>> + %3 = llvm.bitcast %2 : !llvm.ptr)>> to !llvm.ptr + // CHECK: llvm.intr.vastart %[[CAST0]] + llvm.intr.vastart %3 : !llvm.ptr + // CHECK: %[[ALLOCA1:.+]] = llvm.alloca %{{.*}} x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr> + // CHECK: %[[CAST1:.+]] = llvm.bitcast %[[ALLOCA1]] : !llvm.ptr> to !llvm.ptr + %4 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr> + %5 = llvm.bitcast %4 : !llvm.ptr> to !llvm.ptr + // CHECK: llvm.intr.vacopy %[[CAST0]] to %[[CAST1]] + llvm.intr.vacopy %3 to %5 : !llvm.ptr, !llvm.ptr + // CHECK: llvm.intr.vaend %[[CAST1]] + // CHECK: llvm.intr.vaend %[[CAST0]] + llvm.intr.vaend %5 : !llvm.ptr + llvm.intr.vaend %3 : !llvm.ptr + // CHECK: llvm.return + llvm.return +} diff --git a/mlir/test/Dialect/LLVMIR/roundtrip.mlir b/mlir/test/Dialect/LLVMIR/roundtrip.mlir index c495a36bc0678..b430c56fe7aa2 100644 --- a/mlir/test/Dialect/LLVMIR/roundtrip.mlir +++ b/mlir/test/Dialect/LLVMIR/roundtrip.mlir @@ -1,10 +1,10 @@ // RUN: mlir-opt %s | mlir-opt | FileCheck %s // CHECK-LABEL: func @ops -// CHECK-SAME: (%[[I32:.*]]: i32, %[[FLOAT:.*]]: f32, %[[I8PTR1:.*]]: !llvm.ptr, %[[I8PTR2:.*]]: !llvm.ptr, %[[BOOL:.*]]: i1, %[[VI8PTR1:.*]]: !llvm.vec<2 x ptr>) +// CHECK-SAME: (%[[I32:.*]]: i32, %[[FLOAT:.*]]: f32, %[[PTR1:.*]]: !llvm.ptr, %[[PTR2:.*]]: !llvm.ptr, %[[BOOL:.*]]: i1, %[[VPTR1:.*]]: !llvm.vec<2 x ptr>) func.func @ops(%arg0: i32, %arg1: f32, - %arg2: !llvm.ptr, %arg3: !llvm.ptr, - %arg4: i1, %arg5 : !llvm.vec<2x!llvm.ptr>) { + %arg2: !llvm.ptr, %arg3: !llvm.ptr, + %arg4: i1, %arg5 : !llvm.vec<2x!llvm.ptr>) { // Integer arithmetic binary operations. 
// // CHECK: {{.*}} = llvm.add %[[I32]], %[[I32]] : i32 @@ -16,9 +16,9 @@ func.func @ops(%arg0: i32, %arg1: f32, // CHECK: {{.*}} = llvm.srem %[[I32]], %[[I32]] : i32 // CHECK: %[[SCALAR_PRED0:.+]] = llvm.icmp "ne" %[[I32]], %[[I32]] : i32 // CHECK: {{.*}} = llvm.add %[[SCALAR_PRED0]], %[[SCALAR_PRED0]] : i1 -// CHECK: %[[SCALAR_PRED1:.+]] = llvm.icmp "ne" %[[I8PTR1]], %[[I8PTR1]] : !llvm.ptr +// CHECK: %[[SCALAR_PRED1:.+]] = llvm.icmp "ne" %[[PTR1]], %[[PTR1]] : !llvm.ptr // CHECK: {{.*}} = llvm.add %[[SCALAR_PRED1]], %[[SCALAR_PRED1]] : i1 -// CHECK: %[[VEC_PRED:.+]] = llvm.icmp "ne" %[[VI8PTR1]], %[[VI8PTR1]] : !llvm.vec<2 x ptr> +// CHECK: %[[VEC_PRED:.+]] = llvm.icmp "ne" %[[VPTR1]], %[[VPTR1]] : !llvm.vec<2 x ptr> // CHECK: {{.*}} = llvm.add %[[VEC_PRED]], %[[VEC_PRED]] : vector<2xi1> %0 = llvm.add %arg0, %arg0 : i32 %1 = llvm.sub %arg0, %arg0 : i32 @@ -29,9 +29,9 @@ func.func @ops(%arg0: i32, %arg1: f32, %6 = llvm.srem %arg0, %arg0 : i32 %7 = llvm.icmp "ne" %arg0, %arg0 : i32 %typecheck_7 = llvm.add %7, %7 : i1 - %ptrcmp = llvm.icmp "ne" %arg2, %arg2 : !llvm.ptr + %ptrcmp = llvm.icmp "ne" %arg2, %arg2 : !llvm.ptr %typecheck_ptrcmp = llvm.add %ptrcmp, %ptrcmp : i1 - %vptrcmp = llvm.icmp "ne" %arg5, %arg5 : !llvm.vec<2 x ptr> + %vptrcmp = llvm.icmp "ne" %arg5, %arg5 : !llvm.vec<2 x ptr> %typecheck_vptrcmp = llvm.add %vptrcmp, %vptrcmp : vector<2 x i1> // Floating point binary operations. @@ -49,16 +49,14 @@ func.func @ops(%arg0: i32, %arg1: f32, // Memory-related operations. // -// CHECK-NEXT: %[[ALLOCA:.*]] = llvm.alloca %[[I32]] x f64 : (i32) -> !llvm.ptr -// CHECK-NEXT: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][%[[I32]], %[[I32]]] : (!llvm.ptr, i32, i32) -> !llvm.ptr -// CHECK-NEXT: %[[VALUE:.*]] = llvm.load %[[GEP]] : !llvm.ptr -// CHECK-NEXT: llvm.store %[[VALUE]], %[[ALLOCA]] : !llvm.ptr -// CHECK-NEXT: %{{.*}} = llvm.bitcast %[[ALLOCA]] : !llvm.ptr to !llvm.ptr - %13 = llvm.alloca %arg0 x f64 : (i32) -> !llvm.ptr - %14 = llvm.getelementptr %13[%arg0, %arg0] : (!llvm.ptr, i32, i32) -> !llvm.ptr - %15 = llvm.load %14 : !llvm.ptr - llvm.store %15, %13 : !llvm.ptr - %16 = llvm.bitcast %13 : !llvm.ptr to !llvm.ptr +// CHECK-NEXT: %[[ALLOCA:.*]] = llvm.alloca %[[I32]] x f64 : (i32) -> !llvm.ptr +// CHECK-NEXT: %[[GEP:.*]] = llvm.getelementptr %[[ALLOCA]][%[[I32]], %[[I32]]] : (!llvm.ptr, i32, i32) -> !llvm.ptr, f64 +// CHECK-NEXT: %[[VALUE:.*]] = llvm.load %[[GEP]] : !llvm.ptr -> f64 +// CHECK-NEXT: llvm.store %[[VALUE]], %[[ALLOCA]] : f64, !llvm.ptr + %13 = llvm.alloca %arg0 x f64 : (i32) -> !llvm.ptr + %14 = llvm.getelementptr %13[%arg0, %arg0] : (!llvm.ptr, i32, i32) -> !llvm.ptr, f64 + %15 = llvm.load %14 : !llvm.ptr -> f64 + llvm.store %15, %13 : f64, !llvm.ptr // Function call-related operations. // @@ -130,10 +128,10 @@ func.func @ops(%arg0: i32, %arg1: f32, // Integer to pointer and pointer to integer conversions. 
// -// CHECK: %[[PTR:.*]] = llvm.inttoptr %[[I32]] : i32 to !llvm.ptr -// CHECK: %{{.*}} = llvm.ptrtoint %[[PTR]] : !llvm.ptr to i32 - %25 = llvm.inttoptr %arg0 : i32 to !llvm.ptr - %26 = llvm.ptrtoint %25 : !llvm.ptr to i32 +// CHECK: %[[PTR:.*]] = llvm.inttoptr %[[I32]] : i32 to !llvm.ptr +// CHECK: %{{.*}} = llvm.ptrtoint %[[PTR]] : !llvm.ptr to i32 + %25 = llvm.inttoptr %arg0 : i32 to !llvm.ptr + %26 = llvm.ptrtoint %25 : !llvm.ptr to i32 // Extended and Quad floating point // @@ -163,28 +161,27 @@ func.func @ops(%arg0: i32, %arg1: f32, // CHECK: llvm.intr.round(%[[FLOAT]]) : (f32) -> f32 %34 = llvm.intr.round(%arg1) : (f32) -> f32 -// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () - "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () +// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () -// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () - "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () +// CHECK: "llvm.intr.memcpy"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () + "llvm.intr.memcpy"(%arg2, %arg3, %arg0, %arg4) : (!llvm.ptr, !llvm.ptr, i32, i1) -> () // CHECK: %[[SZ:.*]] = llvm.mlir.constant %sz = llvm.mlir.constant(10: i64) : i64 -// CHECK: "llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () - "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg4) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () +// CHECK: "llvm.intr.memcpy.inline"(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () + "llvm.intr.memcpy.inline"(%arg2, %arg3, %sz, %arg4) : (!llvm.ptr, !llvm.ptr, i64, i1) -> () // CHECK: llvm.return llvm.return } // CHECK-LABEL: @gep -llvm.func @gep(%ptr: !llvm.ptr)>>, %idx: i64, - %ptr2: !llvm.ptr)>>) { - // CHECK: llvm.getelementptr %{{.*}}[%{{.*}}, 1, 0] : (!llvm.ptr)>>, i64) -> !llvm.ptr - llvm.getelementptr %ptr[%idx, 1, 0] : (!llvm.ptr)>>, i64) -> !llvm.ptr - // CHECK: llvm.getelementptr inbounds %{{.*}}[%{{.*}}, 0, %{{.*}}] : (!llvm.ptr)>>, i64, i64) -> !llvm.ptr - llvm.getelementptr inbounds %ptr2[%idx, 0, %idx] : (!llvm.ptr)>>, i64, i64) -> !llvm.ptr +llvm.func @gep(%ptr: !llvm.ptr, %idx: i64, %ptr2: !llvm.ptr) { + // CHECK: llvm.getelementptr %{{.*}}[%{{.*}}, 1, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(i32, struct<(i32, f32)>)> + llvm.getelementptr %ptr[%idx, 1, 0] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(i32, struct<(i32, f32)>)> + // CHECK: llvm.getelementptr inbounds %{{.*}}[%{{.*}}, 0, %{{.*}}] : (!llvm.ptr, i64, i64) -> !llvm.ptr, !llvm.struct<(array<10 x f32>)> + llvm.getelementptr inbounds %ptr2[%idx, 0, %idx] : (!llvm.ptr, i64, i64) -> !llvm.ptr, !llvm.struct<(array<10 x f32>)> llvm.return } @@ -248,9 +245,9 @@ llvm.func @foo(%arg0: i32) -> !llvm.struct<(i32, f64, i32)> { } // CHECK-LABEL: @casts -// CHECK-SAME: (%[[I32:.*]]: i32, %[[I64:.*]]: i64, %[[V4I32:.*]]: vector<4xi32>, %[[V4I64:.*]]: vector<4xi64>, %[[I32PTR:.*]]: !llvm.ptr) +// CHECK-SAME: (%[[I32:.*]]: i32, %[[I64:.*]]: i64, %[[V4I32:.*]]: vector<4xi32>, %[[V4I64:.*]]: vector<4xi64>, %[[PTR:.*]]: !llvm.ptr) func.func @casts(%arg0: i32, %arg1: i64, %arg2: vector<4xi32>, - %arg3: vector<4xi64>, %arg4: !llvm.ptr) { + %arg3: vector<4xi64>, %arg4: !llvm.ptr) { // CHECK: = llvm.sext %[[I32]] : 
i32 to i56 %0 = llvm.sext %arg0 : i32 to i56 // CHECK: = llvm.zext %[[I32]] : i32 to i64 @@ -271,21 +268,23 @@ func.func @casts(%arg0: i32, %arg1: i64, %arg2: vector<4xi32>, %8 = llvm.fptosi %7 : f32 to i32 // CHECK: = llvm.fptoui %[[FLOAT]] : f32 to i32 %9 = llvm.fptoui %7 : f32 to i32 -// CHECK: = llvm.addrspacecast %[[I32PTR]] : !llvm.ptr to !llvm.ptr - %10 = llvm.addrspacecast %arg4 : !llvm.ptr to !llvm.ptr +// CHECK: = llvm.addrspacecast %[[PTR]] : !llvm.ptr to !llvm.ptr<2> + %10 = llvm.addrspacecast %arg4 : !llvm.ptr to !llvm.ptr<2> +// CHECK: = llvm.bitcast %[[I64]] : i64 to f64 + %11 = llvm.bitcast %arg1 : i64 to f64 llvm.return } // CHECK-LABEL: @vect -func.func @vect(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32, %arg3: !llvm.vec<2 x ptr>) { +func.func @vect(%arg0: vector<4xf32>, %arg1: i32, %arg2: f32, %arg3: !llvm.vec<2 x ptr>) { // CHECK: = llvm.extractelement {{.*}} : vector<4xf32> %0 = llvm.extractelement %arg0[%arg1 : i32] : vector<4xf32> // CHECK: = llvm.insertelement {{.*}} : vector<4xf32> %1 = llvm.insertelement %arg2, %arg0[%arg1 : i32] : vector<4xf32> // CHECK: = llvm.shufflevector {{.*}} [0, 0, 0, 0, 7] : vector<4xf32> %2 = llvm.shufflevector %arg0, %arg0 [0, 0, 0, 0, 7] : vector<4xf32> -// CHECK: = llvm.shufflevector %{{.+}}, %{{.+}} [1, 0] : !llvm.vec<2 x ptr> - %3 = llvm.shufflevector %arg3, %arg3 [1, 0] : !llvm.vec<2 x ptr> +// CHECK: = llvm.shufflevector %{{.+}}, %{{.+}} [1, 0] : !llvm.vec<2 x ptr> + %3 = llvm.shufflevector %arg3, %arg3 [1, 0] : !llvm.vec<2 x ptr> // CHECK: = llvm.mlir.constant(dense<1.000000e+00> : vector<4xf32>) : vector<4xf32> %4 = llvm.mlir.constant(dense<1.0> : vector<4xf32>) : vector<4xf32> return @@ -323,19 +322,17 @@ func.func @mixed_vect(%arg0: vector<8xf32>, %arg1: vector<4xf32>, %arg2: vector< // CHECK-LABEL: @alloca func.func @alloca(%size : i64) { - // CHECK: llvm.alloca %{{.*}} x i32 : (i64) -> !llvm.ptr - llvm.alloca %size x i32 {alignment = 0} : (i64) -> (!llvm.ptr) - // CHECK: llvm.alloca inalloca %{{.*}} x i32 {alignment = 8 : i64} : (i64) -> !llvm.ptr - llvm.alloca inalloca %size x i32 {alignment = 8} : (i64) -> (!llvm.ptr) + // CHECK: llvm.alloca %{{.*}} x i32 : (i64) -> !llvm.ptr + llvm.alloca %size x i32 {alignment = 0} : (i64) -> (!llvm.ptr) + // CHECK: llvm.alloca inalloca %{{.*}} x i32 {alignment = 8 : i64} : (i64) -> !llvm.ptr + llvm.alloca inalloca %size x i32 {alignment = 8} : (i64) -> (!llvm.ptr) llvm.return } // CHECK-LABEL: @null func.func @null() { - // CHECK: llvm.mlir.null : !llvm.ptr - %0 = llvm.mlir.null : !llvm.ptr - // CHECK: llvm.mlir.null : !llvm.ptr>)>>, i64)>> - %1 = llvm.mlir.null : !llvm.ptr>)>>, i64)>> + // CHECK: llvm.mlir.null : !llvm.ptr + %0 = llvm.mlir.null : !llvm.ptr llvm.return } @@ -375,61 +372,57 @@ func.func @cmpxchg(%ptr : !llvm.ptr, %cmp : i32, %new : i32) { llvm.return } -llvm.mlir.global external constant @_ZTIi() : !llvm.ptr -llvm.func @bar(!llvm.ptr, !llvm.ptr, !llvm.ptr) +llvm.mlir.global external constant @_ZTIi() : !llvm.ptr +llvm.func @bar(!llvm.ptr, !llvm.ptr, !llvm.ptr) llvm.func @__gxx_personality_v0(...) 
-> i32 // CHECK-LABEL: @invokeLandingpad llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personality_v0 } { -// CHECK: %[[a0:.*]] = llvm.mlir.constant(0 : i32) : i32 +// CHECK: %[[V0:.*]] = llvm.mlir.constant(0 : i32) : i32 // CHECK: %{{.*}} = llvm.mlir.constant(3 : i32) : i32 -// CHECK: %[[a2:.*]] = llvm.mlir.constant("\01") : !llvm.array<1 x i8> -// CHECK: %[[a3:.*]] = llvm.mlir.null : !llvm.ptr> -// CHECK: %[[a4:.*]] = llvm.mlir.null : !llvm.ptr -// CHECK: %[[a5:.*]] = llvm.mlir.addressof @_ZTIi : !llvm.ptr> -// CHECK: %[[a6:.*]] = llvm.bitcast %[[a5]] : !llvm.ptr> to !llvm.ptr -// CHECK: %[[a7:.*]] = llvm.mlir.constant(1 : i32) : i32 -// CHECK: %[[a8:.*]] = llvm.alloca %[[a7]] x i8 : (i32) -> !llvm.ptr -// CHECK: %{{.*}} = llvm.invoke @foo(%[[a7]]) to ^[[BB2:.*]] unwind ^[[BB1:.*]] : (i32) -> !llvm.struct<(i32, f64, i32)> +// CHECK: %[[V1:.*]] = llvm.mlir.constant("\01") : !llvm.array<1 x i8> +// CHECK: %[[V2:.*]] = llvm.mlir.null : !llvm.ptr +// CHECK: %[[V3:.*]] = llvm.mlir.addressof @_ZTIi : !llvm.ptr +// CHECK: %[[V4:.*]] = llvm.mlir.constant(1 : i32) : i32 +// CHECK: %[[V5:.*]] = llvm.alloca %[[V4]] x i8 : (i32) -> !llvm.ptr +// CHECK: %{{.*}} = llvm.invoke @foo(%[[V4]]) to ^[[BB2:.*]] unwind ^[[BB1:.*]] : (i32) -> !llvm.struct<(i32, f64, i32)> %0 = llvm.mlir.constant(0 : i32) : i32 %1 = llvm.mlir.constant(3 : i32) : i32 %2 = llvm.mlir.constant("\01") : !llvm.array<1 x i8> - %3 = llvm.mlir.null : !llvm.ptr> - %4 = llvm.mlir.null : !llvm.ptr - %5 = llvm.mlir.addressof @_ZTIi : !llvm.ptr> - %6 = llvm.bitcast %5 : !llvm.ptr> to !llvm.ptr - %7 = llvm.mlir.constant(1 : i32) : i32 - %8 = llvm.alloca %7 x i8 : (i32) -> !llvm.ptr - %9 = llvm.invoke @foo(%7) to ^bb2 unwind ^bb1 : (i32) -> !llvm.struct<(i32, f64, i32)> + %3 = llvm.mlir.null : !llvm.ptr + %4 = llvm.mlir.addressof @_ZTIi : !llvm.ptr + %5 = llvm.mlir.constant(1 : i32) : i32 + %6 = llvm.alloca %5 x i8 : (i32) -> !llvm.ptr + %7 = llvm.invoke @foo(%5) to ^bb2 unwind ^bb1 : (i32) -> !llvm.struct<(i32, f64, i32)> // CHECK: ^[[BB1]]: -// CHECK: %[[lp:.*]] = llvm.landingpad cleanup (catch %[[a3]] : !llvm.ptr>) (catch %[[a6]] : !llvm.ptr) (filter %[[a2]] : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> -// CHECK: %{{.*}} = llvm.intr.eh.typeid.for %6 : (!llvm.ptr) -> i32 -// CHECK: llvm.resume %[[lp]] : !llvm.struct<(ptr, i32)> +// CHECK: %[[lp:.*]] = llvm.landingpad cleanup (catch %[[V2]] : !llvm.ptr) (catch %[[V3]] : !llvm.ptr) (filter %[[V1]] : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> +// CHECK: %{{.*}} = llvm.intr.eh.typeid.for %[[V3]] : (!llvm.ptr) -> i32 +// CHECK: llvm.resume %[[lp]] : !llvm.struct<(ptr, i32)> ^bb1: - %10 = llvm.landingpad cleanup (catch %3 : !llvm.ptr>) (catch %6 : !llvm.ptr) (filter %2 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> - %11 = llvm.intr.eh.typeid.for %6 : (!llvm.ptr) -> i32 - llvm.resume %10 : !llvm.struct<(ptr, i32)> + %10 = llvm.landingpad cleanup (catch %3 : !llvm.ptr) (catch %4 : !llvm.ptr) (filter %2 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> + %11 = llvm.intr.eh.typeid.for %4 : (!llvm.ptr) -> i32 + llvm.resume %10 : !llvm.struct<(ptr, i32)> // CHECK: ^[[BB2]]: -// CHECK: llvm.return %[[a7]] : i32 +// CHECK: llvm.return %[[V4]] : i32 ^bb2: - llvm.return %7 : i32 + llvm.return %5 : i32 // CHECK: ^[[BB3:.*]]: -// CHECK: llvm.invoke @bar(%[[a8]], %[[a6]], %[[a4]]) to ^[[BB2]] unwind ^[[BB1]] : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () +// CHECK: llvm.invoke @bar(%[[V5]], %[[V3]], %[[V2]]) to ^[[BB2]] unwind ^[[BB1]] : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () 
^bb3: - llvm.invoke @bar(%8, %6, %4) to ^bb2 unwind ^bb1 : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () + llvm.invoke @bar(%6, %4, %3) to ^bb2 unwind ^bb1 : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () // CHECK: ^[[BB4:.*]]: // CHECK: %[[FUNC:.*]] = llvm.mlir.addressof @foo : !llvm.ptr // CHECK: %{{.*}} = llvm.invoke %[[FUNC]]{{.*}}: !llvm.ptr, ^bb4: %12 = llvm.mlir.addressof @foo : !llvm.ptr - %13 = llvm.invoke %12(%7) to ^bb2 unwind ^bb1 : !llvm.ptr, (i32) -> !llvm.struct<(i32, f64, i32)> + %13 = llvm.invoke %12(%5) to ^bb2 unwind ^bb1 : !llvm.ptr, (i32) -> !llvm.struct<(i32, f64, i32)> // CHECK: ^[[BB5:.*]]: -// CHECK: llvm.return %[[a0]] : i32 +// CHECK: llvm.return %[[V0]] : i32 ^bb5: llvm.return %0 : i32 } @@ -528,32 +521,6 @@ func.func @fastmathFlags(%arg0: f32, %arg1: f32, %arg2: i32, %arg3: vector<2 x f return } -// CHECK-LABEL: llvm.func @vararg_func -llvm.func @vararg_func(%arg0: i32, ...) { - // CHECK: %{{.*}} = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %{{.*}} = llvm.mlir.constant(1 : i32) : i32 - %0 = llvm.mlir.constant(1 : i32) : i32 - %1 = llvm.mlir.constant(1 : i32) : i32 - // CHECK: %[[ALLOCA0:.+]] = llvm.alloca %{{.*}} x !llvm.struct<"struct.va_list", (ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr)>> - // CHECK: %[[CAST0:.+]] = llvm.bitcast %[[ALLOCA0]] : !llvm.ptr)>> to !llvm.ptr - %2 = llvm.alloca %1 x !llvm.struct<"struct.va_list", (ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr)>> - %3 = llvm.bitcast %2 : !llvm.ptr)>> to !llvm.ptr - // CHECK: llvm.intr.vastart %[[CAST0]] - llvm.intr.vastart %3 : !llvm.ptr - // CHECK: %[[ALLOCA1:.+]] = llvm.alloca %{{.*}} x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr> - // CHECK: %[[CAST1:.+]] = llvm.bitcast %[[ALLOCA1]] : !llvm.ptr> to !llvm.ptr - %4 = llvm.alloca %0 x !llvm.ptr {alignment = 8 : i64} : (i32) -> !llvm.ptr> - %5 = llvm.bitcast %4 : !llvm.ptr> to !llvm.ptr - // CHECK: llvm.intr.vacopy %[[CAST0]] to %[[CAST1]] - llvm.intr.vacopy %3 to %5 : !llvm.ptr, !llvm.ptr - // CHECK: llvm.intr.vaend %[[CAST1]] - // CHECK: llvm.intr.vaend %[[CAST0]] - llvm.intr.vaend %5 : !llvm.ptr - llvm.intr.vaend %3 : !llvm.ptr - // CHECK: llvm.return - llvm.return -} - // CHECK-LABEL: @lifetime // CHECK-SAME: %[[P:.*]]: !llvm.ptr llvm.func @lifetime(%p: !llvm.ptr) { @@ -564,8 +531,8 @@ llvm.func @lifetime(%p: !llvm.ptr) { llvm.return } -// CHECK-LABEL: @vararg_func_opaque_pointers -llvm.func @vararg_func_opaque_pointers(%arg0: i32, ...) { +// CHECK-LABEL: @vararg_func +llvm.func @vararg_func(%arg0: i32, ...) { // CHECK: %[[C:.*]] = llvm.mlir.constant(1 : i32) // CHECK: %[[LIST:.*]] = llvm.alloca // CHECK: llvm.intr.vastart %[[LIST]] : !llvm.ptr{{$}} @@ -585,17 +552,17 @@ llvm.func @vararg_func_opaque_pointers(%arg0: i32, ...) 
{
   llvm.return
 }

-// CHECK-LABEL: @eh_typeid_opaque_pointers
+// CHECK-LABEL: @eh_typeid
 // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr
-llvm.func @eh_typeid_opaque_pointers(%arg0: !llvm.ptr) -> i32 {
+llvm.func @eh_typeid(%arg0: !llvm.ptr) -> i32 {
   // CHECK: llvm.intr.eh.typeid.for %[[ARG0]] : (!llvm.ptr) -> i32
   %0 = llvm.intr.eh.typeid.for %arg0 : (!llvm.ptr) -> i32
   llvm.return %0 : i32
 }

-// CHECK-LABEL: @stackrestore_opaque_pointers
+// CHECK-LABEL: @stackrestore
 // CHECK-SAME: %[[ARG0:.*]]: !llvm.ptr
-llvm.func @stackrestore_opaque_pointers(%arg0: !llvm.ptr) {
+llvm.func @stackrestore(%arg0: !llvm.ptr) {
   // CHECK: llvm.intr.stackrestore %[[ARG0]] : !llvm.ptr
   llvm.intr.stackrestore %arg0 : !llvm.ptr
   llvm.return

From 467cf1542808851773500fe0af0da916f46fa80c Mon Sep 17 00:00:00 2001
From: Congcong Cai
Date: Thu, 23 Mar 2023 15:40:55 +0800
Subject: [PATCH 379/691] [NFC] Fix typo lld::wasm in comment

---
 lld/wasm/Driver.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index e697f4b55ae6f..310f9df2d5b68 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -1217,4 +1217,4 @@ void LinkerDriver::linkerMain(ArrayRef argsArr) {
   writeResult();
 }

-} // namespace wasm::lld
+} // namespace lld::wasm

From 5acd29eb4d9e411b3631c26babcd1d2655623f4a Mon Sep 17 00:00:00 2001
From: Martin Braenne
Date: Thu, 23 Mar 2023 07:45:40 +0000
Subject: [PATCH 380/691] [clang][dataflow] Fix crash when RHS of `&&` or `||`
 calls `noreturn` func.

The crash happened because the transfer function for `&&` and `||`
unconditionally tried to retrieve the value of the RHS. However, if the
RHS is unreachable, there is no environment for it, and trying to
retrieve the operand's value causes an assertion failure.

See also the comments in the code for further details.

Reviewed By: xazax.hun, ymandel, sgatev, gribozavr2

Differential Revision: https://reviews.llvm.org/D146514
---
 .../FlowSensitive/ControlFlowContext.h        | 13 +++-
 .../clang/Analysis/FlowSensitive/Transfer.h   |  6 +-
 .../FlowSensitive/ControlFlowContext.cpp      | 29 +++++++-
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 42 ++++++++----
 .../TypeErasedDataflowAnalysis.cpp            |  2 +
 .../Analysis/FlowSensitive/TestingSupport.h   | 14 ++++
 .../Analysis/FlowSensitive/TransferTest.cpp   | 66 +++++++++++++++++++
 7 files changed, 153 insertions(+), 19 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h
index e641468f77d00..3495bdfc538cb 100644
--- a/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h
@@ -18,6 +18,7 @@
 #include "clang/AST/Decl.h"
 #include "clang/AST/Stmt.h"
 #include "clang/Analysis/CFG.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Error.h"
 #include
@@ -47,18 +48,26 @@ class ControlFlowContext {
     return StmtToBlock;
   }

+  /// Returns whether `B` is reachable from the entry block.
+  bool isBlockReachable(const CFGBlock &B) const {
+    return BlockReachable[B.getBlockID()];
+  }
+
 private:
   // FIXME: Once the deprecated `build` method is removed, mark `D` as "must not
   // be null" and add an assertion.
ControlFlowContext(const Decl *D, std::unique_ptr Cfg, - llvm::DenseMap StmtToBlock) + llvm::DenseMap StmtToBlock, + llvm::BitVector BlockReachable) : ContainingDecl(D), Cfg(std::move(Cfg)), - StmtToBlock(std::move(StmtToBlock)) {} + StmtToBlock(std::move(StmtToBlock)), + BlockReachable(std::move(BlockReachable)) {} /// The `Decl` containing the statement used to construct the CFG. const Decl *ContainingDecl; std::unique_ptr Cfg; llvm::DenseMap StmtToBlock; + llvm::BitVector BlockReachable; }; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/Transfer.h b/clang/include/clang/Analysis/FlowSensitive/Transfer.h index 78a426ed94dd5..db3d780bf35e5 100644 --- a/clang/include/clang/Analysis/FlowSensitive/Transfer.h +++ b/clang/include/clang/Analysis/FlowSensitive/Transfer.h @@ -26,9 +26,9 @@ class StmtToEnvMap { public: virtual ~StmtToEnvMap() = default; - /// Returns the environment of the basic block that contains `S` or nullptr if - /// there isn't one. - /// FIXME: Ensure that the result can't be null and return a const reference. + /// Retrieves the environment of the basic block that contains `S`. + /// If `S` is reachable, returns a non-null pointer to the environment. + /// If `S` is not reachable, returns nullptr. virtual const Environment *getEnvironment(const Stmt &S) const = 0; }; diff --git a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp index 2492b5203724c..6699a0fc9d79e 100644 --- a/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp @@ -16,6 +16,7 @@ #include "clang/AST/Decl.h" #include "clang/AST/Stmt.h" #include "clang/Analysis/CFG.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Error.h" #include @@ -44,6 +45,28 @@ buildStmtToBasicBlockMap(const CFG &Cfg) { return StmtToBlock; } +static llvm::BitVector findReachableBlocks(const CFG &Cfg) { + llvm::BitVector BlockReachable(Cfg.getNumBlockIDs(), false); + + llvm::SmallVector BlocksToVisit; + BlocksToVisit.push_back(&Cfg.getEntry()); + while (!BlocksToVisit.empty()) { + const CFGBlock *Block = BlocksToVisit.back(); + BlocksToVisit.pop_back(); + + if (BlockReachable[Block->getBlockID()]) + continue; + + BlockReachable[Block->getBlockID()] = true; + + for (const CFGBlock *Succ : Block->succs()) + if (Succ) + BlocksToVisit.push_back(Succ); + } + + return BlockReachable; +} + llvm::Expected ControlFlowContext::build(const Decl *D, Stmt &S, ASTContext &C) { CFG::BuildOptions Options; @@ -64,7 +87,11 @@ ControlFlowContext::build(const Decl *D, Stmt &S, ASTContext &C) { llvm::DenseMap StmtToBlock = buildStmtToBasicBlockMap(*Cfg); - return ControlFlowContext(D, std::move(Cfg), std::move(StmtToBlock)); + + llvm::BitVector BlockReachable = findReachableBlocks(*Cfg); + + return ControlFlowContext(D, std::move(Cfg), std::move(StmtToBlock), + std::move(BlockReachable)); } } // namespace dataflow diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp index e427f1458a8db..a1ed37da54c28 100644 --- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp +++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp @@ -162,15 +162,27 @@ class TransferVisitor : public ConstStmtVisitor { } case BO_LAnd: case BO_LOr: { - BoolValue &LHSVal = getLogicOperatorSubExprValue(*LHS); - BoolValue &RHSVal = getLogicOperatorSubExprValue(*RHS); - auto &Loc = Env.createStorageLocation(*S); Env.setStorageLocation(*S, Loc); + + 
BoolValue *LHSVal = getLogicOperatorSubExprValue(*LHS); + // If the LHS was not reachable, this BinaryOperator would also not be + // reachable, and we would never get here. + assert(LHSVal != nullptr); + BoolValue *RHSVal = getLogicOperatorSubExprValue(*RHS); + if (RHSVal == nullptr) { + // If the RHS isn't reachable and we evaluate this BinaryOperator, + // then the value of the LHS must have triggered the short-circuit + // logic. This implies that the value of the entire expression must be + // equal to the value of the LHS. + Env.setValue(Loc, *LHSVal); + break; + } + if (S->getOpcode() == BO_LAnd) - Env.setValue(Loc, Env.makeAnd(LHSVal, RHSVal)); + Env.setValue(Loc, Env.makeAnd(*LHSVal, *RHSVal)); else - Env.setValue(Loc, Env.makeOr(LHSVal, RHSVal)); + Env.setValue(Loc, Env.makeOr(*LHSVal, *RHSVal)); break; } case BO_NE: @@ -779,15 +791,19 @@ class TransferVisitor : public ConstStmtVisitor { } private: - BoolValue &getLogicOperatorSubExprValue(const Expr &SubExpr) { + /// If `SubExpr` is reachable, returns a non-null pointer to the value for + /// `SubExpr`. If `SubExpr` is not reachable, returns nullptr. + BoolValue *getLogicOperatorSubExprValue(const Expr &SubExpr) { // `SubExpr` and its parent logic operator might be part of different basic // blocks. We try to access the value that is assigned to `SubExpr` in the // corresponding environment. - if (const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr)) { - if (auto *Val = dyn_cast_or_null( - SubExprEnv->getValue(SubExpr, SkipPast::Reference))) - return *Val; - } + const Environment *SubExprEnv = StmtToEnv.getEnvironment(SubExpr); + if (!SubExprEnv) + return nullptr; + + if (auto *Val = dyn_cast_or_null( + SubExprEnv->getValue(SubExpr, SkipPast::Reference))) + return Val; if (Env.getStorageLocation(SubExpr, SkipPast::None) == nullptr) { // Sub-expressions that are logic operators are not added in basic blocks @@ -800,11 +816,11 @@ class TransferVisitor : public ConstStmtVisitor { if (auto *Val = dyn_cast_or_null( Env.getValue(SubExpr, SkipPast::Reference))) - return *Val; + return Val; // If the value of `SubExpr` is still unknown, we create a fresh symbolic // boolean value for it. - return Env.makeAtomicBoolValue(); + return &Env.makeAtomicBoolValue(); } // If context sensitivity is enabled, try to analyze the body of the callee diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index fe00d765b6bef..d94b547ca17de 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -51,6 +51,8 @@ class StmtToEnvMapImpl : public StmtToEnvMap { const Environment *getEnvironment(const Stmt &S) const override { auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S)); assert(BlockIt != CFCtx.getStmtToBlock().end()); + if (!CFCtx.isBlockReachable(*BlockIt->getSecond())) + return nullptr; const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()]; assert(State); return &State->Env; diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h index bc089f141850a..ef67dc98790c0 100644 --- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h +++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h @@ -389,6 +389,20 @@ checkDataflow(AnalysisInputs AI, /// `Name` must be unique in `ASTCtx`. 
const ValueDecl *findValueDecl(ASTContext &ASTCtx, llvm::StringRef Name); +/// Returns the value (of type `ValueT`) for the given identifier. +/// `ValueT` must be a subclass of `Value` and must be of the appropriate type. +/// +/// Requirements: +/// +/// `Name` must be unique in `ASTCtx`. +template +ValueT &getValueForDecl(ASTContext &ASTCtx, const Environment &Env, + llvm::StringRef Name) { + const ValueDecl *VD = findValueDecl(ASTCtx, Name); + assert(VD != nullptr); + return *cast(Env.getValue(*VD, SkipPast::None)); +} + /// Creates and owns constraints which are boolean values. class ConstraintContext { public: diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp index 9c16335714c55..1bb772a93bda6 100644 --- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp @@ -5104,4 +5104,70 @@ TEST(TransferTest, UnnamedBitfieldInitializer) { }); } +// Repro for a crash that used to occur when we call a `noreturn` function +// within one of the operands of a `&&` or `||` operator. +TEST(TransferTest, NoReturnFunctionInsideShortCircuitedBooleanOp) { + std::string Code = R"( + __attribute__((noreturn)) int doesnt_return(); + bool some_condition(); + void target(bool b1, bool b2) { + // Neither of these should crash. In addition, if we don't terminate the + // program, we know that the operators need to trigger the short-circuit + // logic, so `NoreturnOnRhsOfAnd` will be false and `NoreturnOnRhsOfOr` + // will be true. + bool NoreturnOnRhsOfAnd = b1 && doesnt_return() > 0; + bool NoreturnOnRhsOfOr = b2 || doesnt_return() > 0; + + // Calling a `noreturn` function on the LHS of an `&&` or `||` makes the + // entire expression unreachable. So we know that in both of the following + // cases, if `target()` terminates, the `else` branch was taken. + bool NoreturnOnLhsMakesAndUnreachable = false; + if (some_condition()) + doesnt_return() > 0 && some_condition(); + else + NoreturnOnLhsMakesAndUnreachable = true; + + bool NoreturnOnLhsMakesOrUnreachable = false; + if (some_condition()) + doesnt_return() > 0 || some_condition(); + else + NoreturnOnLhsMakesOrUnreachable = true; + + // [[p]] + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + + // Check that [[p]] is reachable with a non-false flow condition. 
+        EXPECT_FALSE(Env.flowConditionImplies(Env.getBoolLiteralValue(false)));
+
+        auto &B1 = getValueForDecl(ASTCtx, Env, "b1");
+        EXPECT_TRUE(Env.flowConditionImplies(Env.makeNot(B1)));
+
+        auto &NoreturnOnRhsOfAnd =
+            getValueForDecl(ASTCtx, Env, "NoreturnOnRhsOfAnd");
+        EXPECT_TRUE(Env.flowConditionImplies(Env.makeNot(NoreturnOnRhsOfAnd)));
+
+        auto &B2 = getValueForDecl(ASTCtx, Env, "b2");
+        EXPECT_TRUE(Env.flowConditionImplies(B2));
+
+        auto &NoreturnOnRhsOfOr =
+            getValueForDecl(ASTCtx, Env, "NoreturnOnRhsOfOr");
+        EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnRhsOfOr));
+
+        auto &NoreturnOnLhsMakesAndUnreachable = getValueForDecl(
+            ASTCtx, Env, "NoreturnOnLhsMakesAndUnreachable");
+        EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnLhsMakesAndUnreachable));
+
+        auto &NoreturnOnLhsMakesOrUnreachable = getValueForDecl(
+            ASTCtx, Env, "NoreturnOnLhsMakesOrUnreachable");
+        EXPECT_TRUE(Env.flowConditionImplies(NoreturnOnLhsMakesOrUnreachable));
+      });
+}
+
 } // namespace

From b08d35f826a6b7696a02f1d811da7a2f951e74a1 Mon Sep 17 00:00:00 2001
From: Carlos Galvez
Date: Wed, 22 Mar 2023 18:47:48 +0000
Subject: [PATCH 381/691] [clang-tidy] Ignore DISABLED_ in test suite name in
 google-avoid-underscore-in-googletest-name

Test suite name can also be disabled with DISABLED_, not just the test
case name.

Also fix a broken link in the test that refers to the explanation of why
underscores may not be used.

Differential Revision: https://reviews.llvm.org/D146655
---
 .../google/AvoidUnderscoreInGoogletestNameCheck.cpp |  6 ++++--
 clang-tools-extra/docs/ReleaseNotes.rst             |  4 ++++
 .../google/avoid-underscore-in-googletest-name.cpp  | 12 +++++++++++-
 3 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp b/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp
index c5bd6055072aa..b903f2552b7e6 100644
--- a/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp
+++ b/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp
@@ -51,8 +51,10 @@ class AvoidUnderscoreInGoogletestNameCallback : public PPCallbacks {
     const Token *TestNameToken = Args->getUnexpArgument(1);
     if (!TestCaseNameToken || !TestNameToken)
       return;
-    std::string TestCaseName = PP->getSpelling(*TestCaseNameToken);
-    if (TestCaseName.find('_') != std::string::npos)
+    std::string TestCaseNameMaybeDisabled = PP->getSpelling(*TestCaseNameToken);
+    StringRef TestCaseName = TestCaseNameMaybeDisabled;
+    TestCaseName.consume_front(KDisabledTestPrefix);
+    if (TestCaseName.contains('_'))
       Check->diag(TestCaseNameToken->getLocation(),
                   "avoid using \"_\" in test case name \"%0\" according to "
                   "Googletest FAQ")

diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 3f79e8e2a187a..80f5b46681713 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -234,6 +234,10 @@ Changes in existing checks
   string for ``Prefix`` or ``Suffix`` options could result in the style not
   being used.

+- Fixed an issue in :doc:`google-avoid-underscore-in-googletest-name
+  ` when using
+  ``DISABLED_`` in the test suite name.
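The essence of the fix above is that any GoogleTest `DISABLED_` marker is stripped from the candidate name before the underscore scan, so only user-written underscores are diagnosed. A minimal standalone sketch of that idea, independent of the clang-tidy plumbing (the helper name and the `main` driver below are illustrative, not part of the check):

#include "llvm/ADT/StringRef.h"
#include <cassert>

// Returns true if `Name` contains an underscore that is not part of the
// optional "DISABLED_" marker.
static bool hasUserWrittenUnderscore(llvm::StringRef Name) {
  // consume_front() drops the prefix in place when present; any '_' that
  // remains afterwards must have been written by the user.
  Name.consume_front("DISABLED_");
  return Name.contains('_');
}

int main() {
  assert(!hasUserWrittenUnderscore("DISABLED_TestCaseName")); // allowed
  assert(hasUserWrittenUnderscore("Illegal_TestCaseName"));   // diagnosed
  return 0;
}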
+
 Removed checks
 ^^^^^^^^^^^^^^

diff --git a/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp b/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp
index 6e8a5c2d50af9..3ab5a6ffe383b 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp
@@ -87,21 +87,31 @@ TYPED_TEST_P(Illegal_Type_ParameterizedTestCaseName, TestName) {}
 // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test case name "Illegal_Type_ParameterizedTestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name]

 // Underscores are allowed to disable a test with the DISABLED_ prefix.
-// https://github.com/google/googletest/blob/master/googletest/docs/faq.md#why-should-test-suite-names-and-test-names-not-contain-underscore
+// https://google.github.io/googletest/faq.html#why-should-test-suite-names-and-test-names-not-contain-underscore
 TEST(TestCaseName, TestName) {}
 TEST(TestCaseName, DISABLED_TestName) {}
+TEST(DISABLED_TestCaseName, TestName) {}
+TEST(DISABLED_TestCaseName, DISABLED_TestName) {}
 TEST_F(TestCaseFixtureName, TestName) {}
 TEST_F(TestCaseFixtureName, DISABLED_TestName) {}
+TEST_F(DISABLED_TestCaseFixtureName, TestName) {}
+TEST_F(DISABLED_TestCaseFixtureName, DISABLED_TestName) {}
 TEST_P(ParameterizedTestCaseFixtureName, TestName) {}
 TEST_P(ParameterizedTestCaseFixtureName, DISABLED_TestName) {}
+TEST_P(DISABLED_ParameterizedTestCaseFixtureName, TestName) {}
+TEST_P(DISABLED_ParameterizedTestCaseFixtureName, DISABLED_TestName) {}
 TYPED_TEST(TypedTestName, TestName) {}
 TYPED_TEST(TypedTestName, DISABLED_TestName) {}
+TYPED_TEST(DISABLED_TypedTestName, TestName) {}
+TYPED_TEST(DISABLED_TypedTestName, DISABLED_TestName) {}
 TYPED_TEST_P(TypeParameterizedTestName, TestName) {}
 TYPED_TEST_P(TypeParameterizedTestName, DISABLED_TestName) {}
+TYPED_TEST_P(DISABLED_TypeParameterizedTestName, TestName) {}
+TYPED_TEST_P(DISABLED_TypeParameterizedTestName, DISABLED_TestName) {}

 FRIEND_TEST(FriendTest, Is_NotChecked) {}
 FRIEND_TEST(Friend_Test, IsNotChecked) {}

From 5b0055a4ae8d27bf2a8db903eed22ff642fc27c3 Mon Sep 17 00:00:00 2001
From: Matthias Springer
Date: Thu, 23 Mar 2023 09:25:01 +0100
Subject: [PATCH 382/691] [mlir][Analysis][NFC] Split FlatAffineValueConstraints
 into multiple classes

The new class hierarchy is as follows:

* `IntegerRelation` (no change)
* `IntegerPolyhedron` (no change)
* `FlatLinearConstraints`: provides an AffineExpr-based API
* `FlatLinearValueConstraints`: stores an additional mapping of non-local
  vars to SSA values
* `FlatAffineValueConstraints`: provides additional helper functions for
  Affine dialect ops
* `FlatAffineRelation` (no change)

`FlatLinearConstraints` and `FlatLinearValueConstraints` are moved from
`MLIRAffineAnalysis` to `MLIRAnalysis` and can be used without depending
on the Affine dialect.

This change is in preparation for D145681, which adds an MLIR interface
that depends on `FlatLinearConstraints` (and cannot depend on the Affine
dialect or any other dialect).
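Read as C++, the hierarchy described above amounts to the following sketch; members are elided and only the inheritance relationships are meant literally, as confirmed by the headers added later in this patch:

namespace mlir {
namespace presburger {
class IntegerRelation { /* relations over integer tuples */ };
class IntegerPolyhedron : public IntegerRelation { /* integer sets */ };
} // namespace presburger

// Dialect-independent layers, now part of MLIRAnalysis:
class FlatLinearConstraints : public presburger::IntegerPolyhedron {
  // AffineExpr-based API, e.g. addBound(BoundType, unsigned, AffineMap).
};
class FlatLinearValueConstraints : public FlatLinearConstraints {
  // Additionally maps non-local variables to SSA Values.
};

// Affine-dialect layer, still part of MLIRAffineAnalysis:
class FlatAffineValueConstraints : public FlatLinearValueConstraints {
  // Helper functions for Affine dialect ops.
};
} // namespace mlir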
Differential Revision: https://reviews.llvm.org/D146201 --- mlir/docs/Rationale/UsageOfConst.md | 6 +- .../Analysis/FlatLinearValueConstraints.h | 560 +++++++ .../Analysis/Presburger/IntegerRelation.h | 9 +- .../Affine/Analysis/AffineStructures.h | 516 +------ mlir/include/mlir/IR/AffineExprVisitor.h | 2 +- mlir/include/mlir/IR/IntegerSet.h | 2 +- mlir/lib/Analysis/CMakeLists.txt | 6 +- .../Analysis/FlatLinearValueConstraints.cpp | 1344 +++++++++++++++++ .../Affine/Analysis/AffineStructures.cpp | 1335 +--------------- mlir/lib/IR/AffineExpr.cpp | 2 +- mlir/lib/IR/AffineMap.cpp | 2 +- mlir/test/Transforms/memref-bound-check.mlir | 2 +- .../Transforms/memref-dependence-check.mlir | 2 +- 13 files changed, 1982 insertions(+), 1806 deletions(-) create mode 100644 mlir/include/mlir/Analysis/FlatLinearValueConstraints.h create mode 100644 mlir/lib/Analysis/FlatLinearValueConstraints.cpp diff --git a/mlir/docs/Rationale/UsageOfConst.md b/mlir/docs/Rationale/UsageOfConst.md index 102b948a0eac1..7a54a4e6de7f5 100644 --- a/mlir/docs/Rationale/UsageOfConst.md +++ b/mlir/docs/Rationale/UsageOfConst.md @@ -235,9 +235,9 @@ if (auto *dimOp = inst->dyn_cast()) { It is much better to eliminate them entirely, and just pass around `DimOp` directly. For example, instead of: -```C++ +```c++ LogicalResult mlir::getIndexSet(MutableArrayRef> forOps, - FlatAffineConstraints *domain) { + FlatAffineValueConstraints *domain) { ``` @@ -245,7 +245,7 @@ It is a lot nicer to just have: ```c++ LogicalResult mlir::getIndexSet(MutableArrayRef forOps, - FlatAffineConstraints *domain) { + FlatAffineValueConstraints *domain) { ``` Particularly since all of the `FooOp` classes are already semantically a smart diff --git a/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h b/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h new file mode 100644 index 0000000000000..a6900ab599386 --- /dev/null +++ b/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h @@ -0,0 +1,560 @@ +//===- FlatLinearValueConstraints.h - Linear Constraints --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_ANALYSIS_FLATLINEARVALUECONSTRAINTS_H +#define MLIR_ANALYSIS_FLATLINEARVALUECONSTRAINTS_H + +#include "mlir/Analysis/Presburger/IntegerRelation.h" +#include "mlir/Analysis/Presburger/Matrix.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/IR/OpDefinition.h" +#include "mlir/Support/LogicalResult.h" +#include + +namespace mlir { + +class AffineMap; +class IntegerSet; +class MLIRContext; +class Value; +class MemRefType; +struct MutableAffineMap; + +namespace presburger { +class MultiAffineFunction; +} // namespace presburger + +/// FlatLinearConstraints is an extension of IntegerPolyhedron. It provides an +/// AffineExpr-based API. +class FlatLinearConstraints : public presburger::IntegerPolyhedron { +public: + /// Constructs a constraint system reserving memory for the specified number + /// of constraints and variables. `valArgs` are the optional SSA values + /// associated with each dimension/symbol. These must either be empty or match + /// the number of dimensions and symbols. 
+ FlatLinearConstraints(unsigned numReservedInequalities, + unsigned numReservedEqualities, + unsigned numReservedCols, unsigned numDims, + unsigned numSymbols, unsigned numLocals) + : IntegerPolyhedron(numReservedInequalities, numReservedEqualities, + numReservedCols, + presburger::PresburgerSpace::getSetSpace( + numDims, numSymbols, numLocals)) { + assert(numReservedCols >= getNumVars() + 1); + } + + /// Constructs a constraint system with the specified number of dimensions + /// and symbols. `valArgs` are the optional SSA values associated with each + /// dimension/symbol. These must either be empty or match the number of + /// dimensions and symbols. + FlatLinearConstraints(unsigned numDims = 0, unsigned numSymbols = 0, + unsigned numLocals = 0) + : FlatLinearConstraints(/*numReservedInequalities=*/0, + /*numReservedEqualities=*/0, + /*numReservedCols=*/numDims + numSymbols + + numLocals + 1, + numDims, numSymbols, numLocals) {} + + FlatLinearConstraints(const IntegerPolyhedron &fac) + : IntegerPolyhedron(fac) {} + + /// Return the kind of this object. + Kind getKind() const override { return Kind::FlatLinearConstraints; } + + static bool classof(const IntegerRelation *cst) { + return cst->getKind() >= Kind::FlatLinearConstraints && + cst->getKind() <= Kind::FlatAffineRelation; + } + + /// Clones this object. + std::unique_ptr clone() const; + + /// Adds a bound for the variable at the specified position with constraints + /// being drawn from the specified bound map. In case of an EQ bound, the + /// bound map is expected to have exactly one result. In case of a LB/UB, the + /// bound map may have more than one result, for each of which an inequality + /// is added. + /// + /// The bound can be added as open or closed by specifying isClosedBound. In + /// case of a LB/UB, isClosedBound = false means the bound is added internally + /// as a closed bound by +1/-1 respectively. In case of an EQ bound, it can + /// only be added as a closed bound. + /// + /// Note: The dimensions/symbols of this FlatLinearConstraints must match the + /// dimensions/symbols of the affine map. + LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap, + bool isClosedBound); + + /// Adds a bound for the variable at the specified position with constraints + /// being drawn from the specified bound map. In case of an EQ bound, the + /// bound map is expected to have exactly one result. In case of a LB/UB, the + /// bound map may have more than one result, for each of which an inequality + /// is added. + /// Note: The dimensions/symbols of this FlatLinearConstraints must match the + /// dimensions/symbols of the affine map. By default the lower bound is closed + /// and the upper bound is open. + LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap); + + /// The `addBound` overload above hides the inherited overloads by default, so + /// we explicitly introduce them here. + using IntegerPolyhedron::addBound; + + /// Returns the constraint system as an integer set. Returns a null integer + /// set if the system has no constraints, or if an integer set couldn't be + /// constructed as a result of a local variable's explicit representation not + /// being known and such a local variable appearing in any of the constraints. + IntegerSet getAsIntegerSet(MLIRContext *context) const; + + /// Computes the lower and upper bounds of the first `num` dimensional + /// variables (starting at `offset`) as an affine map of the remaining + /// variables (dimensional and symbolic). 
This method is able to detect + /// variables as floordiv's and mod's of affine expressions of other + /// variables with respect to (positive) constants. Sets bound map to a + /// null AffineMap if such a bound can't be found (or yet unimplemented). + /// + /// By default the returned lower bounds are closed and upper bounds are open. + /// If `closedUb` is true, the upper bound is closed. + void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context, + SmallVectorImpl *lbMaps, + SmallVectorImpl *ubMaps, + bool closedUB = false); + + /// Composes an affine map whose dimensions and symbols match one to one with + /// the dimensions and symbols of this FlatLinearConstraints. The results of + /// the map `other` are added as the leading dimensions of this constraint + /// system. Returns failure if `other` is a semi-affine map. + LogicalResult composeMatchingMap(AffineMap other); + + /// Gets the lower and upper bound of the `offset` + `pos`th variable + /// treating [0, offset) U [offset + num, symStartPos) as dimensions and + /// [symStartPos, getNumDimAndSymbolVars) as symbols, and `pos` lies in + /// [0, num). The multi-dimensional maps in the returned pair represent the + /// max and min of potentially multiple affine expressions. `localExprs` holds + /// pre-computed AffineExpr's for all local variables in the system. + /// + /// By default the returned lower bounds are closed and upper bounds are open. + /// If `closedUb` is true, the upper bound is closed. + std::pair + getLowerAndUpperBound(unsigned pos, unsigned offset, unsigned num, + unsigned symStartPos, ArrayRef localExprs, + MLIRContext *context, bool closedUB = false) const; + + /// Insert variables of the specified kind at position `pos`. Positions are + /// relative to the kind of variable. The coefficient columns corresponding + /// to the added variables are initialized to zero. `vals` are the Values + /// corresponding to the variables. Values should not be used with + /// VarKind::Local since values can only be attached to non-local variables. + /// Return the absolute column position (i.e., not relative to the kind of + /// variable) of the first added variable. + /// + /// Note: Empty Values are allowed in `vals`. + unsigned insertDimVar(unsigned pos, unsigned num = 1) { + return insertVar(VarKind::SetDim, pos, num); + } + unsigned insertSymbolVar(unsigned pos, unsigned num = 1) { + return insertVar(VarKind::Symbol, pos, num); + } + unsigned insertLocalVar(unsigned pos, unsigned num = 1) { + return insertVar(VarKind::Local, pos, num); + } + + /// Append variables of the specified kind after the last variable of that + /// kind. The coefficient columns corresponding to the added variables are + /// initialized to zero. `vals` are the Values corresponding to the + /// variables. Return the absolute column position (i.e., not relative to the + /// kind of variable) of the first appended variable. + /// + /// Note: Empty Values are allowed in `vals`. + unsigned appendDimVar(unsigned num = 1) { + return appendVar(VarKind::SetDim, num); + } + unsigned appendSymbolVar(unsigned num = 1) { + return appendVar(VarKind::Symbol, num); + } + unsigned appendLocalVar(unsigned num = 1) { + return appendVar(VarKind::Local, num); + } + +protected: + using VarKind = presburger::VarKind; + + /// Compute an explicit representation for local vars. For all systems coming + /// from MLIR integer sets, maps, or expressions where local vars were + /// introduced to model floordivs and mods, this always succeeds. 
+ LogicalResult computeLocalVars(SmallVectorImpl &memo, + MLIRContext *context) const; + + /// Given an affine map that is aligned with this constraint system: + /// * Flatten the map. + /// * Add newly introduced local columns at the beginning of this constraint + /// system (local column pos 0). + /// * Add equalities that define the new local columns to this constraint + /// system. + /// * Return the flattened expressions via `flattenedExprs`. + /// + /// Note: This is a shared helper function of `addLowerOrUpperBound` and + /// `composeMatchingMap`. + LogicalResult flattenAlignedMapAndMergeLocals( + AffineMap map, std::vector> *flattenedExprs); + + /// Prints the number of constraints, dimensions, symbols and locals in the + /// FlatLinearConstraints. Also, prints for each variable whether there is + /// an SSA Value attached to it. + void printSpace(raw_ostream &os) const override; +}; + +/// FlatLinearValueConstraints represents an extension of FlatLinearConstraints +/// where each non-local variable can have an SSA Value attached to it. +class FlatLinearValueConstraints : public FlatLinearConstraints { +public: + /// Constructs a constraint system reserving memory for the specified number + /// of constraints and variables. `valArgs` are the optional SSA values + /// associated with each dimension/symbol. These must either be empty or match + /// the number of dimensions and symbols. + FlatLinearValueConstraints(unsigned numReservedInequalities, + unsigned numReservedEqualities, + unsigned numReservedCols, unsigned numDims, + unsigned numSymbols, unsigned numLocals, + ArrayRef> valArgs) + : FlatLinearConstraints(numReservedInequalities, numReservedEqualities, + numReservedCols, numDims, numSymbols, numLocals) { + assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); + values.reserve(numReservedCols); + if (valArgs.empty()) + values.resize(getNumDimAndSymbolVars(), std::nullopt); + else + values.append(valArgs.begin(), valArgs.end()); + } + + /// Constructs a constraint system reserving memory for the specified number + /// of constraints and variables. `valArgs` are the optional SSA values + /// associated with each dimension/symbol. These must either be empty or match + /// the number of dimensions and symbols. + FlatLinearValueConstraints(unsigned numReservedInequalities, + unsigned numReservedEqualities, + unsigned numReservedCols, unsigned numDims, + unsigned numSymbols, unsigned numLocals, + ArrayRef valArgs) + : FlatLinearConstraints(numReservedInequalities, numReservedEqualities, + numReservedCols, numDims, numSymbols, numLocals) { + assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); + values.reserve(numReservedCols); + if (valArgs.empty()) + values.resize(getNumDimAndSymbolVars(), std::nullopt); + else + values.append(valArgs.begin(), valArgs.end()); + } + + /// Constructs a constraint system with the specified number of dimensions + /// and symbols. `valArgs` are the optional SSA values associated with each + /// dimension/symbol. These must either be empty or match the number of + /// dimensions and symbols. + FlatLinearValueConstraints(unsigned numDims, unsigned numSymbols, + unsigned numLocals, + ArrayRef> valArgs) + : FlatLinearValueConstraints(/*numReservedInequalities=*/0, + /*numReservedEqualities=*/0, + /*numReservedCols=*/numDims + numSymbols + + numLocals + 1, + numDims, numSymbols, numLocals, valArgs) {} + + /// Constructs a constraint system with the specified number of dimensions + /// and symbols. 
`valArgs` are the optional SSA values associated with each + /// dimension/symbol. These must either be empty or match the number of + /// dimensions and symbols. + FlatLinearValueConstraints(unsigned numDims = 0, unsigned numSymbols = 0, + unsigned numLocals = 0, + ArrayRef valArgs = {}) + : FlatLinearValueConstraints(/*numReservedInequalities=*/0, + /*numReservedEqualities=*/0, + /*numReservedCols=*/numDims + numSymbols + + numLocals + 1, + numDims, numSymbols, numLocals, valArgs) {} + + FlatLinearValueConstraints(const IntegerPolyhedron &fac, + ArrayRef> valArgs = {}) + : FlatLinearConstraints(fac) { + assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); + if (valArgs.empty()) + values.resize(getNumDimAndSymbolVars(), std::nullopt); + else + values.append(valArgs.begin(), valArgs.end()); + } + + /// Creates an affine constraint system from an IntegerSet. + explicit FlatLinearValueConstraints(IntegerSet set, ValueRange operands = {}); + + // Construct a hyperrectangular constraint set from ValueRanges that represent + // induction variables, lower and upper bounds. `ivs`, `lbs` and `ubs` are + // expected to match one to one. The order of variables and constraints is: + // + // ivs | lbs | ubs | eq/ineq + // ----+-----+-----+--------- + // 1 -1 0 >= 0 + // ----+-----+-----+--------- + // -1 0 1 >= 0 + // + // All dimensions as set as VarKind::SetDim. + static FlatLinearValueConstraints + getHyperrectangular(ValueRange ivs, ValueRange lbs, ValueRange ubs); + + /// Return the kind of this object. + Kind getKind() const override { return Kind::FlatLinearValueConstraints; } + + static bool classof(const IntegerRelation *cst) { + return cst->getKind() >= Kind::FlatLinearValueConstraints && + cst->getKind() <= Kind::FlatAffineRelation; + } + + /// Replaces the contents of this FlatLinearValueConstraints with `other`. + void clearAndCopyFrom(const IntegerRelation &other) override; + + /// Adds a constant bound for the variable associated with the given Value. + void addBound(BoundType type, Value val, int64_t value); + using FlatLinearConstraints::addBound; + + /// Returns the Value associated with the pos^th variable. Asserts if + /// no Value variable was associated. + inline Value getValue(unsigned pos) const { + assert(pos < getNumDimAndSymbolVars() && "Invalid position"); + assert(hasValue(pos) && "variable's Value not set"); + return *values[pos]; + } + + /// Returns the Values associated with variables in range [start, end). + /// Asserts if no Value was associated with one of these variables. + inline void getValues(unsigned start, unsigned end, + SmallVectorImpl *values) const { + assert(end <= getNumDimAndSymbolVars() && "invalid end position"); + assert(start <= end && "invalid start position"); + values->clear(); + values->reserve(end - start); + for (unsigned i = start; i < end; i++) + values->push_back(getValue(i)); + } + inline void getAllValues(SmallVectorImpl *values) const { + getValues(0, getNumDimAndSymbolVars(), values); + } + + inline ArrayRef> getMaybeValues() const { + return {values.data(), values.size()}; + } + + inline ArrayRef> + getMaybeValues(presburger::VarKind kind) const { + assert(kind != VarKind::Local && + "Local variables do not have any value attached to them."); + return {values.data() + getVarKindOffset(kind), getNumVarKind(kind)}; + } + + /// Returns true if the pos^th variable has an associated Value. 
+ inline bool hasValue(unsigned pos) const { + assert(pos < getNumDimAndSymbolVars() && "Invalid position"); + return values[pos].has_value(); + } + + /// Returns true if at least one variable has an associated Value. + bool hasValues() const; + + unsigned appendDimVar(ValueRange vals); + using FlatLinearConstraints::appendDimVar; + + unsigned appendSymbolVar(ValueRange vals); + using FlatLinearConstraints::appendSymbolVar; + + unsigned insertDimVar(unsigned pos, ValueRange vals); + using FlatLinearConstraints::insertDimVar; + + unsigned insertSymbolVar(unsigned pos, ValueRange vals); + using FlatLinearConstraints::insertSymbolVar; + + unsigned insertVar(presburger::VarKind kind, unsigned pos, + unsigned num = 1) override; + unsigned insertVar(presburger::VarKind kind, unsigned pos, ValueRange vals); + + /// Removes variables in the column range [varStart, varLimit), and copies any + /// remaining valid data into place, updates member variables, and resizes + /// arrays as needed. + void removeVarRange(presburger::VarKind kind, unsigned varStart, + unsigned varLimit) override; + using IntegerPolyhedron::removeVarRange; + + /// Sets the Value associated with the pos^th variable. + inline void setValue(unsigned pos, Value val) { + assert(pos < getNumDimAndSymbolVars() && "invalid var position"); + values[pos] = val; + } + + /// Sets the Values associated with the variables in the range [start, end). + /// The range must contain only dim and symbol variables. + void setValues(unsigned start, unsigned end, ArrayRef values) { + assert(end <= getNumVars() && "invalid end position"); + assert(start <= end && "invalid start position"); + assert(values.size() == end - start && + "value should be provided for each variable in the range."); + for (unsigned i = start; i < end; ++i) + setValue(i, values[i - start]); + } + + /// Looks up the position of the variable with the specified Value. Returns + /// true if found (false otherwise). `pos` is set to the (column) position of + /// the variable. + bool findVar(Value val, unsigned *pos) const; + + /// Returns true if a variable with the specified Value exists, false + /// otherwise. + bool containsVar(Value val) const; + + /// Projects out the variable that is associate with Value. + void projectOut(Value val); + using IntegerPolyhedron::projectOut; + + /// Swap the posA^th variable with the posB^th variable. + void swapVar(unsigned posA, unsigned posB) override; + + /// Prints the number of constraints, dimensions, symbols and locals in the + /// FlatAffineValueConstraints. Also, prints for each variable whether there + /// is an SSA Value attached to it. + void printSpace(raw_ostream &os) const override; + + /// Align `map` with this constraint system based on `operands`. Each operand + /// must already have a corresponding dim/symbol in this constraint system. + AffineMap computeAlignedMap(AffineMap map, ValueRange operands) const; + + /// Merge and align the variables of `this` and `other` starting at + /// `offset`, so that both constraint systems get the union of the contained + /// variables that is dimension-wise and symbol-wise unique; both + /// constraint systems are updated so that they have the union of all + /// variables, with `this`'s original variables appearing first followed + /// by any of `other`'s variables that didn't appear in `this`. Local + /// variables in `other` that have the same division representation as local + /// variables in `this` are merged into one. 
+  // E.g.: Input: `this`  has (%i, %j) [%M, %N]
+  //       `other` has (%k, %j) [%P, %N, %M]
+  //       Output: both `this`, `other` have (%i, %j, %k) [%M, %N, %P]
+  //
+  void mergeAndAlignVarsWithOther(unsigned offset,
+                                  FlatLinearValueConstraints *other);
+
+  /// Merge and align symbols of `this` and `other` such that both get the
+  /// union of symbols that are unique. Symbols in `this` and `other` should be
+  /// unique. Symbols with Value as `None` are considered to be unequal to all
+  /// other symbols.
+  void mergeSymbolVars(FlatLinearValueConstraints &other);
+
+  /// Returns true if this constraint system and `other` are in the same
+  /// space, i.e., if they are associated with the same set of variables,
+  /// appearing in the same order. Returns false otherwise.
+  bool areVarsAlignedWithOther(const FlatLinearConstraints &other);
+
+  /// Updates the constraints to be the smallest bounding (enclosing) box that
+  /// contains the points of `this` set and that of `other`, with the symbols
+  /// being treated specially. For each of the dimensions, the min of the lower
+  /// bounds (symbolic) and the max of the upper bounds (symbolic) is computed
+  /// to determine such a bounding box. `other` is expected to have the same
+  /// dimensional variables as this constraint system (in the same order).
+  ///
+  /// E.g.:
+  /// 1) this   = {0 <= d0 <= 127},
+  ///    other  = {16 <= d0 <= 192},
+  ///    output = {0 <= d0 <= 192}
+  /// 2) this   = {s0 + 5 <= d0 <= s0 + 20},
+  ///    other  = {s0 + 1 <= d0 <= s0 + 9},
+  ///    output = {s0 + 1 <= d0 <= s0 + 20}
+  /// 3) this   = {0 <= d0 <= 5, 1 <= d1 <= 9}
+  ///    other  = {2 <= d0 <= 6, 5 <= d1 <= 15},
+  ///    output = {0 <= d0 <= 6, 1 <= d1 <= 15}
+  LogicalResult unionBoundingBox(const FlatLinearValueConstraints &other);
+  using IntegerPolyhedron::unionBoundingBox;
+
+protected:
+  /// Eliminates the variable at the specified position using Fourier-Motzkin
+  /// variable elimination, but uses Gaussian elimination if there is an
+  /// equality involving that variable. If the result of the elimination is
+  /// integer exact, `*isResultIntegerExact` is set to true. If `darkShadow` is
+  /// set to true, a potential under approximation (subset) of the rational
+  /// shadow / exact integer shadow is computed.
+  // See implementation comments for more details.
+  void fourierMotzkinEliminate(unsigned pos, bool darkShadow = false,
+                               bool *isResultIntegerExact = nullptr) override;
+
+  /// Returns false if the fields corresponding to various variable counts, or
+  /// equality/inequality buffer sizes aren't consistent; true otherwise. This
+  /// is meant to be used within an assert internally.
+  bool hasConsistentState() const override;
+
+  /// Values corresponding to the (column) non-local variables of this
+  /// constraint system appearing in the order the variables correspond to
+  /// columns. Variables that aren't associated with any Value are set to
+  /// None.
+  SmallVector, 8> values;
+};
+
+/// Flattens 'expr' into 'flattenedExpr', which contains the coefficients of the
+/// dimensions, symbols, and additional variables that represent floor divisions
+/// of dimensions, symbols, and in turn other floor divisions. Returns failure
+/// if 'expr' could not be flattened (i.e., semi-affine is not yet handled).
+/// 'cst' contains constraints that connect newly introduced local variables
+/// to existing dimensional and symbolic variables. See documentation for
+/// AffineExprFlattener on how mod's and div's are flattened.
+LogicalResult getFlattenedAffineExpr(AffineExpr expr, unsigned numDims,
+                                     unsigned numSymbols,
+                                     SmallVectorImpl<int64_t> *flattenedExpr,
+                                     FlatLinearConstraints *cst = nullptr);
+
+/// Flattens the result expressions of the map to their corresponding flattened
+/// forms and set in 'flattenedExprs'. Returns failure if any expression in the
+/// map could not be flattened (i.e., semi-affine is not yet handled). 'cst'
+/// contains constraints that connect newly introduced local variables to
+/// existing dimensional and symbolic variables. See documentation for
+/// AffineExprFlattener on how mod's and div's are flattened. For all affine
+/// expressions that share the same operands (like those of an affine map),
+/// this method should be used instead of repeatedly calling
+/// getFlattenedAffineExpr since local variables added to deal with div's and
+/// mod's will be reused across expressions.
+LogicalResult
+getFlattenedAffineExprs(AffineMap map,
+                        std::vector<SmallVector<int64_t, 8>> *flattenedExprs,
+                        FlatLinearConstraints *cst = nullptr);
+LogicalResult
+getFlattenedAffineExprs(IntegerSet set,
+                        std::vector<SmallVector<int64_t, 8>> *flattenedExprs,
+                        FlatLinearConstraints *cst = nullptr);
+
+LogicalResult
+getMultiAffineFunctionFromMap(AffineMap map,
+                              presburger::MultiAffineFunction &multiAff);
+
+/// Re-indexes the dimensions and symbols of an affine map with given
+/// `operands` values to align with `dims` and `syms` values.
+///
+/// Each dimension/symbol of the map, bound to an operand `o`, is replaced with
+/// dimension `i`, where `i` is the position of `o` within `dims`. If `o` is
+/// not in `dims`, replace it with symbol `i`, where `i` is the position of `o`
+/// within `syms`. If `o` is not in `syms` either, replace it with a new
+/// symbol.
+///
+/// Note: If a value appears multiple times as a dimension/symbol (or both),
+/// all corresponding dim/sym expressions are replaced with the first dimension
+/// bound to that value (or first symbol if no such dimension exists).
+///
+/// The resulting affine map has `dims.size()` many dimensions and at least
+/// `syms.size()` many symbols.
+///
+/// The SSA values of the symbols of the resulting map are optionally returned
+/// via `newSyms`. This is a concatenation of `syms` with the SSA values of the
+/// newly added symbols.
+///
+/// Note: As part of this re-indexing, dimensions may turn into symbols, or
+/// vice versa.
+AffineMap alignAffineMapWithValues(AffineMap map, ValueRange operands,
+                                   ValueRange dims, ValueRange syms,
+                                   SmallVector<Value> *newSyms = nullptr);
+
+} // namespace mlir
+
+#endif // MLIR_ANALYSIS_FLATLINEARVALUECONSTRAINTS_H
diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h
index 347be26325e5a..8b0c2a561cfb8 100644
--- a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h
+++ b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h
@@ -54,10 +54,12 @@ class IntegerRelation {
 public:
   /// All derived classes of IntegerRelation.
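+  /// Note: the kinds below are ordered so that each class covers a contiguous
+  /// range, allowing classof() implementations to test for a whole subtree of
+  /// the hierarchy with a single range check on the kind.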
enum class Kind { - FlatAffineConstraints, - FlatAffineValueConstraints, IntegerRelation, IntegerPolyhedron, + FlatLinearConstraints, + FlatLinearValueConstraints, + FlatAffineValueConstraints, + FlatAffineRelation }; /// Constructs a relation reserving memory for the specified number @@ -848,7 +850,8 @@ class IntegerPolyhedron : public IntegerRelation { Kind getKind() const override { return Kind::IntegerPolyhedron; } static bool classof(const IntegerRelation *cst) { - return cst->getKind() == Kind::IntegerPolyhedron; + return cst->getKind() >= Kind::IntegerPolyhedron && + cst->getKind() <= Kind::FlatAffineRelation; } // Clones this object. diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h index 1b302f55422d8..6249428fb8e15 100644 --- a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h @@ -13,6 +13,7 @@ #ifndef MLIR_DIALECT_AFFINE_ANALYSIS_AFFINESTRUCTURES_H #define MLIR_DIALECT_AFFINE_ANALYSIS_AFFINESTRUCTURES_H +#include "mlir/Analysis/FlatLinearValueConstraints.h" #include "mlir/Analysis/Presburger/IntegerRelation.h" #include "mlir/Analysis/Presburger/Matrix.h" #include "mlir/IR/AffineExpr.h" @@ -38,117 +39,20 @@ namespace presburger { class MultiAffineFunction; } // namespace presburger -/// FlatAffineValueConstraints represents an extension of IntegerPolyhedron -/// where each non-local variable can have an SSA Value attached to it. -class FlatAffineValueConstraints : public presburger::IntegerPolyhedron { +/// FlatAffineValueConstraints is an extension of FlatLinearValueConstraints +/// with helper functions for Affine dialect ops. +class FlatAffineValueConstraints : public FlatLinearValueConstraints { public: - /// Constructs a constraint system reserving memory for the specified number - /// of constraints and variables. `valArgs` are the optional SSA values - /// associated with each dimension/symbol. These must either be empty or match - /// the number of dimensions and symbols. - FlatAffineValueConstraints(unsigned numReservedInequalities, - unsigned numReservedEqualities, - unsigned numReservedCols, unsigned numDims, - unsigned numSymbols, unsigned numLocals, - ArrayRef> valArgs) - : IntegerPolyhedron(numReservedInequalities, numReservedEqualities, - numReservedCols, - presburger::PresburgerSpace::getSetSpace( - numDims, numSymbols, numLocals)) { - assert(numReservedCols >= getNumVars() + 1); - assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); - values.reserve(numReservedCols); - if (valArgs.empty()) - values.resize(getNumDimAndSymbolVars(), std::nullopt); - else - values.append(valArgs.begin(), valArgs.end()); - } - - /// Constructs a constraint system reserving memory for the specified number - /// of constraints and variables. `valArgs` are the optional SSA values - /// associated with each dimension/symbol. These must either be empty or match - /// the number of dimensions and symbols. 
- FlatAffineValueConstraints(unsigned numReservedInequalities, - unsigned numReservedEqualities, - unsigned numReservedCols, unsigned numDims, - unsigned numSymbols, unsigned numLocals, - ArrayRef valArgs = {}) - : IntegerPolyhedron(numReservedInequalities, numReservedEqualities, - numReservedCols, - presburger::PresburgerSpace::getSetSpace( - numDims, numSymbols, numLocals)) { - assert(numReservedCols >= getNumVars() + 1); - assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); - values.reserve(numReservedCols); - if (valArgs.empty()) - values.resize(getNumDimAndSymbolVars(), std::nullopt); - else - values.append(valArgs.begin(), valArgs.end()); - } + using FlatLinearValueConstraints::FlatLinearValueConstraints; - /// Constructs a constraint system with the specified number of dimensions - /// and symbols. `valArgs` are the optional SSA values associated with each - /// dimension/symbol. These must either be empty or match the number of - /// dimensions and symbols. - FlatAffineValueConstraints(unsigned numDims, unsigned numSymbols, - unsigned numLocals, - ArrayRef> valArgs) - : FlatAffineValueConstraints(/*numReservedInequalities=*/0, - /*numReservedEqualities=*/0, - /*numReservedCols=*/numDims + numSymbols + - numLocals + 1, - numDims, numSymbols, numLocals, valArgs) {} - - /// Constructs a constraint system with the specified number of dimensions - /// and symbols. `valArgs` are the optional SSA values associated with each - /// dimension/symbol. These must either be empty or match the number of - /// dimensions and symbols. - FlatAffineValueConstraints(unsigned numDims = 0, unsigned numSymbols = 0, - unsigned numLocals = 0, - ArrayRef valArgs = {}) - : FlatAffineValueConstraints(/*numReservedInequalities=*/0, - /*numReservedEqualities=*/0, - /*numReservedCols=*/numDims + numSymbols + - numLocals + 1, - numDims, numSymbols, numLocals, valArgs) {} - - FlatAffineValueConstraints(const IntegerPolyhedron &fac, - ArrayRef> valArgs = {}) - : IntegerPolyhedron(fac) { - assert(valArgs.empty() || valArgs.size() == getNumDimAndSymbolVars()); - if (valArgs.empty()) - values.resize(getNumDimAndSymbolVars(), std::nullopt); - else - values.append(valArgs.begin(), valArgs.end()); - } - - /// Creates an affine constraint system from an IntegerSet. - explicit FlatAffineValueConstraints(IntegerSet set, ValueRange operands = {}); - - // Construct a hyperrectangular constraint set from ValueRanges that represent - // induction variables, lower and upper bounds. `ivs`, `lbs` and `ubs` are - // expected to match one to one. The order of variables and constraints is: - // - // ivs | lbs | ubs | eq/ineq - // ----+-----+-----+--------- - // 1 -1 0 >= 0 - // ----+-----+-----+--------- - // -1 0 1 >= 0 - // - // All dimensions as set as VarKind::SetDim. - static FlatAffineValueConstraints - getHyperrectangular(ValueRange ivs, ValueRange lbs, ValueRange ubs); - - /// Return the kind of this FlatAffineConstraints. + /// Return the kind of this object. Kind getKind() const override { return Kind::FlatAffineValueConstraints; } static bool classof(const IntegerRelation *cst) { - return cst->getKind() == Kind::FlatAffineValueConstraints; + return cst->getKind() >= Kind::FlatAffineValueConstraints && + cst->getKind() <= Kind::FlatAffineRelation; } - /// Clones this object. - std::unique_ptr clone() const; - /// Adds constraints (lower and upper bounds) for the specified 'affine.for' /// operation's Value using IR information stored in its bound maps. 
The /// right variable is first looked up using `forOp`'s Value. Asserts if the @@ -191,32 +95,6 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron { /// the columns in the current one regarding numbers and values. void addAffineIfOpDomain(AffineIfOp ifOp); - /// Adds a bound for the variable at the specified position with constraints - /// being drawn from the specified bound map. In case of an EQ bound, the - /// bound map is expected to have exactly one result. In case of a LB/UB, the - /// bound map may have more than one result, for each of which an inequality - /// is added. - /// - /// The bound can be added as open or closed by specifying isClosedBound. In - /// case of a LB/UB, isClosedBound = false means the bound is added internally - /// as a closed bound by +1/-1 respectively. In case of an EQ bound, it can - /// only be added as a closed bound. - /// - /// Note: The dimensions/symbols of this FlatAffineConstraints must match the - /// dimensions/symbols of the affine map. - LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap, - bool isClosedBound); - - /// Adds a bound for the variable at the specified position with constraints - /// being drawn from the specified bound map. In case of an EQ bound, the - /// bound map is expected to have exactly one result. In case of a LB/UB, the - /// bound map may have more than one result, for each of which an inequality - /// is added. - /// Note: The dimensions/symbols of this FlatAffineConstraints must match the - /// dimensions/symbols of the affine map. By default the lower bound is closed - /// and the upper bound is open. - LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap); - /// Adds a bound for the variable at the specified position with constraints /// being drawn from the specified bound map and operands. In case of an /// EQ bound, the bound map is expected to have exactly one result. In case @@ -224,62 +102,15 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron { /// an inequality is added. LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap, ValueRange operands); + using FlatLinearValueConstraints::addBound; - /// Adds a constant bound for the variable associated with the given Value. - void addBound(BoundType type, Value val, int64_t value); - - /// The `addBound` overload above hides the inherited overloads by default, so - /// we explicitly introduce them here. - using IntegerPolyhedron::addBound; - - /// Returns the constraint system as an integer set. Returns a null integer - /// set if the system has no constraints, or if an integer set couldn't be - /// constructed as a result of a local variable's explicit representation not - /// being known and such a local variable appearing in any of the constraints. - IntegerSet getAsIntegerSet(MLIRContext *context) const; - - /// Computes the lower and upper bounds of the first `num` dimensional - /// variables (starting at `offset`) as an affine map of the remaining - /// variables (dimensional and symbolic). This method is able to detect - /// variables as floordiv's and mod's of affine expressions of other - /// variables with respect to (positive) constants. Sets bound map to a - /// null AffineMap if such a bound can't be found (or yet unimplemented). - /// - /// By default the returned lower bounds are closed and upper bounds are open. - /// If `closedUb` is true, the upper bound is closed. 
- void getSliceBounds(unsigned offset, unsigned num, MLIRContext *context, - SmallVectorImpl *lbMaps, - SmallVectorImpl *ubMaps, - bool closedUB = false); - - /// Composes an affine map whose dimensions and symbols match one to one with - /// the dimensions and symbols of this FlatAffineConstraints. The results of - /// the map `other` are added as the leading dimensions of this constraint - /// system. Returns failure if `other` is a semi-affine map. - LogicalResult composeMatchingMap(AffineMap other); - - /// Gets the lower and upper bound of the `offset` + `pos`th variable - /// treating [0, offset) U [offset + num, symStartPos) as dimensions and - /// [symStartPos, getNumDimAndSymbolVars) as symbols, and `pos` lies in - /// [0, num). The multi-dimensional maps in the returned pair represent the - /// max and min of potentially multiple affine expressions. `localExprs` holds - /// pre-computed AffineExpr's for all local variables in the system. - /// - /// By default the returned lower bounds are closed and upper bounds are open. - /// If `closedUb` is true, the upper bound is closed. - std::pair - getLowerAndUpperBound(unsigned pos, unsigned offset, unsigned num, - unsigned symStartPos, ArrayRef localExprs, - MLIRContext *context, bool closedUB = false) const; - - /// Returns the bound for the variable at `pos` from the inequality at - /// `ineqPos` as a 1-d affine value map (affine map + operands). The returned - /// affine value map can either be a lower bound or an upper bound depending - /// on the sign of atIneq(ineqPos, pos). Asserts if the row at `ineqPos` does - /// not involve the `pos`th variable. - void getIneqAsAffineValueMap(unsigned pos, unsigned ineqPos, - AffineValueMap &vmap, - MLIRContext *context) const; + /// Add the specified values as a dim or symbol var depending on its nature, + /// if it already doesn't exist in the system. `val` has to be either a + /// terminal symbol or a loop IV, i.e., it cannot be the result affine.apply + /// of any symbols or loop IVs. The variable is added to the end of the + /// existing dims or symbols. Additional information on the variable is + /// extracted from the IR and added to the constraint system. + void addInductionVarOrTerminalSymbol(Value val); /// Adds slice lower bounds represented by lower bounds in `lbMaps` and upper /// bounds in `ubMaps` to each variable in the constraint system which has @@ -292,79 +123,17 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron { ArrayRef ubMaps, ArrayRef operands); - /// Looks up the position of the variable with the specified Value. Returns - /// true if found (false otherwise). `pos` is set to the (column) position of - /// the variable. - bool findVar(Value val, unsigned *pos) const; - - /// Returns true if an variable with the specified Value exists, false - /// otherwise. - bool containsVar(Value val) const; - - /// Swap the posA^th variable with the posB^th variable. - void swapVar(unsigned posA, unsigned posB) override; - - /// Insert variables of the specified kind at position `pos`. Positions are - /// relative to the kind of variable. The coefficient columns corresponding - /// to the added variables are initialized to zero. `vals` are the Values - /// corresponding to the variables. Values should not be used with - /// VarKind::Local since values can only be attached to non-local variables. - /// Return the absolute column position (i.e., not relative to the kind of - /// variable) of the first added variable. 
- /// - /// Note: Empty Values are allowed in `vals`. - unsigned insertDimVar(unsigned pos, unsigned num = 1) { - return insertVar(VarKind::SetDim, pos, num); - } - unsigned insertSymbolVar(unsigned pos, unsigned num = 1) { - return insertVar(VarKind::Symbol, pos, num); - } - unsigned insertLocalVar(unsigned pos, unsigned num = 1) { - return insertVar(VarKind::Local, pos, num); - } - unsigned insertDimVar(unsigned pos, ValueRange vals); - unsigned insertSymbolVar(unsigned pos, ValueRange vals); - unsigned insertVar(presburger::VarKind kind, unsigned pos, - unsigned num = 1) override; - unsigned insertVar(presburger::VarKind kind, unsigned pos, ValueRange vals); - - /// Append variables of the specified kind after the last variable of that - /// kind. The coefficient columns corresponding to the added variables are - /// initialized to zero. `vals` are the Values corresponding to the - /// variables. Return the absolute column position (i.e., not relative to the - /// kind of variable) of the first appended variable. - /// - /// Note: Empty Values are allowed in `vals`. - unsigned appendDimVar(ValueRange vals); - unsigned appendSymbolVar(ValueRange vals); - unsigned appendDimVar(unsigned num = 1) { - return appendVar(VarKind::SetDim, num); - } - unsigned appendSymbolVar(unsigned num = 1) { - return appendVar(VarKind::Symbol, num); - } - unsigned appendLocalVar(unsigned num = 1) { - return appendVar(VarKind::Local, num); - } - - /// Removes variables in the column range [varStart, varLimit), and copies any - /// remaining valid data into place, updates member variables, and resizes - /// arrays as needed. - void removeVarRange(presburger::VarKind kind, unsigned varStart, - unsigned varLimit) override; - using IntegerPolyhedron::removeVarRange; - - /// Add the specified values as a dim or symbol var depending on its nature, - /// if it already doesn't exist in the system. `val` has to be either a - /// terminal symbol or a loop IV, i.e., it cannot be the result affine.apply - /// of any symbols or loop IVs. The variable is added to the end of the - /// existing dims or symbols. Additional information on the variable is - /// extracted from the IR and added to the constraint system. - void addInductionVarOrTerminalSymbol(Value val); + /// Changes all symbol variables which are loop IVs to dim variables. + void convertLoopIVSymbolsToDims(); - /// Align `map` with this constraint system based on `operands`. Each operand - /// must already have a corresponding dim/symbol in this constraint system. - AffineMap computeAlignedMap(AffineMap map, ValueRange operands) const; + /// Returns the bound for the variable at `pos` from the inequality at + /// `ineqPos` as a 1-d affine value map (affine map + operands). The returned + /// affine value map can either be a lower bound or an upper bound depending + /// on the sign of atIneq(ineqPos, pos). Asserts if the row at `ineqPos` does + /// not involve the `pos`th variable. + void getIneqAsAffineValueMap(unsigned pos, unsigned ineqPos, + AffineValueMap &vmap, + MLIRContext *context) const; /// Composes the affine value map with this FlatAffineValueConstrains, adding /// the results of the map as dimensions at the front @@ -373,168 +142,10 @@ class FlatAffineValueConstraints : public presburger::IntegerPolyhedron { /// /// Returns failure if the composition fails (when vMap is a semi-affine map). /// The vMap's operand Value's are used to look up the right positions in - /// the FlatAffineConstraints with which to associate. 
Every operand of vMap - /// should have a matching dim/symbol column in this constraint system (with - /// the same associated Value). + /// the FlatAffineValueConstraints with which to associate. Every operand of + /// vMap should have a matching dim/symbol column in this constraint system + /// (with the same associated Value). LogicalResult composeMap(const AffineValueMap *vMap); - - /// Projects out the variable that is associate with Value. - void projectOut(Value val); - using IntegerPolyhedron::projectOut; - - /// Changes all symbol variables which are loop IVs to dim variables. - void convertLoopIVSymbolsToDims(); - - /// Updates the constraints to be the smallest bounding (enclosing) box that - /// contains the points of `this` set and that of `other`, with the symbols - /// being treated specially. For each of the dimensions, the min of the lower - /// bounds (symbolic) and the max of the upper bounds (symbolic) is computed - /// to determine such a bounding box. `other` is expected to have the same - /// dimensional variables as this constraint system (in the same order). - /// - /// E.g.: - /// 1) this = {0 <= d0 <= 127}, - /// other = {16 <= d0 <= 192}, - /// output = {0 <= d0 <= 192} - /// 2) this = {s0 + 5 <= d0 <= s0 + 20}, - /// other = {s0 + 1 <= d0 <= s0 + 9}, - /// output = {s0 + 1 <= d0 <= s0 + 20} - /// 3) this = {0 <= d0 <= 5, 1 <= d1 <= 9} - /// other = {2 <= d0 <= 6, 5 <= d1 <= 15}, - /// output = {0 <= d0 <= 6, 1 <= d1 <= 15} - LogicalResult unionBoundingBox(const FlatAffineValueConstraints &other); - using IntegerPolyhedron::unionBoundingBox; - - /// Merge and align the variables of `this` and `other` starting at - /// `offset`, so that both constraint systems get the union of the contained - /// variables that is dimension-wise and symbol-wise unique; both - /// constraint systems are updated so that they have the union of all - /// variables, with `this`'s original variables appearing first followed - /// by any of `other`'s variables that didn't appear in `this`. Local - /// variables in `other` that have the same division representation as local - /// variables in `this` are merged into one. - // E.g.: Input: `this` has (%i, %j) [%M, %N] - // `other` has (%k, %j) [%P, %N, %M] - // Output: both `this`, `other` have (%i, %j, %k) [%M, %N, %P] - // - void mergeAndAlignVarsWithOther(unsigned offset, - FlatAffineValueConstraints *other); - - /// Returns true if this constraint system and `other` are in the same - /// space, i.e., if they are associated with the same set of variables, - /// appearing in the same order. Returns false otherwise. - bool areVarsAlignedWithOther(const FlatAffineValueConstraints &other); - - /// Replaces the contents of this FlatAffineValueConstraints with `other`. - void clearAndCopyFrom(const IntegerRelation &other) override; - - /// Returns the Value associated with the pos^th variable. Asserts if - /// no Value variable was associated. - inline Value getValue(unsigned pos) const { - assert(pos < getNumDimAndSymbolVars() && "Invalid position"); - assert(hasValue(pos) && "variable's Value not set"); - return *values[pos]; - } - - /// Returns true if the pos^th variable has an associated Value. - inline bool hasValue(unsigned pos) const { - assert(pos < getNumDimAndSymbolVars() && "Invalid position"); - return values[pos].has_value(); - } - - /// Returns true if at least one variable has an associated Value. - bool hasValues() const; - - /// Returns the Values associated with variables in range [start, end). 
- /// Asserts if no Value was associated with one of these variables. - inline void getValues(unsigned start, unsigned end, - SmallVectorImpl *values) const { - assert(end <= getNumDimAndSymbolVars() && "invalid end position"); - assert(start <= end && "invalid start position"); - values->clear(); - values->reserve(end - start); - for (unsigned i = start; i < end; i++) - values->push_back(getValue(i)); - } - inline void getAllValues(SmallVectorImpl *values) const { - getValues(0, getNumDimAndSymbolVars(), values); - } - - inline ArrayRef> getMaybeValues() const { - return {values.data(), values.size()}; - } - - inline ArrayRef> - getMaybeValues(presburger::VarKind kind) const { - assert(kind != VarKind::Local && - "Local variables do not have any value attached to them."); - return {values.data() + getVarKindOffset(kind), getNumVarKind(kind)}; - } - - /// Sets the Value associated with the pos^th variable. - inline void setValue(unsigned pos, Value val) { - assert(pos < getNumDimAndSymbolVars() && "invalid var position"); - values[pos] = val; - } - - /// Sets the Values associated with the variables in the range [start, end). - /// The range must contain only dim and symbol variables. - void setValues(unsigned start, unsigned end, ArrayRef values) { - assert(end <= getNumVars() && "invalid end position"); - assert(start <= end && "invalid start position"); - assert(values.size() == end - start && - "value should be provided for each variable in the range."); - for (unsigned i = start; i < end; ++i) - setValue(i, values[i - start]); - } - - /// Merge and align symbols of `this` and `other` such that both get union of - /// of symbols that are unique. Symbols in `this` and `other` should be - /// unique. Symbols with Value as `None` are considered to be inequal to all - /// other symbols. - void mergeSymbolVars(FlatAffineValueConstraints &other); - -protected: - using VarKind = presburger::VarKind; - - /// Returns false if the fields corresponding to various variable counts, or - /// equality/inequality buffer sizes aren't consistent; true otherwise. This - /// is meant to be used within an assert internally. - bool hasConsistentState() const override; - - /// Given an affine map that is aligned with this constraint system: - /// * Flatten the map. - /// * Add newly introduced local columns at the beginning of this constraint - /// system (local column pos 0). - /// * Add equalities that define the new local columns to this constraint - /// system. - /// * Return the flattened expressions via `flattenedExprs`. - /// - /// Note: This is a shared helper function of `addLowerOrUpperBound` and - /// `composeMatchingMap`. - LogicalResult flattenAlignedMapAndMergeLocals( - AffineMap map, std::vector> *flattenedExprs); - - /// Eliminates the variable at the specified position using Fourier-Motzkin - /// variable elimination, but uses Gaussian elimination if there is an - /// equality involving that variable. If the result of the elimination is - /// integer exact, `*isResultIntegerExact` is set to true. If `darkShadow` is - /// set to true, a potential under approximation (subset) of the rational - /// shadow / exact integer shadow is computed. - // See implementation comments for more details. - void fourierMotzkinEliminate(unsigned pos, bool darkShadow = false, - bool *isResultIntegerExact = nullptr) override; - - /// Prints the number of constraints, dimensions, symbols and locals in the - /// FlatAffineConstraints. 
Also, prints for each variable whether there is - /// an SSA Value attached to it. - void printSpace(raw_ostream &os) const override; - - /// Values corresponding to the (column) non-local variables of this - /// constraint system appearing in the order the variables correspond to - /// columns. Variables that aren't associated with any Value are set to - /// None. - SmallVector, 8> values; }; /// A FlatAffineRelation represents a set of ordered pairs (domain -> range) @@ -570,6 +181,13 @@ class FlatAffineRelation : public FlatAffineValueConstraints { : FlatAffineValueConstraints(fac), numDomainDims(numDomainDims), numRangeDims(numRangeDims) {} + /// Return the kind of this object. + Kind getKind() const override { return Kind::FlatAffineRelation; } + + static bool classof(const IntegerRelation *cst) { + return cst->getKind() == Kind::FlatAffineRelation; + } + /// Returns a set corresponding to the domain/range of the affine relation. FlatAffineValueConstraints getDomainSet() const; FlatAffineValueConstraints getRangeSet() const; @@ -616,66 +234,6 @@ class FlatAffineRelation : public FlatAffineValueConstraints { unsigned numRangeDims; }; -/// Flattens 'expr' into 'flattenedExpr', which contains the coefficients of the -/// dimensions, symbols, and additional variables that represent floor divisions -/// of dimensions, symbols, and in turn other floor divisions. Returns failure -/// if 'expr' could not be flattened (i.e., semi-affine is not yet handled). -/// 'cst' contains constraints that connect newly introduced local variables -/// to existing dimensional and symbolic variables. See documentation for -/// AffineExprFlattener on how mod's and div's are flattened. -LogicalResult getFlattenedAffineExpr(AffineExpr expr, unsigned numDims, - unsigned numSymbols, - SmallVectorImpl *flattenedExpr, - FlatAffineValueConstraints *cst = nullptr); - -/// Flattens the result expressions of the map to their corresponding flattened -/// forms and set in 'flattenedExprs'. Returns failure if any expression in the -/// map could not be flattened (i.e., semi-affine is not yet handled). 'cst' -/// contains constraints that connect newly introduced local variables to -/// existing dimensional and / symbolic variables. See documentation for -/// AffineExprFlattener on how mod's and div's are flattened. For all affine -/// expressions that share the same operands (like those of an affine map), this -/// method should be used instead of repeatedly calling getFlattenedAffineExpr -/// since local variables added to deal with div's and mod's will be reused -/// across expressions. -LogicalResult -getFlattenedAffineExprs(AffineMap map, - std::vector> *flattenedExprs, - FlatAffineValueConstraints *cst = nullptr); -LogicalResult -getFlattenedAffineExprs(IntegerSet set, - std::vector> *flattenedExprs, - FlatAffineValueConstraints *cst = nullptr); - -LogicalResult -getMultiAffineFunctionFromMap(AffineMap map, - presburger::MultiAffineFunction &multiAff); - -/// Re-indexes the dimensions and symbols of an affine map with given `operands` -/// values to align with `dims` and `syms` values. -/// -/// Each dimension/symbol of the map, bound to an operand `o`, is replaced with -/// dimension `i`, where `i` is the position of `o` within `dims`. If `o` is not -/// in `dims`, replace it with symbol `i`, where `i` is the position of `o` -/// within `syms`. If `o` is not in `syms` either, replace it with a new symbol. 
-/// -/// Note: If a value appears multiple times as a dimension/symbol (or both), all -/// corresponding dim/sym expressions are replaced with the first dimension -/// bound to that value (or first symbol if no such dimension exists). -/// -/// The resulting affine map has `dims.size()` many dimensions and at least -/// `syms.size()` many symbols. -/// -/// The SSA values of the symbols of the resulting map are optionally returned -/// via `newSyms`. This is a concatenation of `syms` with the SSA values of the -/// newly added symbols. -/// -/// Note: As part of this re-indexing, dimensions may turn into symbols, or vice -/// versa. -AffineMap alignAffineMapWithValues(AffineMap map, ValueRange operands, - ValueRange dims, ValueRange syms, - SmallVector *newSyms = nullptr); - /// Builds a relation from the given AffineMap/AffineValueMap `map`, containing /// all pairs of the form `operands -> result` that satisfy `map`. `rel` is set /// to the relation built. For example, give the AffineMap: @@ -696,6 +254,6 @@ LogicalResult getRelationFromMap(AffineMap &map, FlatAffineRelation &rel); LogicalResult getRelationFromMap(const AffineValueMap &map, FlatAffineRelation &rel); -} // namespace mlir. +} // namespace mlir #endif // MLIR_DIALECT_AFFINE_ANALYSIS_AFFINESTRUCTURES_H diff --git a/mlir/include/mlir/IR/AffineExprVisitor.h b/mlir/include/mlir/IR/AffineExprVisitor.h index 30ee1b6e0819c..f6216614c2238 100644 --- a/mlir/include/mlir/IR/AffineExprVisitor.h +++ b/mlir/include/mlir/IR/AffineExprVisitor.h @@ -324,7 +324,7 @@ class SimpleAffineExprFlattener // A floordiv is thus flattened by introducing a new local variable q, and // replacing that expression with 'q' while adding the constraints // c * q <= expr <= c * q + c - 1 to localVarCst (done by - // FlatAffineConstraints::addLocalFloorDiv). + // IntegerRelation::addLocalFloorDiv). // // A ceildiv is similarly flattened: // t = expr ceildiv c <=> t = (expr + c - 1) floordiv c diff --git a/mlir/include/mlir/IR/IntegerSet.h b/mlir/include/mlir/IR/IntegerSet.h index b8affcae74e6e..f814776f1ee7f 100644 --- a/mlir/include/mlir/IR/IntegerSet.h +++ b/mlir/include/mlir/IR/IntegerSet.h @@ -17,7 +17,7 @@ // This class is not meant for affine analysis and operations like set // operations, emptiness checks, or other math operations for analysis and -// transformation. For the latter, use FlatAffineConstraints. +// transformation. For the latter, use FlatAffineValueConstraints. 
// //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt index 25263db944e97..b68e03c5748fc 100644 --- a/mlir/lib/Analysis/CMakeLists.txt +++ b/mlir/lib/Analysis/CMakeLists.txt @@ -2,6 +2,7 @@ set(LLVM_OPTIONAL_SOURCES AliasAnalysis.cpp CallGraph.cpp DataLayoutAnalysis.cpp + FlatLinearValueConstraints.cpp Liveness.cpp SliceAnalysis.cpp @@ -14,11 +15,14 @@ set(LLVM_OPTIONAL_SOURCES DataFlow/SparseAnalysis.cpp ) +add_subdirectory(Presburger) + add_mlir_library(MLIRAnalysis AliasAnalysis.cpp CallGraph.cpp DataFlowFramework.cpp DataLayoutAnalysis.cpp + FlatLinearValueConstraints.cpp Liveness.cpp SliceAnalysis.cpp @@ -43,8 +47,8 @@ add_mlir_library(MLIRAnalysis MLIRInferIntRangeInterface MLIRInferTypeOpInterface MLIRLoopLikeInterface + MLIRPresburger MLIRSideEffectInterfaces MLIRViewLikeInterface ) -add_subdirectory(Presburger) diff --git a/mlir/lib/Analysis/FlatLinearValueConstraints.cpp b/mlir/lib/Analysis/FlatLinearValueConstraints.cpp new file mode 100644 index 0000000000000..b89b2d11003af --- /dev/null +++ b/mlir/lib/Analysis/FlatLinearValueConstraints.cpp @@ -0,0 +1,1344 @@ +//===- FlatLinearValueConstraints.cpp - Linear Constraint -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Analysis//FlatLinearValueConstraints.h" + +#include "mlir/Analysis/Presburger/LinearTransform.h" +#include "mlir/Analysis/Presburger/Simplex.h" +#include "mlir/Analysis/Presburger/Utils.h" +#include "mlir/IR/AffineExprVisitor.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/IntegerSet.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/MathExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include + +#define DEBUG_TYPE "flat-value-constraints" + +using namespace mlir; +using namespace presburger; + +//===----------------------------------------------------------------------===// +// AffineExprFlattener +//===----------------------------------------------------------------------===// + +namespace { + +// See comments for SimpleAffineExprFlattener. +// An AffineExprFlattener extends a SimpleAffineExprFlattener by recording +// constraint information associated with mod's, floordiv's, and ceildiv's +// in FlatLinearConstraints 'localVarCst'. +struct AffineExprFlattener : public SimpleAffineExprFlattener { +public: + // Constraints connecting newly introduced local variables (for mod's and + // div's) to existing (dimensional and symbolic) ones. These are always + // inequalities. + IntegerPolyhedron localVarCst; + + AffineExprFlattener(unsigned nDims, unsigned nSymbols) + : SimpleAffineExprFlattener(nDims, nSymbols), + localVarCst(PresburgerSpace::getSetSpace(nDims, nSymbols)) {} + +private: + // Add a local variable (needed to flatten a mod, floordiv, ceildiv expr). + // The local variable added is always a floordiv of a pure add/mul affine + // function of other variables, coefficients of which are specified in + // `dividend' and with respect to the positive constant `divisor'. localExpr + // is the simplified tree expression (AffineExpr) corresponding to the + // quantifier. 
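+  // For example (illustrative): for `d0 floordiv 4`, `dividend` holds the
+  // flattened coefficients of d0, `divisor` is 4, and localVarCst gains the
+  // inequalities 4 * q <= d0 <= 4 * q + 3 defining the new local q.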
+ void addLocalFloorDivId(ArrayRef dividend, int64_t divisor, + AffineExpr localExpr) override { + SimpleAffineExprFlattener::addLocalFloorDivId(dividend, divisor, localExpr); + // Update localVarCst. + localVarCst.addLocalFloorDiv(dividend, divisor); + } +}; + +} // namespace + +// Flattens the expressions in map. Returns failure if 'expr' was unable to be +// flattened (i.e., semi-affine expressions not handled yet). +static LogicalResult +getFlattenedAffineExprs(ArrayRef exprs, unsigned numDims, + unsigned numSymbols, + std::vector> *flattenedExprs, + FlatLinearConstraints *localVarCst) { + if (exprs.empty()) { + if (localVarCst) + *localVarCst = FlatLinearConstraints(numDims, numSymbols); + return success(); + } + + AffineExprFlattener flattener(numDims, numSymbols); + // Use the same flattener to simplify each expression successively. This way + // local variables / expressions are shared. + for (auto expr : exprs) { + if (!expr.isPureAffine()) + return failure(); + + flattener.walkPostOrder(expr); + } + + assert(flattener.operandExprStack.size() == exprs.size()); + flattenedExprs->clear(); + flattenedExprs->assign(flattener.operandExprStack.begin(), + flattener.operandExprStack.end()); + + if (localVarCst) + localVarCst->clearAndCopyFrom(flattener.localVarCst); + + return success(); +} + +// Flattens 'expr' into 'flattenedExpr'. Returns failure if 'expr' was unable to +// be flattened (semi-affine expressions not handled yet). +LogicalResult +mlir::getFlattenedAffineExpr(AffineExpr expr, unsigned numDims, + unsigned numSymbols, + SmallVectorImpl *flattenedExpr, + FlatLinearConstraints *localVarCst) { + std::vector> flattenedExprs; + LogicalResult ret = ::getFlattenedAffineExprs({expr}, numDims, numSymbols, + &flattenedExprs, localVarCst); + *flattenedExpr = flattenedExprs[0]; + return ret; +} + +/// Flattens the expressions in map. Returns failure if 'expr' was unable to be +/// flattened (i.e., semi-affine expressions not handled yet). +LogicalResult mlir::getFlattenedAffineExprs( + AffineMap map, std::vector> *flattenedExprs, + FlatLinearConstraints *localVarCst) { + if (map.getNumResults() == 0) { + if (localVarCst) + *localVarCst = + FlatLinearConstraints(map.getNumDims(), map.getNumSymbols()); + return success(); + } + return ::getFlattenedAffineExprs(map.getResults(), map.getNumDims(), + map.getNumSymbols(), flattenedExprs, + localVarCst); +} + +LogicalResult mlir::getFlattenedAffineExprs( + IntegerSet set, std::vector> *flattenedExprs, + FlatLinearConstraints *localVarCst) { + if (set.getNumConstraints() == 0) { + if (localVarCst) + *localVarCst = + FlatLinearConstraints(set.getNumDims(), set.getNumSymbols()); + return success(); + } + return ::getFlattenedAffineExprs(set.getConstraints(), set.getNumDims(), + set.getNumSymbols(), flattenedExprs, + localVarCst); +} + +//===----------------------------------------------------------------------===// +// FlatLinearConstraints +//===----------------------------------------------------------------------===// + +std::unique_ptr FlatLinearConstraints::clone() const { + return std::make_unique(*this); +} + +// Similar to `composeMap` except that no Values need be associated with the +// constraint system nor are they looked at -- the dimensions and symbols of +// `other` are expected to correspond 1:1 to `this` system. 
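+// For example (illustrative): composing the map (d0, d1) -> (d0 + d1) with a
+// system over (d0, d1) prepends one result dimension r and adds the equality
+// r - d0 - d1 = 0.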
+LogicalResult FlatLinearConstraints::composeMatchingMap(AffineMap other) {
+  assert(other.getNumDims() == getNumDimVars() && "dim mismatch");
+  assert(other.getNumSymbols() == getNumSymbolVars() && "symbol mismatch");
+
+  std::vector<SmallVector<int64_t, 8>> flatExprs;
+  if (failed(flattenAlignedMapAndMergeLocals(other, &flatExprs)))
+    return failure();
+  assert(flatExprs.size() == other.getNumResults());
+
+  // Add dimensions corresponding to the map's results.
+  insertDimVar(/*pos=*/0, /*num=*/other.getNumResults());
+
+  // We add one equality for each result connecting the result dim of the map
+  // to the other variables.
+  // E.g.: if the expression is 16*i0 + i1, and this is the r^th
+  // iteration/result of the value map, we are adding the equality:
+  // d_r - 16*i0 - i1 = 0. Similarly, when flattening (i0 + 1, i0 + 8*i2), we
+  // add two equalities: d_0 - i0 - 1 == 0, d_1 - i0 - 8*i2 == 0.
+  for (unsigned r = 0, e = flatExprs.size(); r < e; r++) {
+    const auto &flatExpr = flatExprs[r];
+    assert(flatExpr.size() >= other.getNumInputs() + 1);
+
+    SmallVector<int64_t, 8> eqToAdd(getNumCols(), 0);
+    // Set the coefficient for this result to one.
+    eqToAdd[r] = 1;
+
+    // Dims and symbols.
+    for (unsigned i = 0, f = other.getNumInputs(); i < f; i++) {
+      // Negate `eq[r]` since the newly added dimension will be set to this one.
+      eqToAdd[e + i] = -flatExpr[i];
+    }
+    // Local columns of `eq` are at the beginning.
+    unsigned j = getNumDimVars() + getNumSymbolVars();
+    unsigned end = flatExpr.size() - 1;
+    for (unsigned i = other.getNumInputs(); i < end; i++, j++) {
+      eqToAdd[j] = -flatExpr[i];
+    }
+
+    // Constant term.
+    eqToAdd[getNumCols() - 1] = -flatExpr[flatExpr.size() - 1];
+
+    // Add the equality connecting the result of the map to this constraint set.
+    addEquality(eqToAdd);
+  }
+
+  return success();
+}
+
+// Determine whether the variable at 'pos' (say var_r) can be expressed as
+// modulo of another known variable (say var_n) w.r.t a constant. For example,
+// if the following constraints hold true:
+// ```
+// 0 <= var_r <= divisor - 1
+// var_n - (divisor * q_expr) = var_r
+// ```
+// where `var_n` is a known variable (called dividend), and `q_expr` is an
+// `AffineExpr` (called the quotient expression), `var_r` can be written as:
+//
+// `var_r = var_n mod divisor`.
+//
+// Additionally, in a special case of the above constraints where `q_expr` is
+// a variable itself that is not yet known (say `var_q`), it can be written as
+// a floordiv in the following way:
+//
+// `var_q = var_n floordiv divisor`.
+//
+// Returns true if the above mod or floordiv are detected, updating 'memo' with
+// these new expressions. Returns false otherwise.
+static bool detectAsMod(const FlatLinearConstraints &cst, unsigned pos,
+                        int64_t lbConst, int64_t ubConst,
+                        SmallVectorImpl<AffineExpr> &memo,
+                        MLIRContext *context) {
+  assert(pos < cst.getNumVars() && "invalid position");
+
+  // Check if a divisor satisfying the condition `0 <= var_r <= divisor - 1`
+  // can be determined.
+  if (lbConst != 0 || ubConst < 1)
+    return false;
+  int64_t divisor = ubConst + 1;
+
+  // Check for the aforementioned conditions in each equality.
+  for (unsigned curEquality = 0, numEqualities = cst.getNumEqualities();
+       curEquality < numEqualities; curEquality++) {
+    int64_t coefficientAtPos = cst.atEq64(curEquality, pos);
+    // If current equality does not involve `var_r`, continue to the next
+    // equality.
+    if (coefficientAtPos == 0)
+      continue;
+
+    // Constant term should be 0 in this equality.
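+    // (A non-zero constant term cannot occur in the matched form
+    // `var_n - divisor * q_expr = var_r`.)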
+    if (cst.atEq64(curEquality, cst.getNumCols() - 1) != 0)
+      continue;
+
+    // Traverse through the equality and construct the dividend expression
+    // `dividendExpr`, to contain all the variables which are known and are
+    // not divisible by `(coefficientAtPos * divisor)`. The hope here is that
+    // the `dividendExpr` gets simplified into a single variable `var_n`
+    // discussed above.
+    auto dividendExpr = getAffineConstantExpr(0, context);
+
+    // Track the terms that go into quotient expression, later used to detect
+    // additional floordiv.
+    unsigned quotientCount = 0;
+    int quotientPosition = -1;
+    int quotientSign = 1;
+
+    // Consider each term in the current equality.
+    unsigned curVar, e;
+    for (curVar = 0, e = cst.getNumDimAndSymbolVars(); curVar < e; ++curVar) {
+      // Ignore var_r.
+      if (curVar == pos)
+        continue;
+      int64_t coefficientOfCurVar = cst.atEq64(curEquality, curVar);
+      // Ignore vars that do not contribute to the current equality.
+      if (coefficientOfCurVar == 0)
+        continue;
+      // Check if the current var goes into the quotient expression.
+      if (coefficientOfCurVar % (divisor * coefficientAtPos) == 0) {
+        quotientCount++;
+        quotientPosition = curVar;
+        quotientSign = (coefficientOfCurVar * coefficientAtPos) > 0 ? 1 : -1;
+        continue;
+      }
+      // Variables that are part of dividendExpr should be known.
+      if (!memo[curVar])
+        break;
+      // Append the current variable to the dividend expression.
+      dividendExpr = dividendExpr + memo[curVar] * coefficientOfCurVar;
+    }
+
+    // Can't construct expression as it depends on a yet uncomputed var.
+    if (curVar < e)
+      continue;
+
+    // Express `var_r` in terms of the other vars collected so far.
+    if (coefficientAtPos > 0)
+      dividendExpr = (-dividendExpr).floorDiv(coefficientAtPos);
+    else
+      dividendExpr = dividendExpr.floorDiv(-coefficientAtPos);
+
+    // Simplify the expression.
+    dividendExpr = simplifyAffineExpr(dividendExpr, cst.getNumDimVars(),
+                                      cst.getNumSymbolVars());
+    // Only if the final dividend expression is just a single var (which we
+    // call `var_n`), we can proceed.
+    // TODO: Handle AffineSymbolExpr as well. There is no reason to restrict it
+    // to dims themselves.
+    auto dimExpr = dividendExpr.dyn_cast<AffineDimExpr>();
+    if (!dimExpr)
+      continue;
+
+    // Express `var_r` as `var_n % divisor` and store the expression in `memo`.
+    if (quotientCount >= 1) {
+      auto ub = cst.getConstantBound64(FlatLinearConstraints::BoundType::UB,
+                                       dimExpr.getPosition());
+      // If `var_n` has an upper bound that is less than the divisor, the mod
+      // can be eliminated altogether.
+      if (ub && *ub < divisor)
+        memo[pos] = dimExpr;
+      else
+        memo[pos] = dimExpr % divisor;
+      // If a unique quotient `var_q` was seen, it can be expressed as
+      // `var_n floordiv divisor`.
+      if (quotientCount == 1 && !memo[quotientPosition])
+        memo[quotientPosition] = dimExpr.floorDiv(divisor) * quotientSign;
+
+      return true;
+    }
+  }
+  return false;
+}
+
+/// Check if the pos^th variable can be expressed as a floordiv of an affine
+/// function of other variables (where the divisor is a positive constant)
+/// given the initial set of expressions in `exprs`. If it can be, the
+/// corresponding position in `exprs` is set as the detected affine expr. For
+/// example: 4q <= i + j <= 4q + 3 <=> q = (i + j) floordiv 4. An equality can
+/// also yield a floordiv: e.g. 4q = i + j <=> q = (i + j) floordiv 4.
+/// 32q + 28 <= i <= 32q + 31 => q = i floordiv 32.
+static bool detectAsFloorDiv(const FlatLinearConstraints &cst, unsigned pos,
+                             MLIRContext *context,
+                             SmallVectorImpl<AffineExpr> &exprs) {
+  assert(pos < cst.getNumVars() && "invalid position");
+
+  // Get upper-lower bound pair for this variable.
+  SmallVector<bool, 8> foundRepr(cst.getNumVars(), false);
+  for (unsigned i = 0, e = cst.getNumVars(); i < e; ++i)
+    if (exprs[i])
+      foundRepr[i] = true;
+
+  SmallVector<int64_t, 8> dividend(cst.getNumCols());
+  unsigned divisor;
+  auto ulPair = computeSingleVarRepr(cst, foundRepr, pos, dividend, divisor);
+
+  // No upper-lower bound pair found for this var.
+  if (ulPair.kind == ReprKind::None || ulPair.kind == ReprKind::Equality)
+    return false;
+
+  // Construct the dividend expression.
+  auto dividendExpr = getAffineConstantExpr(dividend.back(), context);
+  for (unsigned c = 0, f = cst.getNumVars(); c < f; c++)
+    if (dividend[c] != 0)
+      dividendExpr = dividendExpr + dividend[c] * exprs[c];
+
+  // Successfully detected the floordiv.
+  exprs[pos] = dividendExpr.floorDiv(divisor);
+  return true;
+}
+
+std::pair<AffineMap, AffineMap> FlatLinearConstraints::getLowerAndUpperBound(
+    unsigned pos, unsigned offset, unsigned num, unsigned symStartPos,
+    ArrayRef<AffineExpr> localExprs, MLIRContext *context,
+    bool closedUB) const {
+  assert(pos + offset < getNumDimVars() && "invalid dim start pos");
+  assert(symStartPos >= (pos + offset) && "invalid sym start pos");
+  assert(getNumLocalVars() == localExprs.size() &&
+         "incorrect local exprs count");
+
+  SmallVector<unsigned, 4> lbIndices, ubIndices, eqIndices;
+  getLowerAndUpperBoundIndices(pos + offset, &lbIndices, &ubIndices, &eqIndices,
+                               offset, num);
+
+  /// Add to 'b' from 'a' in set [0, offset) U [offset + num, symStartPos).
+  auto addCoeffs = [&](ArrayRef<int64_t> a, SmallVectorImpl<int64_t> &b) {
+    b.clear();
+    for (unsigned i = 0, e = a.size(); i < e; ++i) {
+      if (i < offset || i >= offset + num)
+        b.push_back(a[i]);
+    }
+  };
+
+  SmallVector<int64_t, 8> lb, ub;
+  SmallVector<AffineExpr, 4> lbExprs;
+  unsigned dimCount = symStartPos - num;
+  unsigned symCount = getNumDimAndSymbolVars() - symStartPos;
+  lbExprs.reserve(lbIndices.size() + eqIndices.size());
+  // Lower bound expressions.
+  for (auto idx : lbIndices) {
+    auto ineq = getInequality64(idx);
+    // Extract the lower bound (in terms of other coeff's + const), i.e., if
+    // the constraint is i - j + 1 >= 0 and 'pos' is for i, the lower bound is
+    // j - 1.
+    addCoeffs(ineq, lb);
+    std::transform(lb.begin(), lb.end(), lb.begin(), std::negate<int64_t>());
+    auto expr =
+        getAffineExprFromFlatForm(lb, dimCount, symCount, localExprs, context);
+    // expr ceildiv divisor is (expr + divisor - 1) floordiv divisor
+    int64_t divisor = std::abs(ineq[pos + offset]);
+    expr = (expr + divisor - 1).floorDiv(divisor);
+    lbExprs.push_back(expr);
+  }
+
+  SmallVector<AffineExpr, 4> ubExprs;
+  ubExprs.reserve(ubIndices.size() + eqIndices.size());
+  // Upper bound expressions.
+  for (auto idx : ubIndices) {
+    auto ineq = getInequality64(idx);
+    // Extract the upper bound (in terms of other coeff's + const).
+    addCoeffs(ineq, ub);
+    auto expr =
+        getAffineExprFromFlatForm(ub, dimCount, symCount, localExprs, context);
+    expr = expr.floorDiv(std::abs(ineq[pos + offset]));
+    int64_t ubAdjustment = closedUB ? 0 : 1;
+    ubExprs.push_back(expr + ubAdjustment);
+  }
+
+  // Equalities. It's both a lower and an upper bound.
+  SmallVector<int64_t, 8> b;
+  for (auto idx : eqIndices) {
+    auto eq = getEquality64(idx);
+    addCoeffs(eq, b);
+    if (eq[pos + offset] > 0)
+      std::transform(b.begin(), b.end(), b.begin(), std::negate<int64_t>());
+
+    // Extract the upper bound (in terms of other coeff's + const).
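+    // (If eq[pos + offset] was positive, `b` was negated above, so `b` is
+    // already in upper bound form at this point.)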
+ auto expr = + getAffineExprFromFlatForm(b, dimCount, symCount, localExprs, context); + expr = expr.floorDiv(std::abs(eq[pos + offset])); + // Upper bound is exclusive. + ubExprs.push_back(expr + 1); + // Lower bound. + expr = + getAffineExprFromFlatForm(b, dimCount, symCount, localExprs, context); + expr = expr.ceilDiv(std::abs(eq[pos + offset])); + lbExprs.push_back(expr); + } + + auto lbMap = AffineMap::get(dimCount, symCount, lbExprs, context); + auto ubMap = AffineMap::get(dimCount, symCount, ubExprs, context); + + return {lbMap, ubMap}; +} + +/// Computes the lower and upper bounds of the first 'num' dimensional +/// variables (starting at 'offset') as affine maps of the remaining +/// variables (dimensional and symbolic variables). Local variables are +/// themselves explicitly computed as affine functions of other variables in +/// this process if needed. +void FlatLinearConstraints::getSliceBounds(unsigned offset, unsigned num, + MLIRContext *context, + SmallVectorImpl *lbMaps, + SmallVectorImpl *ubMaps, + bool closedUB) { + assert(num < getNumDimVars() && "invalid range"); + + // Basic simplification. + normalizeConstraintsByGCD(); + + LLVM_DEBUG(llvm::dbgs() << "getSliceBounds for first " << num + << " variables\n"); + LLVM_DEBUG(dump()); + + // Record computed/detected variables. + SmallVector memo(getNumVars()); + // Initialize dimensional and symbolic variables. + for (unsigned i = 0, e = getNumDimVars(); i < e; i++) { + if (i < offset) + memo[i] = getAffineDimExpr(i, context); + else if (i >= offset + num) + memo[i] = getAffineDimExpr(i - num, context); + } + for (unsigned i = getNumDimVars(), e = getNumDimAndSymbolVars(); i < e; i++) + memo[i] = getAffineSymbolExpr(i - getNumDimVars(), context); + + bool changed; + do { + changed = false; + // Identify yet unknown variables as constants or mod's / floordiv's of + // other variables if possible. + for (unsigned pos = 0; pos < getNumVars(); pos++) { + if (memo[pos]) + continue; + + auto lbConst = getConstantBound64(BoundType::LB, pos); + auto ubConst = getConstantBound64(BoundType::UB, pos); + if (lbConst.has_value() && ubConst.has_value()) { + // Detect equality to a constant. + if (*lbConst == *ubConst) { + memo[pos] = getAffineConstantExpr(*lbConst, context); + changed = true; + continue; + } + + // Detect a variable as modulo of another variable w.r.t a + // constant. + if (detectAsMod(*this, pos, *lbConst, *ubConst, memo, context)) { + changed = true; + continue; + } + } + + // Detect a variable as a floordiv of an affine function of other + // variables (divisor is a positive constant). + if (detectAsFloorDiv(*this, pos, context, memo)) { + changed = true; + continue; + } + + // Detect a variable as an expression of other variables. + unsigned idx; + if (!findConstraintWithNonZeroAt(pos, /*isEq=*/true, &idx)) { + continue; + } + + // Build AffineExpr solving for variable 'pos' in terms of all others. + auto expr = getAffineConstantExpr(0, context); + unsigned j, e; + for (j = 0, e = getNumVars(); j < e; ++j) { + if (j == pos) + continue; + int64_t c = atEq64(idx, j); + if (c == 0) + continue; + // If any of the involved IDs hasn't been found yet, we can't proceed. + if (!memo[j]) + break; + expr = expr + memo[j] * c; + } + if (j < e) + // Can't construct expression as it depends on a yet uncomputed + // variable. + continue; + + // Add constant term to AffineExpr. 
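+      // (The constant term is stored in the last column, at index
+      // getNumVars().)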
+      expr = expr + atEq64(idx, getNumVars());
+      int64_t vPos = atEq64(idx, pos);
+      assert(vPos != 0 && "expected non-zero here");
+      if (vPos > 0)
+        expr = (-expr).floorDiv(vPos);
+      else
+        // vPos < 0.
+        expr = expr.floorDiv(-vPos);
+      // Successfully constructed expression.
+      memo[pos] = expr;
+      changed = true;
+    }
+    // This loop is guaranteed to reach a fixed point - since once a
+    // variable's explicit form is computed (in memo[pos]), it's not updated
+    // again.
+  } while (changed);
+
+  int64_t ubAdjustment = closedUB ? 0 : 1;
+
+  // Set the lower and upper bound maps for all the variables that were
+  // computed as affine expressions of the rest as the "detected expr" and
+  // "detected expr + 1" respectively; set the undetected ones to null.
+  std::optional<FlatLinearConstraints> tmpClone;
+  for (unsigned pos = 0; pos < num; pos++) {
+    unsigned numMapDims = getNumDimVars() - num;
+    unsigned numMapSymbols = getNumSymbolVars();
+    AffineExpr expr = memo[pos + offset];
+    if (expr)
+      expr = simplifyAffineExpr(expr, numMapDims, numMapSymbols);
+
+    AffineMap &lbMap = (*lbMaps)[pos];
+    AffineMap &ubMap = (*ubMaps)[pos];
+
+    if (expr) {
+      lbMap = AffineMap::get(numMapDims, numMapSymbols, expr);
+      ubMap = AffineMap::get(numMapDims, numMapSymbols, expr + ubAdjustment);
+    } else {
+      // TODO: Whenever there are local variables in the dependence
+      // constraints, we'll conservatively over-approximate, since we don't
+      // always explicitly compute them above (in the while loop).
+      if (getNumLocalVars() == 0) {
+        // Work on a copy so that we don't update this constraint system.
+        if (!tmpClone) {
+          tmpClone.emplace(FlatLinearConstraints(*this));
+          // Removing redundant inequalities is necessary so that we don't get
+          // redundant loop bounds.
+          tmpClone->removeRedundantInequalities();
+        }
+        std::tie(lbMap, ubMap) = tmpClone->getLowerAndUpperBound(
+            pos, offset, num, getNumDimVars(), /*localExprs=*/{}, context,
+            closedUB);
+      }
+
+      // If the above fails, we'll just use the constant lower bound and the
+      // constant upper bound (if they exist) as the slice bounds.
+      // TODO: being conservative for the moment in cases that
+      // lead to multiple bounds - until getConstDifference in LoopFusion.cpp is
+      // fixed (b/126426796).
+      if (!lbMap || lbMap.getNumResults() > 1) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "WARNING: Potentially over-approximating slice lb\n");
+        auto lbConst = getConstantBound64(BoundType::LB, pos + offset);
+        if (lbConst.has_value()) {
+          lbMap = AffineMap::get(numMapDims, numMapSymbols,
+                                 getAffineConstantExpr(*lbConst, context));
+        }
+      }
+      if (!ubMap || ubMap.getNumResults() > 1) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "WARNING: Potentially over-approximating slice ub\n");
+        auto ubConst = getConstantBound64(BoundType::UB, pos + offset);
+        if (ubConst.has_value()) {
+          ubMap = AffineMap::get(
+              numMapDims, numMapSymbols,
+              getAffineConstantExpr(*ubConst + ubAdjustment, context));
+        }
+      }
+    }
+    LLVM_DEBUG(llvm::dbgs()
+               << "lb map for pos = " << Twine(pos + offset) << ", expr: ");
+    LLVM_DEBUG(lbMap.dump(););
+    LLVM_DEBUG(llvm::dbgs()
+               << "ub map for pos = " << Twine(pos + offset) << ", expr: ");
+    LLVM_DEBUG(ubMap.dump(););
+  }
+}
+
+LogicalResult FlatLinearConstraints::flattenAlignedMapAndMergeLocals(
+    AffineMap map, std::vector<SmallVector<int64_t, 8>> *flattenedExprs) {
+  FlatLinearConstraints localCst;
+  if (failed(getFlattenedAffineExprs(map, flattenedExprs, &localCst))) {
+    LLVM_DEBUG(llvm::dbgs()
+               << "composition unimplemented for semi-affine maps\n");
+    return failure();
+  }
+
+  // Add localCst information.
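+  // That is, merge in the local variables created while flattening `map`,
+  // together with the inequalities that define them as floordivs.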
+  if (localCst.getNumLocalVars() > 0) {
+    unsigned numLocalVars = getNumLocalVars();
+    // Insert local dims of localCst at the beginning.
+    insertLocalVar(/*pos=*/0, /*num=*/localCst.getNumLocalVars());
+    // Insert local dims of `this` at the end of localCst.
+    localCst.appendLocalVar(/*num=*/numLocalVars);
+    // Dimensions of localCst and this constraint set match. Append localCst to
+    // this constraint set.
+    append(localCst);
+  }
+
+  return success();
+}
+
+LogicalResult FlatLinearConstraints::addBound(BoundType type, unsigned pos,
+                                              AffineMap boundMap,
+                                              bool isClosedBound) {
+  assert(boundMap.getNumDims() == getNumDimVars() && "dim mismatch");
+  assert(boundMap.getNumSymbols() == getNumSymbolVars() && "symbol mismatch");
+  assert(pos < getNumDimAndSymbolVars() && "invalid position");
+  assert((type != BoundType::EQ || isClosedBound) &&
+         "EQ bound must be closed.");
+
+  // Equality follows the logic of lower bound except that we add an equality
+  // instead of an inequality.
+  assert((type != BoundType::EQ || boundMap.getNumResults() == 1) &&
+         "single result expected");
+  bool lower = type == BoundType::LB || type == BoundType::EQ;
+
+  std::vector<SmallVector<int64_t, 8>> flatExprs;
+  if (failed(flattenAlignedMapAndMergeLocals(boundMap, &flatExprs)))
+    return failure();
+  assert(flatExprs.size() == boundMap.getNumResults());
+
+  // Add one (in)equality for each result.
+  for (const auto &flatExpr : flatExprs) {
+    SmallVector<int64_t> ineq(getNumCols(), 0);
+    // Dims and symbols.
+    for (unsigned j = 0, e = boundMap.getNumInputs(); j < e; j++) {
+      ineq[j] = lower ? -flatExpr[j] : flatExpr[j];
+    }
+    // Invalid bound: pos appears in `boundMap`.
+    // TODO: This should be an assertion. Fix `addDomainFromSliceMaps` and/or
+    // its callers to prevent invalid bounds from being added.
+    if (ineq[pos] != 0)
+      continue;
+    ineq[pos] = lower ? 1 : -1;
+    // Local columns of `ineq` are at the beginning.
+    unsigned j = getNumDimVars() + getNumSymbolVars();
+    unsigned end = flatExpr.size() - 1;
+    for (unsigned i = boundMap.getNumInputs(); i < end; i++, j++) {
+      ineq[j] = lower ? -flatExpr[i] : flatExpr[i];
+    }
+    // Make the bound closed if flatExpr is open. The inequality is always
+    // created in the upper bound form, so the adjustment is -1.
+    int64_t boundAdjustment = (isClosedBound || type == BoundType::EQ) ? 0 : -1;
+    // Constant term.
+    ineq[getNumCols() - 1] = (lower ? -flatExpr[flatExpr.size() - 1]
+                                    : flatExpr[flatExpr.size() - 1]) +
+                             boundAdjustment;
+    type == BoundType::EQ ? addEquality(ineq) : addInequality(ineq);
+  }
+
+  return success();
+}
+
+LogicalResult FlatLinearConstraints::addBound(BoundType type, unsigned pos,
+                                              AffineMap boundMap) {
+  return addBound(type, pos, boundMap, /*isClosedBound=*/type != BoundType::UB);
+}
+
+/// Compute an explicit representation for local vars. For all systems coming
+/// from MLIR integer sets, maps, or expressions where local vars were
+/// introduced to model floordivs and mods, this always succeeds.
+LogicalResult
+FlatLinearConstraints::computeLocalVars(SmallVectorImpl<AffineExpr> &memo,
+                                        MLIRContext *context) const {
+  unsigned numDims = getNumDimVars();
+  unsigned numSyms = getNumSymbolVars();
+
+  // Initialize dimensional and symbolic variables.
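+  // (Note, added for exposition: `memo` is laid out as [dims | symbols |
+  // locals]; the two loops below fill in the trivial dim/symbol expressions,
+  // while local entries stay null until detectAsFloorDiv resolves them in the
+  // fixed-point loop further down.)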
+  for (unsigned i = 0; i < numDims; i++)
+    memo[i] = getAffineDimExpr(i, context);
+  for (unsigned i = numDims, e = numDims + numSyms; i < e; i++)
+    memo[i] = getAffineSymbolExpr(i - numDims, context);
+
+  bool changed;
+  do {
+    // Each time `changed` is true at the end of this iteration, one or more
+    // local vars would have been detected as floordivs and set in memo; so the
+    // number of null entries in memo[...] strictly reduces; so this converges.
+    changed = false;
+    for (unsigned i = 0, e = getNumLocalVars(); i < e; ++i)
+      if (!memo[numDims + numSyms + i] &&
+          detectAsFloorDiv(*this, /*pos=*/numDims + numSyms + i, context, memo))
+        changed = true;
+  } while (changed);
+
+  ArrayRef<AffineExpr> localExprs =
+      ArrayRef<AffineExpr>(memo).take_back(getNumLocalVars());
+  return success(
+      llvm::all_of(localExprs, [](AffineExpr expr) { return expr; }));
+}
+
+IntegerSet FlatLinearConstraints::getAsIntegerSet(MLIRContext *context) const {
+  if (getNumConstraints() == 0)
+    // Return universal set (always true): 0 == 0.
+    return IntegerSet::get(getNumDimVars(), getNumSymbolVars(),
+                           getAffineConstantExpr(/*constant=*/0, context),
+                           /*eqFlags=*/true);
+
+  // Construct local references.
+  SmallVector<AffineExpr, 8> memo(getNumVars(), AffineExpr());
+
+  if (failed(computeLocalVars(memo, context))) {
+    // Check if the local variables without an explicit representation have
+    // zero coefficients everywhere.
+    SmallVector<unsigned> noLocalRepVars;
+    unsigned numDimsSymbols = getNumDimAndSymbolVars();
+    for (unsigned i = numDimsSymbols, e = getNumVars(); i < e; ++i) {
+      if (!memo[i] && !isColZero(/*pos=*/i))
+        noLocalRepVars.push_back(i - numDimsSymbols);
+    }
+    if (!noLocalRepVars.empty()) {
+      LLVM_DEBUG({
+        llvm::dbgs() << "local variables at position(s) ";
+        llvm::interleaveComma(noLocalRepVars, llvm::dbgs());
+        llvm::dbgs() << " do not have an explicit representation in:\n";
+        this->dump();
+      });
+      return IntegerSet();
+    }
+  }
+
+  ArrayRef<AffineExpr> localExprs =
+      ArrayRef<AffineExpr>(memo).take_back(getNumLocalVars());
+
+  // Construct the IntegerSet from the equalities/inequalities.
+  unsigned numDims = getNumDimVars();
+  unsigned numSyms = getNumSymbolVars();
+
+  SmallVector<bool, 4> eqFlags(getNumConstraints());
+  std::fill(eqFlags.begin(), eqFlags.begin() + getNumEqualities(), true);
+  std::fill(eqFlags.begin() + getNumEqualities(), eqFlags.end(), false);
+
+  SmallVector<AffineExpr, 4> exprs;
+  exprs.reserve(getNumConstraints());
+
+  for (unsigned i = 0, e = getNumEqualities(); i < e; ++i)
+    exprs.push_back(getAffineExprFromFlatForm(getEquality64(i), numDims,
+                                              numSyms, localExprs, context));
+  for (unsigned i = 0, e = getNumInequalities(); i < e; ++i)
+    exprs.push_back(getAffineExprFromFlatForm(getInequality64(i), numDims,
+                                              numSyms, localExprs, context));
+  return IntegerSet::get(numDims, numSyms, exprs, eqFlags);
+}
+
+//===----------------------------------------------------------------------===//
+// FlatLinearValueConstraints
+//===----------------------------------------------------------------------===//
+
+// Construct from an IntegerSet.
+FlatLinearValueConstraints::FlatLinearValueConstraints(IntegerSet set,
+                                                       ValueRange operands)
+    : FlatLinearConstraints(set.getNumInequalities(), set.getNumEqualities(),
+                            set.getNumDims() + set.getNumSymbols() + 1,
+                            set.getNumDims(), set.getNumSymbols(),
+                            /*numLocals=*/0) {
+  // Populate values.
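+  // (Note, added for exposition: `values` associates an optional SSA Value
+  // with each dimensional/symbolic variable; when no operands are supplied,
+  // every entry below starts out as std::nullopt.)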
+  if (operands.empty()) {
+    values.resize(getNumDimAndSymbolVars(), std::nullopt);
+  } else {
+    assert(set.getNumInputs() == operands.size() && "operand count mismatch");
+    values.assign(operands.begin(), operands.end());
+  }
+
+  // Flatten expressions and add them to the constraint system.
+  std::vector<SmallVector<int64_t, 8>> flatExprs;
+  FlatLinearConstraints localVarCst;
+  if (failed(getFlattenedAffineExprs(set, &flatExprs, &localVarCst))) {
+    assert(false && "flattening unimplemented for semi-affine integer sets");
+    return;
+  }
+  assert(flatExprs.size() == set.getNumConstraints());
+  insertVar(VarKind::Local, getNumVarKind(VarKind::Local),
+            /*num=*/localVarCst.getNumLocalVars());
+
+  for (unsigned i = 0, e = flatExprs.size(); i < e; ++i) {
+    const auto &flatExpr = flatExprs[i];
+    assert(flatExpr.size() == getNumCols());
+    if (set.getEqFlags()[i]) {
+      addEquality(flatExpr);
+    } else {
+      addInequality(flatExpr);
+    }
+  }
+  // Add the other constraints involving local vars from flattening.
+  append(localVarCst);
+}
+
+// Construct a hyperrectangular constraint set from ValueRanges that represent
+// induction variables, lower and upper bounds. `ivs`, `lbs` and `ubs` are
+// expected to match one to one. The order of variables and constraints is:
+//
+// ivs | lbs | ubs | eq/ineq
+// ----+-----+-----+---------
+//  1    -1    0     >= 0
+// ----+-----+-----+---------
+// -1     0    1     >= 0
+//
+// All dimensions are set as VarKind::SetDim.
+FlatLinearValueConstraints
+FlatLinearValueConstraints::getHyperrectangular(ValueRange ivs, ValueRange lbs,
+                                                ValueRange ubs) {
+  FlatLinearValueConstraints res;
+  unsigned nIvs = ivs.size();
+  assert(nIvs == lbs.size() && "expected as many lower bounds as ivs");
+  assert(nIvs == ubs.size() && "expected as many upper bounds as ivs");
+
+  if (nIvs == 0)
+    return res;
+
+  res.appendDimVar(ivs);
+  unsigned lbsStart = res.appendDimVar(lbs);
+  unsigned ubsStart = res.appendDimVar(ubs);
+
+  MLIRContext *ctx = ivs.front().getContext();
+  for (int ivIdx = 0, e = nIvs; ivIdx < e; ++ivIdx) {
+    // iv - lb >= 0
+    AffineMap lb = AffineMap::get(/*dimCount=*/3 * nIvs, /*symbolCount=*/0,
+                                  getAffineDimExpr(lbsStart + ivIdx, ctx));
+    if (failed(res.addBound(BoundType::LB, ivIdx, lb)))
+      llvm_unreachable("Unexpected FlatLinearValueConstraints creation error");
+    // -iv + ub >= 0
+    AffineMap ub = AffineMap::get(/*dimCount=*/3 * nIvs, /*symbolCount=*/0,
+                                  getAffineDimExpr(ubsStart + ivIdx, ctx));
+    if (failed(res.addBound(BoundType::UB, ivIdx, ub)))
+      llvm_unreachable("Unexpected FlatLinearValueConstraints creation error");
+  }
+  return res;
+}
+
+unsigned FlatLinearValueConstraints::appendDimVar(ValueRange vals) {
+  unsigned pos = getNumDimVars();
+  return insertVar(VarKind::SetDim, pos, vals);
+}
+
+unsigned FlatLinearValueConstraints::appendSymbolVar(ValueRange vals) {
+  unsigned pos = getNumSymbolVars();
+  return insertVar(VarKind::Symbol, pos, vals);
+}
+
+unsigned FlatLinearValueConstraints::insertDimVar(unsigned pos,
+                                                  ValueRange vals) {
+  return insertVar(VarKind::SetDim, pos, vals);
+}
+
+unsigned FlatLinearValueConstraints::insertSymbolVar(unsigned pos,
+                                                     ValueRange vals) {
+  return insertVar(VarKind::Symbol, pos, vals);
+}
+
+unsigned FlatLinearValueConstraints::insertVar(VarKind kind, unsigned pos,
+                                               unsigned num) {
+  unsigned absolutePos = IntegerPolyhedron::insertVar(kind, pos, num);
+
+  if (kind != VarKind::Local) {
+    values.insert(values.begin() + absolutePos, num, std::nullopt);
+    assert(values.size() == getNumDimAndSymbolVars());
+  }
+
+  return absolutePos;
+}
+
+unsigned
+FlatLinearValueConstraints::insertVar(VarKind kind, unsigned pos,
+                                      ValueRange vals) {
+  assert(!vals.empty() && "expected ValueRange with Values.");
+  assert(kind != VarKind::Local &&
+         "values cannot be attached to local variables.");
+  unsigned num = vals.size();
+  unsigned absolutePos = IntegerPolyhedron::insertVar(kind, pos, num);
+
+  // If a Value is provided, insert it; otherwise use None.
+  for (unsigned i = 0; i < num; ++i)
+    values.insert(values.begin() + absolutePos + i,
+                  vals[i] ? std::optional<Value>(vals[i]) : std::nullopt);
+
+  assert(values.size() == getNumDimAndSymbolVars());
+  return absolutePos;
+}
+
+bool FlatLinearValueConstraints::hasValues() const {
+  return llvm::any_of(
+      values, [](const std::optional<Value> &var) { return var.has_value(); });
+}
+
+/// Checks if two constraint systems are in the same space, i.e., if they are
+/// associated with the same set of variables, appearing in the same order.
+static bool areVarsAligned(const FlatLinearValueConstraints &a,
+                           const FlatLinearValueConstraints &b) {
+  return a.getNumDimVars() == b.getNumDimVars() &&
+         a.getNumSymbolVars() == b.getNumSymbolVars() &&
+         a.getNumVars() == b.getNumVars() &&
+         a.getMaybeValues().equals(b.getMaybeValues());
+}
+
+/// Calls areVarsAligned to check if two constraint systems have the same set
+/// of variables in the same order.
+bool FlatLinearValueConstraints::areVarsAlignedWithOther(
+    const FlatLinearValueConstraints &other) {
+  return areVarsAligned(*this, other);
+}
+
+/// Checks if the SSA values associated with `cst`'s variables in range
+/// [start, end) are unique.
+static bool LLVM_ATTRIBUTE_UNUSED areVarsUnique(
+    const FlatLinearValueConstraints &cst, unsigned start, unsigned end) {
+
+  assert(start <= cst.getNumDimAndSymbolVars() &&
+         "Start position out of bounds");
+  assert(end <= cst.getNumDimAndSymbolVars() && "End position out of bounds");
+
+  if (start >= end)
+    return true;
+
+  SmallPtrSet<Value, 8> uniqueVars;
+  ArrayRef<std::optional<Value>> maybeValues =
+      cst.getMaybeValues().slice(start, end - start);
+  for (std::optional<Value> val : maybeValues) {
+    if (val && !uniqueVars.insert(*val).second)
+      return false;
+  }
+  return true;
+}
+
+/// Checks if the SSA values associated with `cst`'s variables are unique.
+static bool LLVM_ATTRIBUTE_UNUSED
+areVarsUnique(const FlatLinearValueConstraints &cst) {
+  return areVarsUnique(cst, 0, cst.getNumDimAndSymbolVars());
+}
+
+/// Checks if the SSA values associated with `cst`'s variables of kind `kind`
+/// are unique.
+static bool LLVM_ATTRIBUTE_UNUSED
+areVarsUnique(const FlatLinearValueConstraints &cst, VarKind kind) {
+
+  if (kind == VarKind::SetDim)
+    return areVarsUnique(cst, 0, cst.getNumDimVars());
+  if (kind == VarKind::Symbol)
+    return areVarsUnique(cst, cst.getNumDimVars(),
+                         cst.getNumDimAndSymbolVars());
+  llvm_unreachable("Unexpected VarKind");
+}
+
+/// Merge and align the variables of A and B starting at 'offset', so that
+/// both constraint systems get the union of the contained variables that is
+/// dimension-wise and symbol-wise unique; both constraint systems are updated
+/// so that they have the union of all variables, with A's original
+/// variables appearing first followed by any of B's variables that didn't
+/// appear in A. Local variables in B that have the same division
+/// representation as local variables in A are merged into one.
+// E.g.: Input: A has ((%i, %j) [%M, %N]) and B has ((%k, %j) [%P, %N, %M])
+//       Output: both A, B have (%i, %j, %k) [%M, %N, %P]
+static void mergeAndAlignVars(unsigned offset, FlatLinearValueConstraints *a,
+                              FlatLinearValueConstraints *b) {
+  assert(offset <= a->getNumDimVars() && offset <= b->getNumDimVars());
+  // A merge/align isn't meaningful if a cst's vars aren't distinct.
+  assert(areVarsUnique(*a) && "A's values aren't unique");
+  assert(areVarsUnique(*b) && "B's values aren't unique");
+
+  assert(llvm::all_of(
+      llvm::drop_begin(a->getMaybeValues(), offset),
+      [](const std::optional<Value> &var) { return var.has_value(); }));
+
+  assert(llvm::all_of(
+      llvm::drop_begin(b->getMaybeValues(), offset),
+      [](const std::optional<Value> &var) { return var.has_value(); }));
+
+  SmallVector<Value, 4> aDimValues;
+  a->getValues(offset, a->getNumDimVars(), &aDimValues);
+
+  {
+    // Merge dims from A into B.
+    unsigned d = offset;
+    for (auto aDimValue : aDimValues) {
+      unsigned loc;
+      if (b->findVar(aDimValue, &loc)) {
+        assert(loc >= offset && "A's dim appears in B's aligned range");
+        assert(loc < b->getNumDimVars() &&
+               "A's dim appears in B's non-dim position");
+        b->swapVar(d, loc);
+      } else {
+        b->insertDimVar(d, aDimValue);
+      }
+      d++;
+    }
+    // Dimensions that are in B, but not in A, are added at the end.
+    for (unsigned t = a->getNumDimVars(), e = b->getNumDimVars(); t < e; t++) {
+      a->appendDimVar(b->getValue(t));
+    }
+    assert(a->getNumDimVars() == b->getNumDimVars() &&
+           "expected same number of dims");
+  }
+
+  // Merge and align symbols of A and B
+  a->mergeSymbolVars(*b);
+  // Merge and align locals of A and B
+  a->mergeLocalVars(*b);
+
+  assert(areVarsAligned(*a, *b) && "IDs expected to be aligned");
+}
+
+// Call 'mergeAndAlignVars' to align constraint systems of 'this' and 'other'.
+void FlatLinearValueConstraints::mergeAndAlignVarsWithOther(
+    unsigned offset, FlatLinearValueConstraints *other) {
+  mergeAndAlignVars(offset, this, other);
+}
+
+/// Merge and align symbols of `this` and `other` such that both get the
+/// union of symbols that are unique. Symbols in `this` and `other` should be
+/// unique. Symbols with Value as `None` are considered to be unequal to all
+/// other symbols.
+void FlatLinearValueConstraints::mergeSymbolVars(
+    FlatLinearValueConstraints &other) {
+
+  assert(areVarsUnique(*this, VarKind::Symbol) && "Symbol vars are not unique");
+  assert(areVarsUnique(other, VarKind::Symbol) && "Symbol vars are not unique");
+
+  SmallVector<Value, 4> aSymValues;
+  getValues(getNumDimVars(), getNumDimAndSymbolVars(), &aSymValues);
+
+  // Merge symbols: merge symbols into `other` first from `this`.
+  unsigned s = other.getNumDimVars();
+  for (Value aSymValue : aSymValues) {
+    unsigned loc;
+    // If the var is a symbol in `other`, then align it, otherwise assume that
+    // it is a new symbol
+    if (other.findVar(aSymValue, &loc) && loc >= other.getNumDimVars() &&
+        loc < other.getNumDimAndSymbolVars())
+      other.swapVar(s, loc);
+    else
+      other.insertSymbolVar(s - other.getNumDimVars(), aSymValue);
+    s++;
+  }
+
+  // Symbols that are in other, but not in this, are added at the end.
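+  // (Illustrative sketch, not from the original patch: if `this` has symbols
+  // [%a, %b] and `other` has [%b, %c], the loop above aligns %a and %b inside
+  // `other`, and the loop below appends %c to `this`, so both end up with
+  // [%a, %b, %c].)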
+  for (unsigned t = other.getNumDimVars() + getNumSymbolVars(),
+                e = other.getNumDimAndSymbolVars();
+       t < e; t++)
+    insertSymbolVar(getNumSymbolVars(), other.getValue(t));
+
+  assert(getNumSymbolVars() == other.getNumSymbolVars() &&
+         "expected same number of symbols");
+  assert(areVarsUnique(*this, VarKind::Symbol) && "Symbol vars are not unique");
+  assert(areVarsUnique(other, VarKind::Symbol) && "Symbol vars are not unique");
+}
+
+bool FlatLinearValueConstraints::hasConsistentState() const {
+  return IntegerPolyhedron::hasConsistentState() &&
+         values.size() == getNumDimAndSymbolVars();
+}
+
+void FlatLinearValueConstraints::removeVarRange(VarKind kind, unsigned varStart,
+                                                unsigned varLimit) {
+  IntegerPolyhedron::removeVarRange(kind, varStart, varLimit);
+  unsigned offset = getVarKindOffset(kind);
+
+  if (kind != VarKind::Local) {
+    values.erase(values.begin() + varStart + offset,
+                 values.begin() + varLimit + offset);
+  }
+}
+
+AffineMap
+FlatLinearValueConstraints::computeAlignedMap(AffineMap map,
+                                              ValueRange operands) const {
+  assert(map.getNumInputs() == operands.size() && "number of inputs mismatch");
+
+  SmallVector<Value> dims, syms;
+#ifndef NDEBUG
+  SmallVector<Value> newSyms;
+  SmallVector<Value> *newSymsPtr = &newSyms;
+#else
+  SmallVector<Value> *newSymsPtr = nullptr;
+#endif // NDEBUG
+
+  dims.reserve(getNumDimVars());
+  syms.reserve(getNumSymbolVars());
+  for (unsigned i = getVarKindOffset(VarKind::SetDim),
+                e = getVarKindEnd(VarKind::SetDim);
+       i < e; ++i)
+    dims.push_back(values[i] ? *values[i] : Value());
+  for (unsigned i = getVarKindOffset(VarKind::Symbol),
+                e = getVarKindEnd(VarKind::Symbol);
+       i < e; ++i)
+    syms.push_back(values[i] ? *values[i] : Value());
+
+  AffineMap alignedMap =
+      alignAffineMapWithValues(map, operands, dims, syms, newSymsPtr);
+  // All symbols are already part of this FlatAffineValueConstraints.
+  assert(syms.size() == newSymsPtr->size() && "unexpected new/missing symbols");
+  assert(std::equal(syms.begin(), syms.end(), newSymsPtr->begin()) &&
+         "unexpected new/missing symbols");
+  return alignedMap;
+}
+
+bool FlatLinearValueConstraints::findVar(Value val, unsigned *pos) const {
+  unsigned i = 0;
+  for (const auto &mayBeVar : values) {
+    if (mayBeVar && *mayBeVar == val) {
+      *pos = i;
+      return true;
+    }
+    i++;
+  }
+  return false;
+}
+
+bool FlatLinearValueConstraints::containsVar(Value val) const {
+  return llvm::any_of(values, [&](const std::optional<Value> &mayBeVar) {
+    return mayBeVar && *mayBeVar == val;
+  });
+}
+
+void FlatLinearValueConstraints::swapVar(unsigned posA, unsigned posB) {
+  IntegerPolyhedron::swapVar(posA, posB);
+
+  if (getVarKindAt(posA) == VarKind::Local &&
+      getVarKindAt(posB) == VarKind::Local)
+    return;
+
+  // Treat value of a local variable as std::nullopt.
+  if (getVarKindAt(posA) == VarKind::Local)
+    values[posB] = std::nullopt;
+  else if (getVarKindAt(posB) == VarKind::Local)
+    values[posA] = std::nullopt;
+  else
+    std::swap(values[posA], values[posB]);
+}
+
+void FlatLinearValueConstraints::addBound(BoundType type, Value val,
+                                          int64_t value) {
+  unsigned pos;
+  if (!findVar(val, &pos))
+    // This is a pre-condition for this method.
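+    // (That is, callers must only pass a Value that is already associated
+    // with a variable of this system; an unknown Value trips the assertion
+    // below.)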
+ assert(0 && "var not found"); + addBound(type, pos, value); +} + +void FlatLinearConstraints::printSpace(raw_ostream &os) const { + IntegerPolyhedron::printSpace(os); + os << "("; + for (unsigned i = 0, e = getNumDimAndSymbolVars(); i < e; i++) + os << "None\t"; + for (unsigned i = getVarKindOffset(VarKind::Local), + e = getVarKindEnd(VarKind::Local); + i < e; ++i) + os << "Local\t"; + os << "const)\n"; +} + +void FlatLinearValueConstraints::printSpace(raw_ostream &os) const { + IntegerPolyhedron::printSpace(os); + os << "("; + for (unsigned i = 0, e = getNumDimAndSymbolVars(); i < e; i++) { + if (hasValue(i)) + os << "Value\t"; + else + os << "None\t"; + } + for (unsigned i = getVarKindOffset(VarKind::Local), + e = getVarKindEnd(VarKind::Local); + i < e; ++i) + os << "Local\t"; + os << "const)\n"; +} + +void FlatLinearValueConstraints::clearAndCopyFrom( + const IntegerRelation &other) { + + if (auto *otherValueSet = + dyn_cast(&other)) { + *this = *otherValueSet; + } else { + *static_cast(this) = other; + values.clear(); + values.resize(getNumDimAndSymbolVars(), std::nullopt); + } +} + +void FlatLinearValueConstraints::fourierMotzkinEliminate( + unsigned pos, bool darkShadow, bool *isResultIntegerExact) { + SmallVector, 8> newVals = values; + if (getVarKindAt(pos) != VarKind::Local) + newVals.erase(newVals.begin() + pos); + // Note: Base implementation discards all associated Values. + IntegerPolyhedron::fourierMotzkinEliminate(pos, darkShadow, + isResultIntegerExact); + values = newVals; + assert(values.size() == getNumDimAndSymbolVars()); +} + +void FlatLinearValueConstraints::projectOut(Value val) { + unsigned pos; + bool ret = findVar(val, &pos); + assert(ret); + (void)ret; + fourierMotzkinEliminate(pos); +} + +LogicalResult FlatLinearValueConstraints::unionBoundingBox( + const FlatLinearValueConstraints &otherCst) { + assert(otherCst.getNumDimVars() == getNumDimVars() && "dims mismatch"); + assert(otherCst.getMaybeValues() + .slice(0, getNumDimVars()) + .equals(getMaybeValues().slice(0, getNumDimVars())) && + "dim values mismatch"); + assert(otherCst.getNumLocalVars() == 0 && "local vars not supported here"); + assert(getNumLocalVars() == 0 && "local vars not supported yet here"); + + // Align `other` to this. + if (!areVarsAligned(*this, otherCst)) { + FlatLinearValueConstraints otherCopy(otherCst); + mergeAndAlignVars(/*offset=*/getNumDimVars(), this, &otherCopy); + return IntegerPolyhedron::unionBoundingBox(otherCopy); + } + + return IntegerPolyhedron::unionBoundingBox(otherCst); +} + +//===----------------------------------------------------------------------===// +// Helper functions +//===----------------------------------------------------------------------===// + +AffineMap mlir::alignAffineMapWithValues(AffineMap map, ValueRange operands, + ValueRange dims, ValueRange syms, + SmallVector *newSyms) { + assert(operands.size() == map.getNumInputs() && + "expected same number of operands and map inputs"); + MLIRContext *ctx = map.getContext(); + Builder builder(ctx); + SmallVector dimReplacements(map.getNumDims(), {}); + unsigned numSymbols = syms.size(); + SmallVector symReplacements(map.getNumSymbols(), {}); + if (newSyms) { + newSyms->clear(); + newSyms->append(syms.begin(), syms.end()); + } + + for (const auto &operand : llvm::enumerate(operands)) { + // Compute replacement dim/sym of operand. 
+    AffineExpr replacement;
+    auto dimIt = std::find(dims.begin(), dims.end(), operand.value());
+    auto symIt = std::find(syms.begin(), syms.end(), operand.value());
+    if (dimIt != dims.end()) {
+      replacement =
+          builder.getAffineDimExpr(std::distance(dims.begin(), dimIt));
+    } else if (symIt != syms.end()) {
+      replacement =
+          builder.getAffineSymbolExpr(std::distance(syms.begin(), symIt));
+    } else {
+      // This operand is neither a dimension nor a symbol. Add it as a new
+      // symbol.
+      replacement = builder.getAffineSymbolExpr(numSymbols++);
+      if (newSyms)
+        newSyms->push_back(operand.value());
+    }
+    // Add to corresponding replacements vector.
+    if (operand.index() < map.getNumDims()) {
+      dimReplacements[operand.index()] = replacement;
+    } else {
+      symReplacements[operand.index() - map.getNumDims()] = replacement;
+    }
+  }
+
+  return map.replaceDimsAndSymbols(dimReplacements, symReplacements,
+                                   dims.size(), numSymbols);
+}
+
+LogicalResult
+mlir::getMultiAffineFunctionFromMap(AffineMap map,
+                                    MultiAffineFunction &multiAff) {
+  FlatLinearConstraints cst;
+  std::vector<SmallVector<int64_t, 8>> flattenedExprs;
+  LogicalResult result = getFlattenedAffineExprs(map, &flattenedExprs, &cst);
+
+  if (result.failed())
+    return failure();
+
+  DivisionRepr divs = cst.getLocalReprs();
+  assert(divs.hasAllReprs() &&
+         "AffineMap cannot produce divs without local representation");
+
+  // TODO: We shouldn't have to do this conversion.
+  Matrix mat(map.getNumResults(), map.getNumInputs() + divs.getNumDivs() + 1);
+  for (unsigned i = 0, e = flattenedExprs.size(); i < e; ++i)
+    for (unsigned j = 0, f = flattenedExprs[i].size(); j < f; ++j)
+      mat(i, j) = flattenedExprs[i][j];
+
+  multiAff = MultiAffineFunction(
+      PresburgerSpace::getRelationSpace(map.getNumDims(), map.getNumResults(),
+                                        map.getNumSymbols(), divs.getNumDivs()),
+      mat, divs);
+
+  return success();
+}
diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
index 03b8b1d72a5fa..f087dca20f34c 100644
--- a/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp
@@ -33,504 +33,6 @@
 using namespace mlir;
 using namespace presburger;
 
-namespace {
-
-// See comments for SimpleAffineExprFlattener.
-// An AffineExprFlattener extends a SimpleAffineExprFlattener by recording
-// constraint information associated with mod's, floordiv's, and ceildiv's
-// in FlatAffineValueConstraints 'localVarCst'.
-struct AffineExprFlattener : public SimpleAffineExprFlattener {
-public:
-  // Constraints connecting newly introduced local variables (for mod's and
-  // div's) to existing (dimensional and symbolic) ones. These are always
-  // inequalities.
-  IntegerPolyhedron localVarCst;
-
-  AffineExprFlattener(unsigned nDims, unsigned nSymbols)
-      : SimpleAffineExprFlattener(nDims, nSymbols),
-        localVarCst(PresburgerSpace::getSetSpace(nDims, nSymbols)) {}
-
-private:
-  // Add a local variable (needed to flatten a mod, floordiv, ceildiv expr).
-  // The local variable added is always a floordiv of a pure add/mul affine
-  // function of other variables, coefficients of which are specified in
-  // `dividend' and with respect to the positive constant `divisor'. localExpr
-  // is the simplified tree expression (AffineExpr) corresponding to the
-  // quantifier.
-  void addLocalFloorDivId(ArrayRef<int64_t> dividend, int64_t divisor,
-                          AffineExpr localExpr) override {
-    SimpleAffineExprFlattener::addLocalFloorDivId(dividend, divisor, localExpr);
-    // Update localVarCst.
-    localVarCst.addLocalFloorDiv(dividend, divisor);
-  }
-};
-
-} // namespace
-
-// Flattens the expressions in map. Returns failure if 'expr' was unable to be
-// flattened (i.e., semi-affine expressions not handled yet).
-static LogicalResult
-getFlattenedAffineExprs(ArrayRef<AffineExpr> exprs, unsigned numDims,
-                        unsigned numSymbols,
-                        std::vector<SmallVector<int64_t, 8>> *flattenedExprs,
-                        FlatAffineValueConstraints *localVarCst) {
-  if (exprs.empty()) {
-    if (localVarCst)
-      *localVarCst = FlatAffineValueConstraints(numDims, numSymbols);
-    return success();
-  }
-
-  AffineExprFlattener flattener(numDims, numSymbols);
-  // Use the same flattener to simplify each expression successively. This way
-  // local variables / expressions are shared.
-  for (auto expr : exprs) {
-    if (!expr.isPureAffine())
-      return failure();
-
-    flattener.walkPostOrder(expr);
-  }
-
-  assert(flattener.operandExprStack.size() == exprs.size());
-  flattenedExprs->clear();
-  flattenedExprs->assign(flattener.operandExprStack.begin(),
-                         flattener.operandExprStack.end());
-
-  if (localVarCst)
-    localVarCst->clearAndCopyFrom(flattener.localVarCst);
-
-  return success();
-}
-
-// Flattens 'expr' into 'flattenedExpr'. Returns failure if 'expr' was unable to
-// be flattened (semi-affine expressions not handled yet).
-LogicalResult
-mlir::getFlattenedAffineExpr(AffineExpr expr, unsigned numDims,
-                             unsigned numSymbols,
-                             SmallVectorImpl<int64_t> *flattenedExpr,
-                             FlatAffineValueConstraints *localVarCst) {
-  std::vector<SmallVector<int64_t, 8>> flattenedExprs;
-  LogicalResult ret = ::getFlattenedAffineExprs({expr}, numDims, numSymbols,
-                                                &flattenedExprs, localVarCst);
-  *flattenedExpr = flattenedExprs[0];
-  return ret;
-}
-
-/// Flattens the expressions in map. Returns failure if 'expr' was unable to be
-/// flattened (i.e., semi-affine expressions not handled yet).
-LogicalResult mlir::getFlattenedAffineExprs(
-    AffineMap map, std::vector<SmallVector<int64_t, 8>> *flattenedExprs,
-    FlatAffineValueConstraints *localVarCst) {
-  if (map.getNumResults() == 0) {
-    if (localVarCst)
-      *localVarCst =
-          FlatAffineValueConstraints(map.getNumDims(), map.getNumSymbols());
-    return success();
-  }
-  return ::getFlattenedAffineExprs(map.getResults(), map.getNumDims(),
-                                   map.getNumSymbols(), flattenedExprs,
-                                   localVarCst);
-}
-
-LogicalResult mlir::getFlattenedAffineExprs(
-    IntegerSet set, std::vector<SmallVector<int64_t, 8>> *flattenedExprs,
-    FlatAffineValueConstraints *localVarCst) {
-  if (set.getNumConstraints() == 0) {
-    if (localVarCst)
-      *localVarCst =
-          FlatAffineValueConstraints(set.getNumDims(), set.getNumSymbols());
-    return success();
-  }
-  return ::getFlattenedAffineExprs(set.getConstraints(), set.getNumDims(),
-                                   set.getNumSymbols(), flattenedExprs,
-                                   localVarCst);
-}
-
-//===----------------------------------------------------------------------===//
-// FlatAffineConstraints / FlatAffineValueConstraints.
-//===----------------------------------------------------------------------===//
-
-std::unique_ptr<FlatAffineValueConstraints>
-FlatAffineValueConstraints::clone() const {
-  return std::make_unique<FlatAffineValueConstraints>(*this);
-}
-
-// Construct from an IntegerSet.
-FlatAffineValueConstraints::FlatAffineValueConstraints(IntegerSet set,
-                                                       ValueRange operands)
-    : IntegerPolyhedron(set.getNumInequalities(), set.getNumEqualities(),
-                        set.getNumDims() + set.getNumSymbols() + 1,
-                        PresburgerSpace::getSetSpace(set.getNumDims(),
-                                                     set.getNumSymbols(),
-                                                     /*numLocals=*/0)) {
-  // Populate values.
-  if (operands.empty()) {
-    values.resize(getNumDimAndSymbolVars(), std::nullopt);
-  } else {
-    assert(set.getNumInputs() == operands.size() && "operand count mismatch");
-    values.assign(operands.begin(), operands.end());
-  }
-
-  // Flatten expressions and add them to the constraint system.
-  std::vector<SmallVector<int64_t, 8>> flatExprs;
-  FlatAffineValueConstraints localVarCst;
-  if (failed(getFlattenedAffineExprs(set, &flatExprs, &localVarCst))) {
-    assert(false && "flattening unimplemented for semi-affine integer sets");
-    return;
-  }
-  assert(flatExprs.size() == set.getNumConstraints());
-  insertVar(VarKind::Local, getNumVarKind(VarKind::Local),
-            /*num=*/localVarCst.getNumLocalVars());
-
-  for (unsigned i = 0, e = flatExprs.size(); i < e; ++i) {
-    const auto &flatExpr = flatExprs[i];
-    assert(flatExpr.size() == getNumCols());
-    if (set.getEqFlags()[i]) {
-      addEquality(flatExpr);
-    } else {
-      addInequality(flatExpr);
-    }
-  }
-  // Add the other constraints involving local vars from flattening.
-  append(localVarCst);
-}
-
-// Construct a hyperrectangular constraint set from ValueRanges that represent
-// induction variables, lower and upper bounds. `ivs`, `lbs` and `ubs` are
-// expected to match one to one. The order of variables and constraints is:
-//
-// ivs | lbs | ubs | eq/ineq
-// ----+-----+-----+---------
-//  1    -1    0     >= 0
-// ----+-----+-----+---------
-// -1     0    1     >= 0
-//
-// All dimensions as set as VarKind::SetDim.
-FlatAffineValueConstraints
-FlatAffineValueConstraints::getHyperrectangular(ValueRange ivs, ValueRange lbs,
-                                                ValueRange ubs) {
-  FlatAffineValueConstraints res;
-  unsigned nIvs = ivs.size();
-  assert(nIvs == lbs.size() && "expected as many lower bounds as ivs");
-  assert(nIvs == ubs.size() && "expected as many upper bounds as ivs");
-
-  if (nIvs == 0)
-    return res;
-
-  res.appendDimVar(ivs);
-  unsigned lbsStart = res.appendDimVar(lbs);
-  unsigned ubsStart = res.appendDimVar(ubs);
-
-  MLIRContext *ctx = ivs.front().getContext();
-  for (int ivIdx = 0, e = nIvs; ivIdx < e; ++ivIdx) {
-    // iv - lb >= 0
-    AffineMap lb = AffineMap::get(/*dimCount=*/3 * nIvs, /*symbolCount=*/0,
-                                  getAffineDimExpr(lbsStart + ivIdx, ctx));
-    if (failed(res.addBound(BoundType::LB, ivIdx, lb)))
-      llvm_unreachable("Unexpected FlatAffineValueConstraints creation error");
-    // -iv + ub >= 0
-    AffineMap ub = AffineMap::get(/*dimCount=*/3 * nIvs, /*symbolCount=*/0,
-                                  getAffineDimExpr(ubsStart + ivIdx, ctx));
-    if (failed(res.addBound(BoundType::UB, ivIdx, ub)))
-      llvm_unreachable("Unexpected FlatAffineValueConstraints creation error");
-  }
-  return res;
-}
-
-unsigned FlatAffineValueConstraints::appendDimVar(ValueRange vals) {
-  unsigned pos = getNumDimVars();
-  return insertVar(VarKind::SetDim, pos, vals);
-}
-
-unsigned FlatAffineValueConstraints::appendSymbolVar(ValueRange vals) {
-  unsigned pos = getNumSymbolVars();
-  return insertVar(VarKind::Symbol, pos, vals);
-}
-
-unsigned FlatAffineValueConstraints::insertDimVar(unsigned pos,
-                                                  ValueRange vals) {
-  return insertVar(VarKind::SetDim, pos, vals);
-}
-
-unsigned FlatAffineValueConstraints::insertSymbolVar(unsigned pos,
-                                                     ValueRange vals) {
-  return insertVar(VarKind::Symbol, pos, vals);
-}
-
-unsigned FlatAffineValueConstraints::insertVar(VarKind kind, unsigned pos,
-                                               unsigned num) {
-  unsigned absolutePos = IntegerPolyhedron::insertVar(kind, pos, num);
-
-  if (kind != VarKind::Local) {
-    values.insert(values.begin() + absolutePos, num, std::nullopt);
-    assert(values.size() == getNumDimAndSymbolVars());
-  }
-
-  return absolutePos;
-}
-
-unsigned
-FlatAffineValueConstraints::insertVar(VarKind kind, unsigned pos,
-                                      ValueRange vals) {
-  assert(!vals.empty() && "expected ValueRange with Values.");
-  assert(kind != VarKind::Local &&
-         "values cannot be attached to local variables.");
-  unsigned num = vals.size();
-  unsigned absolutePos = IntegerPolyhedron::insertVar(kind, pos, num);
-
-  // If a Value is provided, insert it; otherwise use None.
-  for (unsigned i = 0; i < num; ++i)
-    values.insert(values.begin() + absolutePos + i,
-                  vals[i] ? std::optional<Value>(vals[i]) : std::nullopt);
-
-  assert(values.size() == getNumDimAndSymbolVars());
-  return absolutePos;
-}
-
-bool FlatAffineValueConstraints::hasValues() const {
-  return llvm::any_of(
-      values, [](const std::optional<Value> &var) { return var.has_value(); });
-}
-
-/// Checks if two constraint systems are in the same space, i.e., if they are
-/// associated with the same set of variables, appearing in the same order.
-static bool areVarsAligned(const FlatAffineValueConstraints &a,
-                           const FlatAffineValueConstraints &b) {
-  return a.getNumDimVars() == b.getNumDimVars() &&
-         a.getNumSymbolVars() == b.getNumSymbolVars() &&
-         a.getNumVars() == b.getNumVars() &&
-         a.getMaybeValues().equals(b.getMaybeValues());
-}
-
-/// Calls areVarsAligned to check if two constraint systems have the same set
-/// of variables in the same order.
-bool FlatAffineValueConstraints::areVarsAlignedWithOther(
-    const FlatAffineValueConstraints &other) {
-  return areVarsAligned(*this, other);
-}
-
-/// Checks if the SSA values associated with `cst`'s variables in range
-/// [start, end) are unique.
-static bool LLVM_ATTRIBUTE_UNUSED areVarsUnique(
-    const FlatAffineValueConstraints &cst, unsigned start, unsigned end) {
-
-  assert(start <= cst.getNumDimAndSymbolVars() &&
-         "Start position out of bounds");
-  assert(end <= cst.getNumDimAndSymbolVars() && "End position out of bounds");
-
-  if (start >= end)
-    return true;
-
-  SmallPtrSet<Value, 8> uniqueVars;
-  ArrayRef<std::optional<Value>> maybeValues =
-      cst.getMaybeValues().slice(start, end - start);
-  for (std::optional<Value> val : maybeValues) {
-    if (val && !uniqueVars.insert(*val).second)
-      return false;
-  }
-  return true;
-}
-
-/// Checks if the SSA values associated with `cst`'s variables are unique.
-static bool LLVM_ATTRIBUTE_UNUSED
-areVarsUnique(const FlatAffineValueConstraints &cst) {
-  return areVarsUnique(cst, 0, cst.getNumDimAndSymbolVars());
-}
-
-/// Checks if the SSA values associated with `cst`'s variables of kind `kind`
-/// are unique.
-static bool LLVM_ATTRIBUTE_UNUSED
-areVarsUnique(const FlatAffineValueConstraints &cst, VarKind kind) {
-
-  if (kind == VarKind::SetDim)
-    return areVarsUnique(cst, 0, cst.getNumDimVars());
-  if (kind == VarKind::Symbol)
-    return areVarsUnique(cst, cst.getNumDimVars(),
-                         cst.getNumDimAndSymbolVars());
-  llvm_unreachable("Unexpected VarKind");
-}
-
-/// Merge and align the variables of A and B starting at 'offset', so that
-/// both constraint systems get the union of the contained variables that is
-/// dimension-wise and symbol-wise unique; both constraint systems are updated
-/// so that they have the union of all variables, with A's original
-/// variables appearing first followed by any of B's variables that didn't
-/// appear in A. Local variables in B that have the same division
-/// representation as local variables in A are merged into one.
-// E.g.: Input: A has ((%i, %j) [%M, %N]) and B has (%k, %j) [%P, %N, %M])
-//       Output: both A, B have (%i, %j, %k) [%M, %N, %P]
-static void mergeAndAlignVars(unsigned offset, FlatAffineValueConstraints *a,
-                              FlatAffineValueConstraints *b) {
-  assert(offset <= a->getNumDimVars() && offset <= b->getNumDimVars());
-  // A merge/align isn't meaningful if a cst's vars aren't distinct.
-  assert(areVarsUnique(*a) && "A's values aren't unique");
-  assert(areVarsUnique(*b) && "B's values aren't unique");
-
-  assert(llvm::all_of(
-      llvm::drop_begin(a->getMaybeValues(), offset),
-      [](const std::optional<Value> &var) { return var.has_value(); }));
-
-  assert(llvm::all_of(
-      llvm::drop_begin(b->getMaybeValues(), offset),
-      [](const std::optional<Value> &var) { return var.has_value(); }));
-
-  SmallVector<Value, 4> aDimValues;
-  a->getValues(offset, a->getNumDimVars(), &aDimValues);
-
-  {
-    // Merge dims from A into B.
-    unsigned d = offset;
-    for (auto aDimValue : aDimValues) {
-      unsigned loc;
-      if (b->findVar(aDimValue, &loc)) {
-        assert(loc >= offset && "A's dim appears in B's aligned range");
-        assert(loc < b->getNumDimVars() &&
-               "A's dim appears in B's non-dim position");
-        b->swapVar(d, loc);
-      } else {
-        b->insertDimVar(d, aDimValue);
-      }
-      d++;
-    }
-    // Dimensions that are in B, but not in A, are added at the end.
-    for (unsigned t = a->getNumDimVars(), e = b->getNumDimVars(); t < e; t++) {
-      a->appendDimVar(b->getValue(t));
-    }
-    assert(a->getNumDimVars() == b->getNumDimVars() &&
-           "expected same number of dims");
-  }
-
-  // Merge and align symbols of A and B
-  a->mergeSymbolVars(*b);
-  // Merge and align locals of A and B
-  a->mergeLocalVars(*b);
-
-  assert(areVarsAligned(*a, *b) && "IDs expected to be aligned");
-}
-
-// Call 'mergeAndAlignVars' to align constraint systems of 'this' and 'other'.
-void FlatAffineValueConstraints::mergeAndAlignVarsWithOther(
-    unsigned offset, FlatAffineValueConstraints *other) {
-  mergeAndAlignVars(offset, this, other);
-}
-
-LogicalResult
-FlatAffineValueConstraints::composeMap(const AffineValueMap *vMap) {
-  return composeMatchingMap(
-      computeAlignedMap(vMap->getAffineMap(), vMap->getOperands()));
-}
-
-// Similar to `composeMap` except that no Values need be associated with the
-// constraint system nor are they looked at -- the dimensions and symbols of
-// `other` are expected to correspond 1:1 to `this` system.
-LogicalResult FlatAffineValueConstraints::composeMatchingMap(AffineMap other) {
-  assert(other.getNumDims() == getNumDimVars() && "dim mismatch");
-  assert(other.getNumSymbols() == getNumSymbolVars() && "symbol mismatch");
-
-  std::vector<SmallVector<int64_t, 8>> flatExprs;
-  if (failed(flattenAlignedMapAndMergeLocals(other, &flatExprs)))
-    return failure();
-  assert(flatExprs.size() == other.getNumResults());
-
-  // Add dimensions corresponding to the map's results.
-  insertDimVar(/*pos=*/0, /*num=*/other.getNumResults());
-
-  // We add one equality for each result connecting the result dim of the map to
-  // the other variables.
-  // E.g.: if the expression is 16*i0 + i1, and this is the r^th
-  // iteration/result of the value map, we are adding the equality:
-  // d_r - 16*i0 - i1 = 0. Similarly, when flattening (i0 + 1, i0 + 8*i2), we
-  // add two equalities: d_0 - i0 - 1 == 0, d1 - i0 - 8*i2 == 0.
-  for (unsigned r = 0, e = flatExprs.size(); r < e; r++) {
-    const auto &flatExpr = flatExprs[r];
-    assert(flatExpr.size() >= other.getNumInputs() + 1);
-
-    SmallVector<int64_t, 8> eqToAdd(getNumCols(), 0);
-    // Set the coefficient for this result to one.
-    eqToAdd[r] = 1;
-
-    // Dims and symbols.
-    for (unsigned i = 0, f = other.getNumInputs(); i < f; i++) {
-      // Negate `eq[r]` since the newly added dimension will be set to this one.
-      eqToAdd[e + i] = -flatExpr[i];
-    }
-    // Local columns of `eq` are at the beginning.
-    unsigned j = getNumDimVars() + getNumSymbolVars();
-    unsigned end = flatExpr.size() - 1;
-    for (unsigned i = other.getNumInputs(); i < end; i++, j++) {
-      eqToAdd[j] = -flatExpr[i];
-    }
-
-    // Constant term.
-    eqToAdd[getNumCols() - 1] = -flatExpr[flatExpr.size() - 1];
-
-    // Add the equality connecting the result of the map to this constraint set.
-    addEquality(eqToAdd);
-  }
-
-  return success();
-}
-
-// Turn a symbol into a dimension.
-static void turnSymbolIntoDim(FlatAffineValueConstraints *cst, Value value) {
-  unsigned pos;
-  if (cst->findVar(value, &pos) && pos >= cst->getNumDimVars() &&
-      pos < cst->getNumDimAndSymbolVars()) {
-    cst->swapVar(pos, cst->getNumDimVars());
-    cst->setDimSymbolSeparation(cst->getNumSymbolVars() - 1);
-  }
-}
-
-/// Merge and align symbols of `this` and `other` such that both get union of
-/// of symbols that are unique. Symbols in `this` and `other` should be
-/// unique. Symbols with Value as `None` are considered to be inequal to all
-/// other symbols.
-void FlatAffineValueConstraints::mergeSymbolVars(
-    FlatAffineValueConstraints &other) {
-
-  assert(areVarsUnique(*this, VarKind::Symbol) && "Symbol vars are not unique");
-  assert(areVarsUnique(other, VarKind::Symbol) && "Symbol vars are not unique");
-
-  SmallVector<Value, 4> aSymValues;
-  getValues(getNumDimVars(), getNumDimAndSymbolVars(), &aSymValues);
-
-  // Merge symbols: merge symbols into `other` first from `this`.
-  unsigned s = other.getNumDimVars();
-  for (Value aSymValue : aSymValues) {
-    unsigned loc;
-    // If the var is a symbol in `other`, then align it, otherwise assume that
-    // it is a new symbol
-    if (other.findVar(aSymValue, &loc) && loc >= other.getNumDimVars() &&
-        loc < other.getNumDimAndSymbolVars())
-      other.swapVar(s, loc);
-    else
-      other.insertSymbolVar(s - other.getNumDimVars(), aSymValue);
-    s++;
-  }
-
-  // Symbols that are in other, but not in this, are added at the end.
-  for (unsigned t = other.getNumDimVars() + getNumSymbolVars(),
-                e = other.getNumDimAndSymbolVars();
-       t < e; t++)
-    insertSymbolVar(getNumSymbolVars(), other.getValue(t));
-
-  assert(getNumSymbolVars() == other.getNumSymbolVars() &&
-         "expected same number of symbols");
-  assert(areVarsUnique(*this, VarKind::Symbol) && "Symbol vars are not unique");
-  assert(areVarsUnique(other, VarKind::Symbol) && "Symbol vars are not unique");
-}
-
-// Changes all symbol variables which are loop IVs to dim variables.
-void FlatAffineValueConstraints::convertLoopIVSymbolsToDims() {
-  // Gather all symbols which are loop IVs.
-  SmallVector<Value, 4> loopIVs;
-  for (unsigned i = getNumDimVars(), e = getNumDimAndSymbolVars(); i < e; i++) {
-    if (hasValue(i) && getForInductionVarOwner(getValue(i)))
-      loopIVs.push_back(getValue(i));
-  }
-  // Turn each symbol in 'loopIVs' into a dim variable.
-  for (auto iv : loopIVs) {
-    turnSymbolIntoDim(this, iv);
-  }
-}
 
 void FlatAffineValueConstraints::addInductionVarOrTerminalSymbol(Value val) {
   if (containsVar(val))
@@ -709,559 +211,6 @@ void FlatAffineValueConstraints::addAffineIfOpDomain(AffineIfOp ifOp) {
   append(cst);
 }
 
-bool FlatAffineValueConstraints::hasConsistentState() const {
-  return IntegerPolyhedron::hasConsistentState() &&
-         values.size() == getNumDimAndSymbolVars();
-}
-
-void FlatAffineValueConstraints::removeVarRange(VarKind kind, unsigned varStart,
-                                                unsigned varLimit) {
-  IntegerPolyhedron::removeVarRange(kind, varStart, varLimit);
-  unsigned offset = getVarKindOffset(kind);
-
-  if (kind != VarKind::Local) {
-    values.erase(values.begin() + varStart + offset,
-                 values.begin() + varLimit + offset);
-  }
-}
-
-// Determine whether the variable at 'pos' (say var_r) can be expressed as
-// modulo of another known variable (say var_n) w.r.t a constant. For example,
-// if the following constraints hold true:
-// ```
-// 0 <= var_r <= divisor - 1
-// var_n - (divisor * q_expr) = var_r
-// ```
-// where `var_n` is a known variable (called dividend), and `q_expr` is an
-// `AffineExpr` (called the quotient expression), `var_r` can be written as:
-//
-// `var_r = var_n mod divisor`.
-//
-// Additionally, in a special case of the above constaints where `q_expr` is an
-// variable itself that is not yet known (say `var_q`), it can be written as a
-// floordiv in the following way:
-//
-// `var_q = var_n floordiv divisor`.
-//
-// Returns true if the above mod or floordiv are detected, updating 'memo' with
-// these new expressions. Returns false otherwise.
-static bool detectAsMod(const FlatAffineValueConstraints &cst, unsigned pos,
-                        int64_t lbConst, int64_t ubConst,
-                        SmallVectorImpl<AffineExpr> &memo,
-                        MLIRContext *context) {
-  assert(pos < cst.getNumVars() && "invalid position");
-
-  // Check if a divisor satisfying the condition `0 <= var_r <= divisor - 1` can
-  // be determined.
-  if (lbConst != 0 || ubConst < 1)
-    return false;
-  int64_t divisor = ubConst + 1;
-
-  // Check for the aforementioned conditions in each equality.
-  for (unsigned curEquality = 0, numEqualities = cst.getNumEqualities();
-       curEquality < numEqualities; curEquality++) {
-    int64_t coefficientAtPos = cst.atEq64(curEquality, pos);
-    // If current equality does not involve `var_r`, continue to the next
-    // equality.
-    if (coefficientAtPos == 0)
-      continue;
-
-    // Constant term should be 0 in this equality.
-    if (cst.atEq64(curEquality, cst.getNumCols() - 1) != 0)
-      continue;
-
-    // Traverse through the equality and construct the dividend expression
-    // `dividendExpr`, to contain all the variables which are known and are
-    // not divisible by `(coefficientAtPos * divisor)`. Hope here is that the
-    // `dividendExpr` gets simplified into a single variable `var_n` discussed
-    // above.
-    auto dividendExpr = getAffineConstantExpr(0, context);
-
-    // Track the terms that go into quotient expression, later used to detect
-    // additional floordiv.
-    unsigned quotientCount = 0;
-    int quotientPosition = -1;
-    int quotientSign = 1;
-
-    // Consider each term in the current equality.
-    unsigned curVar, e;
-    for (curVar = 0, e = cst.getNumDimAndSymbolVars(); curVar < e; ++curVar) {
-      // Ignore var_r.
-      if (curVar == pos)
-        continue;
-      int64_t coefficientOfCurVar = cst.atEq64(curEquality, curVar);
-      // Ignore vars that do not contribute to the current equality.
-      if (coefficientOfCurVar == 0)
-        continue;
-      // Check if the current var goes into the quotient expression.
-      if (coefficientOfCurVar % (divisor * coefficientAtPos) == 0) {
-        quotientCount++;
-        quotientPosition = curVar;
-        quotientSign = (coefficientOfCurVar * coefficientAtPos) > 0 ? 1 : -1;
-        continue;
-      }
-      // Variables that are part of dividendExpr should be known.
-      if (!memo[curVar])
-        break;
-      // Append the current variable to the dividend expression.
-      dividendExpr = dividendExpr + memo[curVar] * coefficientOfCurVar;
-    }
-
-    // Can't construct expression as it depends on a yet uncomputed var.
-    if (curVar < e)
-      continue;
-
-    // Express `var_r` in terms of the other vars collected so far.
-    if (coefficientAtPos > 0)
-      dividendExpr = (-dividendExpr).floorDiv(coefficientAtPos);
-    else
-      dividendExpr = dividendExpr.floorDiv(-coefficientAtPos);
-
-    // Simplify the expression.
-    dividendExpr = simplifyAffineExpr(dividendExpr, cst.getNumDimVars(),
-                                      cst.getNumSymbolVars());
-    // Only if the final dividend expression is just a single var (which we call
-    // `var_n`), we can proceed.
-    // TODO: Handle AffineSymbolExpr as well. There is no reason to restrict it
-    // to dims themselves.
-    auto dimExpr = dividendExpr.dyn_cast<AffineDimExpr>();
-    if (!dimExpr)
-      continue;
-
-    // Express `var_r` as `var_n % divisor` and store the expression in `memo`.
-    if (quotientCount >= 1) {
-      auto ub = cst.getConstantBound64(
-          FlatAffineValueConstraints::BoundType::UB, dimExpr.getPosition());
-      // If `var_n` has an upperbound that is less than the divisor, mod can be
-      // eliminated altogether.
-      if (ub && *ub < divisor)
-        memo[pos] = dimExpr;
-      else
-        memo[pos] = dimExpr % divisor;
-      // If a unique quotient `var_q` was seen, it can be expressed as
-      // `var_n floordiv divisor`.
-      if (quotientCount == 1 && !memo[quotientPosition])
-        memo[quotientPosition] = dimExpr.floorDiv(divisor) * quotientSign;
-
-      return true;
-    }
-  }
-  return false;
-}
-
-/// Check if the pos^th variable can be expressed as a floordiv of an affine
-/// function of other variables (where the divisor is a positive constant)
-/// given the initial set of expressions in `exprs`. If it can be, the
-/// corresponding position in `exprs` is set as the detected affine expr. For
-/// eg: 4q <= i + j <= 4q + 3 <=> q = (i + j) floordiv 4. An equality can
-/// also yield a floordiv: eg. 4q = i + j <=> q = (i + j) floordiv 4. 32q + 28
-/// <= i <= 32q + 31 => q = i floordiv 32.
-static bool detectAsFloorDiv(const FlatAffineValueConstraints &cst,
-                             unsigned pos, MLIRContext *context,
-                             SmallVectorImpl<AffineExpr> &exprs) {
-  assert(pos < cst.getNumVars() && "invalid position");
-
-  // Get upper-lower bound pair for this variable.
-  SmallVector<bool, 8> foundRepr(cst.getNumVars(), false);
-  for (unsigned i = 0, e = cst.getNumVars(); i < e; ++i)
-    if (exprs[i])
-      foundRepr[i] = true;
-
-  SmallVector<int64_t, 8> dividend(cst.getNumCols());
-  unsigned divisor;
-  auto ulPair = computeSingleVarRepr(cst, foundRepr, pos, dividend, divisor);
-
-  // No upper-lower bound pair found for this var.
-  if (ulPair.kind == ReprKind::None || ulPair.kind == ReprKind::Equality)
-    return false;
-
-  // Construct the dividend expression.
-  auto dividendExpr = getAffineConstantExpr(dividend.back(), context);
-  for (unsigned c = 0, f = cst.getNumVars(); c < f; c++)
-    if (dividend[c] != 0)
-      dividendExpr = dividendExpr + dividend[c] * exprs[c];
-
-  // Successfully detected the floordiv.
-  exprs[pos] = dividendExpr.floorDiv(divisor);
-  return true;
-}
-
-std::pair<AffineMap, AffineMap>
-FlatAffineValueConstraints::getLowerAndUpperBound(
-    unsigned pos, unsigned offset, unsigned num, unsigned symStartPos,
-    ArrayRef<AffineExpr> localExprs, MLIRContext *context,
-    bool closedUB) const {
-  assert(pos + offset < getNumDimVars() && "invalid dim start pos");
-  assert(symStartPos >= (pos + offset) && "invalid sym start pos");
-  assert(getNumLocalVars() == localExprs.size() &&
-         "incorrect local exprs count");
-
-  SmallVector<unsigned, 4> lbIndices, ubIndices, eqIndices;
-  getLowerAndUpperBoundIndices(pos + offset, &lbIndices, &ubIndices, &eqIndices,
-                               offset, num);
-
-  /// Add to 'b' from 'a' in set [0, offset) U [offset + num, symbStartPos).
-  auto addCoeffs = [&](ArrayRef<int64_t> a, SmallVectorImpl<int64_t> &b) {
-    b.clear();
-    for (unsigned i = 0, e = a.size(); i < e; ++i) {
-      if (i < offset || i >= offset + num)
-        b.push_back(a[i]);
-    }
-  };
-
-  SmallVector<int64_t, 8> lb, ub;
-  SmallVector<AffineExpr, 4> lbExprs;
-  unsigned dimCount = symStartPos - num;
-  unsigned symCount = getNumDimAndSymbolVars() - symStartPos;
-  lbExprs.reserve(lbIndices.size() + eqIndices.size());
-  // Lower bound expressions.
-  for (auto idx : lbIndices) {
-    auto ineq = getInequality64(idx);
-    // Extract the lower bound (in terms of other coeff's + const), i.e., if
-    // i - j + 1 >= 0 is the constraint, 'pos' is for i the lower bound is j
-    // - 1.
-    addCoeffs(ineq, lb);
-    std::transform(lb.begin(), lb.end(), lb.begin(), std::negate<int64_t>());
-    auto expr =
-        getAffineExprFromFlatForm(lb, dimCount, symCount, localExprs, context);
-    // expr ceildiv divisor is (expr + divisor - 1) floordiv divisor
-    int64_t divisor = std::abs(ineq[pos + offset]);
-    expr = (expr + divisor - 1).floorDiv(divisor);
-    lbExprs.push_back(expr);
-  }
-
-  SmallVector<AffineExpr, 4> ubExprs;
-  ubExprs.reserve(ubIndices.size() + eqIndices.size());
-  // Upper bound expressions.
-  for (auto idx : ubIndices) {
-    auto ineq = getInequality64(idx);
-    // Extract the upper bound (in terms of other coeff's + const).
-    addCoeffs(ineq, ub);
-    auto expr =
-        getAffineExprFromFlatForm(ub, dimCount, symCount, localExprs, context);
-    expr = expr.floorDiv(std::abs(ineq[pos + offset]));
-    int64_t ubAdjustment = closedUB ? 0 : 1;
-    ubExprs.push_back(expr + ubAdjustment);
-  }
-
-  // Equalities. It's both a lower and a upper bound.
-  SmallVector<int64_t, 8> b;
-  for (auto idx : eqIndices) {
-    auto eq = getEquality64(idx);
-    addCoeffs(eq, b);
-    if (eq[pos + offset] > 0)
-      std::transform(b.begin(), b.end(), b.begin(), std::negate<int64_t>());
-
-    // Extract the upper bound (in terms of other coeff's + const).
-    auto expr =
-        getAffineExprFromFlatForm(b, dimCount, symCount, localExprs, context);
-    expr = expr.floorDiv(std::abs(eq[pos + offset]));
-    // Upper bound is exclusive.
-    ubExprs.push_back(expr + 1);
-    // Lower bound.
-    expr =
-        getAffineExprFromFlatForm(b, dimCount, symCount, localExprs, context);
-    expr = expr.ceilDiv(std::abs(eq[pos + offset]));
-    lbExprs.push_back(expr);
-  }
-
-  auto lbMap = AffineMap::get(dimCount, symCount, lbExprs, context);
-  auto ubMap = AffineMap::get(dimCount, symCount, ubExprs, context);
-
-  return {lbMap, ubMap};
-}
-
-/// Computes the lower and upper bounds of the first 'num' dimensional
-/// variables (starting at 'offset') as affine maps of the remaining
-/// variables (dimensional and symbolic variables). Local variables are
-/// themselves explicitly computed as affine functions of other variables in
-/// this process if needed.
-void FlatAffineValueConstraints::getSliceBounds(
-    unsigned offset, unsigned num, MLIRContext *context,
-    SmallVectorImpl<AffineMap> *lbMaps, SmallVectorImpl<AffineMap> *ubMaps,
-    bool closedUB) {
-  assert(num < getNumDimVars() && "invalid range");
-
-  // Basic simplification.
-  normalizeConstraintsByGCD();
-
-  LLVM_DEBUG(llvm::dbgs() << "getSliceBounds for first " << num
-                          << " variables\n");
-  LLVM_DEBUG(dump());
-
-  // Record computed/detected variables.
-  SmallVector<AffineExpr, 8> memo(getNumVars());
-  // Initialize dimensional and symbolic variables.
-  for (unsigned i = 0, e = getNumDimVars(); i < e; i++) {
-    if (i < offset)
-      memo[i] = getAffineDimExpr(i, context);
-    else if (i >= offset + num)
-      memo[i] = getAffineDimExpr(i - num, context);
-  }
-  for (unsigned i = getNumDimVars(), e = getNumDimAndSymbolVars(); i < e; i++)
-    memo[i] = getAffineSymbolExpr(i - getNumDimVars(), context);
-
-  bool changed;
-  do {
-    changed = false;
-    // Identify yet unknown variables as constants or mod's / floordiv's of
-    // other variables if possible.
-    for (unsigned pos = 0; pos < getNumVars(); pos++) {
-      if (memo[pos])
-        continue;
-
-      auto lbConst = getConstantBound64(BoundType::LB, pos);
-      auto ubConst = getConstantBound64(BoundType::UB, pos);
-      if (lbConst.has_value() && ubConst.has_value()) {
-        // Detect equality to a constant.
-        if (*lbConst == *ubConst) {
-          memo[pos] = getAffineConstantExpr(*lbConst, context);
-          changed = true;
-          continue;
-        }
-
-        // Detect an variable as modulo of another variable w.r.t a
-        // constant.
-        if (detectAsMod(*this, pos, *lbConst, *ubConst, memo, context)) {
-          changed = true;
-          continue;
-        }
-      }
-
-      // Detect an variable as a floordiv of an affine function of other
-      // variables (divisor is a positive constant).
-      if (detectAsFloorDiv(*this, pos, context, memo)) {
-        changed = true;
-        continue;
-      }
-
-      // Detect an variable as an expression of other variables.
-      unsigned idx;
-      if (!findConstraintWithNonZeroAt(pos, /*isEq=*/true, &idx)) {
-        continue;
-      }
-
-      // Build AffineExpr solving for variable 'pos' in terms of all others.
-      auto expr = getAffineConstantExpr(0, context);
-      unsigned j, e;
-      for (j = 0, e = getNumVars(); j < e; ++j) {
-        if (j == pos)
-          continue;
-        int64_t c = atEq64(idx, j);
-        if (c == 0)
-          continue;
-        // If any of the involved IDs hasn't been found yet, we can't proceed.
-        if (!memo[j])
-          break;
-        expr = expr + memo[j] * c;
-      }
-      if (j < e)
-        // Can't construct expression as it depends on a yet uncomputed
-        // variable.
-        continue;
-
-      // Add constant term to AffineExpr.
-      expr = expr + atEq64(idx, getNumVars());
-      int64_t vPos = atEq64(idx, pos);
-      assert(vPos != 0 && "expected non-zero here");
-      if (vPos > 0)
-        expr = (-expr).floorDiv(vPos);
-      else
-        // vPos < 0.
-        expr = expr.floorDiv(-vPos);
-      // Successfully constructed expression.
-      memo[pos] = expr;
-      changed = true;
-    }
-    // This loop is guaranteed to reach a fixed point - since once an
-    // variable's explicit form is computed (in memo[pos]), it's not updated
-    // again.
-  } while (changed);
-
-  int64_t ubAdjustment = closedUB ? 0 : 1;
-
-  // Set the lower and upper bound maps for all the variables that were
-  // computed as affine expressions of the rest as the "detected expr" and
-  // "detected expr + 1" respectively; set the undetected ones to null.
- std::optional tmpClone; - for (unsigned pos = 0; pos < num; pos++) { - unsigned numMapDims = getNumDimVars() - num; - unsigned numMapSymbols = getNumSymbolVars(); - AffineExpr expr = memo[pos + offset]; - if (expr) - expr = simplifyAffineExpr(expr, numMapDims, numMapSymbols); - - AffineMap &lbMap = (*lbMaps)[pos]; - AffineMap &ubMap = (*ubMaps)[pos]; - - if (expr) { - lbMap = AffineMap::get(numMapDims, numMapSymbols, expr); - ubMap = AffineMap::get(numMapDims, numMapSymbols, expr + ubAdjustment); - } else { - // TODO: Whenever there are local variables in the dependence - // constraints, we'll conservatively over-approximate, since we don't - // always explicitly compute them above (in the while loop). - if (getNumLocalVars() == 0) { - // Work on a copy so that we don't update this constraint system. - if (!tmpClone) { - tmpClone.emplace(FlatAffineValueConstraints(*this)); - // Removing redundant inequalities is necessary so that we don't get - // redundant loop bounds. - tmpClone->removeRedundantInequalities(); - } - std::tie(lbMap, ubMap) = tmpClone->getLowerAndUpperBound( - pos, offset, num, getNumDimVars(), /*localExprs=*/{}, context, - closedUB); - } - - // If the above fails, we'll just use the constant lower bound and the - // constant upper bound (if they exist) as the slice bounds. - // TODO: being conservative for the moment in cases that - // lead to multiple bounds - until getConstDifference in LoopFusion.cpp is - // fixed (b/126426796). - if (!lbMap || lbMap.getNumResults() > 1) { - LLVM_DEBUG(llvm::dbgs() - << "WARNING: Potentially over-approximating slice lb\n"); - auto lbConst = getConstantBound64(BoundType::LB, pos + offset); - if (lbConst.has_value()) { - lbMap = AffineMap::get(numMapDims, numMapSymbols, - getAffineConstantExpr(*lbConst, context)); - } - } - if (!ubMap || ubMap.getNumResults() > 1) { - LLVM_DEBUG(llvm::dbgs() - << "WARNING: Potentially over-approximating slice ub\n"); - auto ubConst = getConstantBound64(BoundType::UB, pos + offset); - if (ubConst.has_value()) { - ubMap = AffineMap::get( - numMapDims, numMapSymbols, - getAffineConstantExpr(*ubConst + ubAdjustment, context)); - } - } - } - LLVM_DEBUG(llvm::dbgs() - << "lb map for pos = " << Twine(pos + offset) << ", expr: "); - LLVM_DEBUG(lbMap.dump();); - LLVM_DEBUG(llvm::dbgs() - << "ub map for pos = " << Twine(pos + offset) << ", expr: "); - LLVM_DEBUG(ubMap.dump();); - } -} - -LogicalResult FlatAffineValueConstraints::flattenAlignedMapAndMergeLocals( - AffineMap map, std::vector> *flattenedExprs) { - FlatAffineValueConstraints localCst; - if (failed(getFlattenedAffineExprs(map, flattenedExprs, &localCst))) { - LLVM_DEBUG(llvm::dbgs() - << "composition unimplemented for semi-affine maps\n"); - return failure(); - } - - // Add localCst information. - if (localCst.getNumLocalVars() > 0) { - unsigned numLocalVars = getNumLocalVars(); - // Insert local dims of localCst at the beginning. - insertLocalVar(/*pos=*/0, /*num=*/localCst.getNumLocalVars()); - // Insert local dims of `this` at the end of localCst. - localCst.appendLocalVar(/*num=*/numLocalVars); - // Dimensions of localCst and this constraint set match. Append localCst to - // this constraint set. 
- append(localCst); - } - - return success(); -} - -LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos, - AffineMap boundMap, - bool isClosedBound) { - assert(boundMap.getNumDims() == getNumDimVars() && "dim mismatch"); - assert(boundMap.getNumSymbols() == getNumSymbolVars() && "symbol mismatch"); - assert(pos < getNumDimAndSymbolVars() && "invalid position"); - assert((type != BoundType::EQ || isClosedBound) && - "EQ bound must be closed."); - - // Equality follows the logic of lower bound except that we add an equality - // instead of an inequality. - assert((type != BoundType::EQ || boundMap.getNumResults() == 1) && - "single result expected"); - bool lower = type == BoundType::LB || type == BoundType::EQ; - - std::vector> flatExprs; - if (failed(flattenAlignedMapAndMergeLocals(boundMap, &flatExprs))) - return failure(); - assert(flatExprs.size() == boundMap.getNumResults()); - - // Add one (in)equality for each result. - for (const auto &flatExpr : flatExprs) { - SmallVector ineq(getNumCols(), 0); - // Dims and symbols. - for (unsigned j = 0, e = boundMap.getNumInputs(); j < e; j++) { - ineq[j] = lower ? -flatExpr[j] : flatExpr[j]; - } - // Invalid bound: pos appears in `boundMap`. - // TODO: This should be an assertion. Fix `addDomainFromSliceMaps` and/or - // its callers to prevent invalid bounds from being added. - if (ineq[pos] != 0) - continue; - ineq[pos] = lower ? 1 : -1; - // Local columns of `ineq` are at the beginning. - unsigned j = getNumDimVars() + getNumSymbolVars(); - unsigned end = flatExpr.size() - 1; - for (unsigned i = boundMap.getNumInputs(); i < end; i++, j++) { - ineq[j] = lower ? -flatExpr[i] : flatExpr[i]; - } - // Make the bound closed in if flatExpr is open. The inequality is always - // created in the upper bound form, so the adjustment is -1. - int64_t boundAdjustment = (isClosedBound || type == BoundType::EQ) ? 0 : -1; - // Constant term. - ineq[getNumCols() - 1] = (lower ? -flatExpr[flatExpr.size() - 1] - : flatExpr[flatExpr.size() - 1]) + - boundAdjustment; - type == BoundType::EQ ? addEquality(ineq) : addInequality(ineq); - } - - return success(); -} - -LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos, - AffineMap boundMap) { - return addBound(type, pos, boundMap, /*isClosedBound=*/type != BoundType::UB); -} - -AffineMap -FlatAffineValueConstraints::computeAlignedMap(AffineMap map, - ValueRange operands) const { - assert(map.getNumInputs() == operands.size() && "number of inputs mismatch"); - - SmallVector dims, syms; -#ifndef NDEBUG - SmallVector newSyms; - SmallVector *newSymsPtr = &newSyms; -#else - SmallVector *newSymsPtr = nullptr; -#endif // NDEBUG - - dims.reserve(getNumDimVars()); - syms.reserve(getNumSymbolVars()); - for (unsigned i = getVarKindOffset(VarKind::SetDim), - e = getVarKindEnd(VarKind::SetDim); - i < e; ++i) - dims.push_back(values[i] ? *values[i] : Value()); - for (unsigned i = getVarKindOffset(VarKind::Symbol), - e = getVarKindEnd(VarKind::Symbol); - i < e; ++i) - syms.push_back(values[i] ? *values[i] : Value()); - - AffineMap alignedMap = - alignAffineMapWithValues(map, operands, dims, syms, newSymsPtr); - // All symbols are already part of this FlatAffineConstraints. 
- assert(syms.size() == newSymsPtr->size() && "unexpected new/missing symbols"); - assert(std::equal(syms.begin(), syms.end(), newSymsPtr->begin()) && - "unexpected new/missing symbols"); - return alignedMap; -} - LogicalResult FlatAffineValueConstraints::addBound(BoundType type, unsigned pos, AffineMap boundMap, ValueRange boundOperands) { @@ -1329,149 +278,34 @@ LogicalResult FlatAffineValueConstraints::addSliceBounds( return success(); } -bool FlatAffineValueConstraints::findVar(Value val, unsigned *pos) const { - unsigned i = 0; - for (const auto &mayBeVar : values) { - if (mayBeVar && *mayBeVar == val) { - *pos = i; - return true; - } - i++; - } - return false; -} - -bool FlatAffineValueConstraints::containsVar(Value val) const { - return llvm::any_of(values, [&](const std::optional &mayBeVar) { - return mayBeVar && *mayBeVar == val; - }); -} - -void FlatAffineValueConstraints::swapVar(unsigned posA, unsigned posB) { - IntegerPolyhedron::swapVar(posA, posB); - - if (getVarKindAt(posA) == VarKind::Local && - getVarKindAt(posB) == VarKind::Local) - return; - - // Treat value of a local variable as std::nullopt. - if (getVarKindAt(posA) == VarKind::Local) - values[posB] = std::nullopt; - else if (getVarKindAt(posB) == VarKind::Local) - values[posA] = std::nullopt; - else - std::swap(values[posA], values[posB]); +LogicalResult +FlatAffineValueConstraints::composeMap(const AffineValueMap *vMap) { + return composeMatchingMap( + computeAlignedMap(vMap->getAffineMap(), vMap->getOperands())); } -void FlatAffineValueConstraints::addBound(BoundType type, Value val, - int64_t value) { +// Turn a symbol into a dimension. +static void turnSymbolIntoDim(FlatAffineValueConstraints *cst, Value value) { unsigned pos; - if (!findVar(val, &pos)) - // This is a pre-condition for this method. - assert(0 && "var not found"); - addBound(type, pos, value); -} - -void FlatAffineValueConstraints::printSpace(raw_ostream &os) const { - IntegerPolyhedron::printSpace(os); - os << "("; - for (unsigned i = 0, e = getNumDimAndSymbolVars(); i < e; i++) { - if (hasValue(i)) - os << "Value\t"; - else - os << "None\t"; + if (cst->findVar(value, &pos) && pos >= cst->getNumDimVars() && + pos < cst->getNumDimAndSymbolVars()) { + cst->swapVar(pos, cst->getNumDimVars()); + cst->setDimSymbolSeparation(cst->getNumSymbolVars() - 1); } - for (unsigned i = getVarKindOffset(VarKind::Local), - e = getVarKindEnd(VarKind::Local); - i < e; ++i) - os << "Local\t"; - os << "const)\n"; } -void FlatAffineValueConstraints::clearAndCopyFrom( - const IntegerRelation &other) { - - if (auto *otherValueSet = - dyn_cast(&other)) { - *this = *otherValueSet; - } else { - *static_cast(this) = other; - values.clear(); - values.resize(getNumDimAndSymbolVars(), std::nullopt); +// Changes all symbol variables which are loop IVs to dim variables. +void FlatAffineValueConstraints::convertLoopIVSymbolsToDims() { + // Gather all symbols which are loop IVs. + SmallVector loopIVs; + for (unsigned i = getNumDimVars(), e = getNumDimAndSymbolVars(); i < e; i++) { + if (hasValue(i) && getForInductionVarOwner(getValue(i))) + loopIVs.push_back(getValue(i)); } -} - -void FlatAffineValueConstraints::fourierMotzkinEliminate( - unsigned pos, bool darkShadow, bool *isResultIntegerExact) { - SmallVector, 8> newVals = values; - if (getVarKindAt(pos) != VarKind::Local) - newVals.erase(newVals.begin() + pos); - // Note: Base implementation discards all associated Values. 
- IntegerPolyhedron::fourierMotzkinEliminate(pos, darkShadow, - isResultIntegerExact); - values = newVals; - assert(values.size() == getNumDimAndSymbolVars()); -} - -void FlatAffineValueConstraints::projectOut(Value val) { - unsigned pos; - bool ret = findVar(val, &pos); - assert(ret); - (void)ret; - fourierMotzkinEliminate(pos); -} - -LogicalResult FlatAffineValueConstraints::unionBoundingBox( - const FlatAffineValueConstraints &otherCst) { - assert(otherCst.getNumDimVars() == getNumDimVars() && "dims mismatch"); - assert(otherCst.getMaybeValues() - .slice(0, getNumDimVars()) - .equals(getMaybeValues().slice(0, getNumDimVars())) && - "dim values mismatch"); - assert(otherCst.getNumLocalVars() == 0 && "local vars not supported here"); - assert(getNumLocalVars() == 0 && "local vars not supported yet here"); - - // Align `other` to this. - if (!areVarsAligned(*this, otherCst)) { - FlatAffineValueConstraints otherCopy(otherCst); - mergeAndAlignVars(/*offset=*/getNumDimVars(), this, &otherCopy); - return IntegerPolyhedron::unionBoundingBox(otherCopy); + // Turn each symbol in 'loopIVs' into a dim variable. + for (auto iv : loopIVs) { + turnSymbolIntoDim(this, iv); } - - return IntegerPolyhedron::unionBoundingBox(otherCst); -} - -/// Compute an explicit representation for local vars. For all systems coming -/// from MLIR integer sets, maps, or expressions where local vars were -/// introduced to model floordivs and mods, this always succeeds. -static LogicalResult computeLocalVars(const FlatAffineValueConstraints &cst, - SmallVectorImpl &memo, - MLIRContext *context) { - unsigned numDims = cst.getNumDimVars(); - unsigned numSyms = cst.getNumSymbolVars(); - - // Initialize dimensional and symbolic variables. - for (unsigned i = 0; i < numDims; i++) - memo[i] = getAffineDimExpr(i, context); - for (unsigned i = numDims, e = numDims + numSyms; i < e; i++) - memo[i] = getAffineSymbolExpr(i - numDims, context); - - bool changed; - do { - // Each time `changed` is true at the end of this iteration, one or more - // local vars would have been detected as floordivs and set in memo; so the - // number of null entries in memo[...] strictly reduces; so this converges. - changed = false; - for (unsigned i = 0, e = cst.getNumLocalVars(); i < e; ++i) - if (!memo[numDims + numSyms + i] && - detectAsFloorDiv(cst, /*pos=*/numDims + numSyms + i, context, memo)) - changed = true; - } while (changed); - - ArrayRef localExprs = - ArrayRef(memo).take_back(cst.getNumLocalVars()); - return success( - llvm::all_of(localExprs, [](AffineExpr expr) { return expr; })); } void FlatAffineValueConstraints::getIneqAsAffineValueMap( @@ -1485,7 +319,7 @@ void FlatAffineValueConstraints::getIneqAsAffineValueMap( // Get expressions for local vars. SmallVector memo(getNumVars(), AffineExpr()); - if (failed(computeLocalVars(*this, memo, context))) + if (failed(computeLocalVars(memo, context))) assert(false && "one or more local exprs do not have an explicit representation"); auto localExprs = ArrayRef(memo).take_back(getNumLocalVars()); @@ -1519,105 +353,6 @@ void FlatAffineValueConstraints::getIneqAsAffineValueMap( vmap.reset(AffineMap::get(numDims - 1, numSyms, boundExpr), operands); } -IntegerSet -FlatAffineValueConstraints::getAsIntegerSet(MLIRContext *context) const { - if (getNumConstraints() == 0) - // Return universal set (always true): 0 == 0. - return IntegerSet::get(getNumDimVars(), getNumSymbolVars(), - getAffineConstantExpr(/*constant=*/0, context), - /*eqFlags=*/true); - - // Construct local references. 
- SmallVector memo(getNumVars(), AffineExpr()); - - if (failed(computeLocalVars(*this, memo, context))) { - // Check if the local variables without an explicit representation have - // zero coefficients everywhere. - SmallVector noLocalRepVars; - unsigned numDimsSymbols = getNumDimAndSymbolVars(); - for (unsigned i = numDimsSymbols, e = getNumVars(); i < e; ++i) { - if (!memo[i] && !isColZero(/*pos=*/i)) - noLocalRepVars.push_back(i - numDimsSymbols); - } - if (!noLocalRepVars.empty()) { - LLVM_DEBUG({ - llvm::dbgs() << "local variables at position(s) "; - llvm::interleaveComma(noLocalRepVars, llvm::dbgs()); - llvm::dbgs() << " do not have an explicit representation in:\n"; - this->dump(); - }); - return IntegerSet(); - } - } - - ArrayRef localExprs = - ArrayRef(memo).take_back(getNumLocalVars()); - - // Construct the IntegerSet from the equalities/inequalities. - unsigned numDims = getNumDimVars(); - unsigned numSyms = getNumSymbolVars(); - - SmallVector eqFlags(getNumConstraints()); - std::fill(eqFlags.begin(), eqFlags.begin() + getNumEqualities(), true); - std::fill(eqFlags.begin() + getNumEqualities(), eqFlags.end(), false); - - SmallVector exprs; - exprs.reserve(getNumConstraints()); - - for (unsigned i = 0, e = getNumEqualities(); i < e; ++i) - exprs.push_back(getAffineExprFromFlatForm(getEquality64(i), numDims, - numSyms, localExprs, context)); - for (unsigned i = 0, e = getNumInequalities(); i < e; ++i) - exprs.push_back(getAffineExprFromFlatForm(getInequality64(i), numDims, - numSyms, localExprs, context)); - return IntegerSet::get(numDims, numSyms, exprs, eqFlags); -} - -AffineMap mlir::alignAffineMapWithValues(AffineMap map, ValueRange operands, - ValueRange dims, ValueRange syms, - SmallVector *newSyms) { - assert(operands.size() == map.getNumInputs() && - "expected same number of operands and map inputs"); - MLIRContext *ctx = map.getContext(); - Builder builder(ctx); - SmallVector dimReplacements(map.getNumDims(), {}); - unsigned numSymbols = syms.size(); - SmallVector symReplacements(map.getNumSymbols(), {}); - if (newSyms) { - newSyms->clear(); - newSyms->append(syms.begin(), syms.end()); - } - - for (const auto &operand : llvm::enumerate(operands)) { - // Compute replacement dim/sym of operand. - AffineExpr replacement; - auto dimIt = std::find(dims.begin(), dims.end(), operand.value()); - auto symIt = std::find(syms.begin(), syms.end(), operand.value()); - if (dimIt != dims.end()) { - replacement = - builder.getAffineDimExpr(std::distance(dims.begin(), dimIt)); - } else if (symIt != syms.end()) { - replacement = - builder.getAffineSymbolExpr(std::distance(syms.begin(), symIt)); - } else { - // This operand is neither a dimension nor a symbol. Add it as a new - // symbol. - replacement = builder.getAffineSymbolExpr(numSymbols++); - if (newSyms) - newSyms->push_back(operand.value()); - } - // Add to corresponding replacements vector. - if (operand.index() < map.getNumDims()) { - dimReplacements[operand.index()] = replacement; - } else { - symReplacements[operand.index() - map.getNumDims()] = replacement; - } - } - - return map.replaceDimsAndSymbols(dimReplacements, symReplacements, - dims.size(), numSymbols); -} - FlatAffineValueConstraints FlatAffineRelation::getDomainSet() const { FlatAffineValueConstraints domain = *this; // Convert all range variables to local variables. 
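For reference, the contract of alignAffineMapWithValues in a minimal sketch
(hypothetical SSA values `iv` and `n`; assumes `map` is (d0, d1) -> (d0 + d1)):

  SmallVector<Value> operands = {iv, n}, dims = {iv}, syms, newSyms;
  AffineMap aligned =
      alignAffineMapWithValues(map, operands, dims, syms, &newSyms);
  // `iv` matches dims[0] and becomes d0; `n` matches neither dims nor syms
  // and is appended as a fresh symbol, so `aligned` is
  // (d0)[s0] -> (d0 + s0) and newSyms == {n}.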
@@ -1806,31 +541,3 @@ LogicalResult mlir::getRelationFromMap(const AffineValueMap &map, return success(); } - -LogicalResult -mlir::getMultiAffineFunctionFromMap(AffineMap map, - MultiAffineFunction &multiAff) { - FlatAffineValueConstraints cst; - std::vector> flattenedExprs; - LogicalResult result = getFlattenedAffineExprs(map, &flattenedExprs, &cst); - - if (result.failed()) - return failure(); - - DivisionRepr divs = cst.getLocalReprs(); - assert(divs.hasAllReprs() && - "AffineMap cannot produce divs without local representation"); - - // TODO: We shouldn't have to do this conversion. - Matrix mat(map.getNumResults(), map.getNumInputs() + divs.getNumDivs() + 1); - for (unsigned i = 0, e = flattenedExprs.size(); i < e; ++i) - for (unsigned j = 0, f = flattenedExprs[i].size(); j < f; ++j) - mat(i, j) = flattenedExprs[i][j]; - - multiAff = MultiAffineFunction( - PresburgerSpace::getRelationSpace(map.getNumDims(), map.getNumResults(), - map.getNumSymbols(), divs.getNumDivs()), - mat, divs); - - return success(); -} diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp index 554452cb265fd..8564bacedd21c 100644 --- a/mlir/lib/IR/AffineExpr.cpp +++ b/mlir/lib/IR/AffineExpr.cpp @@ -1290,7 +1290,7 @@ void SimpleAffineExprFlattener::addLocalVariableSemiAffine( // A floordiv is thus flattened by introducing a new local variable q, and // replacing that expression with 'q' while adding the constraints // c * q <= expr <= c * q + c - 1 to localVarCst (done by -// FlatAffineConstraints::addLocalFloorDiv). +// IntegerRelation::addLocalFloorDiv). // // A ceildiv is similarly flattened: // t = expr ceildiv c <=> t = (expr + c - 1) floordiv c diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp index c924d2bcde556..39c8ab96aa662 100644 --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -829,7 +829,7 @@ bool MutableAffineMap::isMultipleOf(unsigned idx, int64_t factor) const { if (results[idx].isMultipleOf(factor)) return true; - // TODO: use simplifyAffineExpr and FlatAffineConstraints to + // TODO: use simplifyAffineExpr and FlatAffineValueConstraints to // complete this (for a more powerful analysis). return false; } diff --git a/mlir/test/Transforms/memref-bound-check.mlir b/mlir/test/Transforms/memref-bound-check.mlir index fce6bdbca4aa1..80909abee51d6 100644 --- a/mlir/test/Transforms/memref-bound-check.mlir +++ b/mlir/test/Transforms/memref-bound-check.mlir @@ -201,7 +201,7 @@ func.func @out_of_bounds() { // This test case accesses within bounds. Without removal of a certain type of // trivially redundant constraints (those differing only in their constant // term), the number of constraints here explodes, and this would return out of -// bounds errors conservatively due to FlatAffineConstraints::kExplosionFactor. +// bounds errors conservatively due to IntegerRelation::kExplosionFactor. 
#map3 = affine_map<(d0, d1) -> ((d0 * 72 + d1) floordiv 2304 + ((((d0 * 72 + d1) mod 2304) mod 1152) mod 9) floordiv 3)> #map4 = affine_map<(d0, d1) -> ((d0 * 72 + d1) mod 2304 - (((d0 * 72 + d1) mod 2304) floordiv 1152) * 1151 - ((((d0 * 72 + d1) mod 2304) mod 1152) floordiv 9) * 9 - (((((d0 * 72 + d1) mod 2304) mod 1152) mod 9) floordiv 3) * 3)> #map5 = affine_map<(d0, d1) -> (((((d0 * 72 + d1) mod 2304) mod 1152) floordiv 9) floordiv 8)> diff --git a/mlir/test/Transforms/memref-dependence-check.mlir b/mlir/test/Transforms/memref-dependence-check.mlir index 3a16a33a1ed11..f272277cc7904 100644 --- a/mlir/test/Transforms/memref-dependence-check.mlir +++ b/mlir/test/Transforms/memref-dependence-check.mlir @@ -636,7 +636,7 @@ func.func @mod_deps() { affine.for %i0 = 0 to 10 { %a0 = affine.apply affine_map<(d0) -> (d0 mod 2)> (%i0) // Results are conservative here since we currently don't have a way to - // represent strided sets in FlatAffineConstraints. + // represent strided sets in FlatAffineValueConstraints. %v0 = affine.load %m[%a0] : memref<100xf32> // expected-remark@above {{dependence from 0 to 0 at depth 1 = false}} // expected-remark@above {{dependence from 0 to 0 at depth 2 = false}} From e50f131ae6e22aefdaa502af09a3396f49726976 Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Thu, 23 Mar 2023 13:15:22 +0530 Subject: [PATCH 383/691] [MLIR][Affine] Fix bug and MSAN issue in affine loop utils Fix bug and MSAN issue in affine loop utils introduced by d25e022cd19b83c22a6022edb78c4b97a5fc1b49 (D146495). While on it, fix/clean up issues in immediately surrounding code. Differential Revision: https://reviews.llvm.org/D146698 --- mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index 38d660d4ff90b..1e567a6db4108 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -2057,6 +2057,8 @@ static LogicalResult generateCopy( OpBuilder topBuilder(f.getBody()); Value zeroIndex = topBuilder.create(f.getLoc(), 0); + *sizeInBytes = 0; + if (begin == end) return success(); @@ -2105,7 +2107,6 @@ static LogicalResult generateCopy( if (*numElements == 0) { LLVM_DEBUG(llvm::dbgs() << "Nothing to copy\n"); - *sizeInBytes = 0; return success(); } @@ -2183,8 +2184,7 @@ static LogicalResult generateCopy( // fastMemRefType is a constant shaped memref. auto maySizeInBytes = getIntOrFloatMemRefSizeInBytes(fastMemRefType); // We don't account for things of unknown size. - if (!maySizeInBytes) - maySizeInBytes = 0; + *sizeInBytes = maySizeInBytes ? *maySizeInBytes : 0; LLVM_DEBUG(emitRemarkForBlock(*block) << "Creating fast buffer of type " << fastMemRefType @@ -2193,7 +2193,6 @@ static LogicalResult generateCopy( } else { // Reuse the one already created. 
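// Aside: the fix above follows a simple rule: write out-parameters once on
// entry so every early return leaves them defined, which is what MSAN was
// flagging. A sketch of the pattern with a hypothetical helper:
static LogicalResult copyAndReportSize(bool nothingToCopy,
                                       uint64_t *sizeInBytes) {
  *sizeInBytes = 0; // defined on all paths, including the early exit
  if (nothingToCopy)
    return success();
  *sizeInBytes = 1024; // overwritten once a buffer is actually created
  return success();
}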
fastMemRef = fastBufferMap[memref]; - *sizeInBytes = 0; } auto numElementsSSA = top.create(loc, *numElements); @@ -2554,13 +2553,13 @@ LogicalResult mlir::affineDataCopyGenerate(Block::iterator begin, if (llvm::DebugFlag && (forOp = dyn_cast(&*begin))) { LLVM_DEBUG(forOp.emitRemark() << llvm::divideCeil(totalCopyBuffersSizeInBytes, 1024) - << " KiB of copy buffers in fast memory space for this block\n"); + << " KiB of copy buffers in fast memory space for this block"); } if (totalCopyBuffersSizeInBytes > copyOptions.fastMemCapacityBytes) { - StringRef str = "Total size of all copy buffers' for this block " - "exceeds fast memory capacity\n"; - block->getParentOp()->emitWarning(str); + block->getParentOp()->emitWarning( + "total size of all copy buffers' for this block exceeds fast memory " + "capacity"); } return success(); From 47bff1cc46b5de96841fd1592df0c828e1d38e35 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 23 Mar 2023 09:40:51 +0100 Subject: [PATCH 384/691] [mlir][Analysis][NFC] Make BoundType a top-level enum `BoundType` is no longer a nested member of `IntegerRelation` but a top-level enum in the `presburger` namespace. This allows `BoundType` to be predeclared in header files. Nested members cannot be predeclared. Differential Revision: https://reviews.llvm.org/D146210 --- .../Analysis/FlatLinearValueConstraints.h | 9 +++--- .../Analysis/Presburger/IntegerRelation.h | 6 ++-- .../Affine/Analysis/AffineStructures.h | 4 +-- .../Analysis/FlatLinearValueConstraints.cpp | 3 +- mlir/lib/Analysis/Presburger/PWMAFunction.cpp | 4 +-- .../Affine/Analysis/AffineAnalysis.cpp | 6 ++-- mlir/lib/Dialect/Affine/Analysis/Utils.cpp | 31 +++++++++---------- .../TransformOps/AffineTransformOps.cpp | 6 ++-- mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp | 4 +-- mlir/lib/Dialect/Affine/Utils/Utils.cpp | 7 ++--- mlir/lib/Dialect/Linalg/Utils/Utils.cpp | 7 ++--- .../SCF/Utils/AffineCanonicalizationUtils.cpp | 10 +++--- .../Presburger/IntegerPolyhedronTest.cpp | 8 ++--- 13 files changed, 48 insertions(+), 57 deletions(-) diff --git a/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h b/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h index a6900ab599386..abebd7328f823 100644 --- a/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h +++ b/mlir/include/mlir/Analysis/FlatLinearValueConstraints.h @@ -87,8 +87,8 @@ class FlatLinearConstraints : public presburger::IntegerPolyhedron { /// /// Note: The dimensions/symbols of this FlatLinearConstraints must match the /// dimensions/symbols of the affine map. - LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap, - bool isClosedBound); + LogicalResult addBound(presburger::BoundType type, unsigned pos, + AffineMap boundMap, bool isClosedBound); /// Adds a bound for the variable at the specified position with constraints /// being drawn from the specified bound map. In case of an EQ bound, the @@ -98,7 +98,8 @@ class FlatLinearConstraints : public presburger::IntegerPolyhedron { /// Note: The dimensions/symbols of this FlatLinearConstraints must match the /// dimensions/symbols of the affine map. By default the lower bound is closed /// and the upper bound is open. - LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap); + LogicalResult addBound(presburger::BoundType type, unsigned pos, + AffineMap boundMap); /// The `addBound` overload above hides the inherited overloads by default, so /// we explicitly introduce them here. 
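A call site for these overloads now spells the bound type via the top-level
enum, e.g. (a sketch; assumes a populated FlatLinearConstraints `cst` with one
dim and one symbol, and an MLIRContext `ctx`):

  // Add the open upper bound d0 < s0, i.e. d0 <= s0 - 1.
  AffineMap ubMap = AffineMap::get(/*dimCount=*/1, /*symbolCount=*/1,
                                   getAffineSymbolExpr(0, ctx));
  if (failed(cst.addBound(presburger::BoundType::UB, /*pos=*/0, ubMap,
                          /*isClosedBound=*/false)))
    return failure();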
@@ -315,7 +316,7 @@ class FlatLinearValueConstraints : public FlatLinearConstraints { void clearAndCopyFrom(const IntegerRelation &other) override; /// Adds a constant bound for the variable associated with the given Value. - void addBound(BoundType type, Value val, int64_t value); + void addBound(presburger::BoundType type, Value val, int64_t value); using FlatLinearConstraints::addBound; /// Returns the Value associated with the pos^th variable. Asserts if diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h index 8b0c2a561cfb8..9646894736de0 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h @@ -31,6 +31,9 @@ class PresburgerSet; class PresburgerRelation; struct SymbolicLexMin; +/// The type of bound: equal, lower bound or upper bound. +enum class BoundType { EQ, LB, UB }; + /// An IntegerRelation represents the set of points from a PresburgerSpace that /// satisfy a list of affine constraints. Affine constraints can be inequalities /// or equalities in the form: @@ -397,9 +400,6 @@ class IntegerRelation { /// to None. DivisionRepr getLocalReprs(std::vector *repr = nullptr) const; - /// The type of bound: equal, lower bound or upper bound. - enum BoundType { EQ, LB, UB }; - /// Adds a constant bound for the specified variable. void addBound(BoundType type, unsigned pos, const MPInt &value); void addBound(BoundType type, unsigned pos, int64_t value) { diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h index 6249428fb8e15..e59836444cc19 100644 --- a/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h +++ b/mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h @@ -100,8 +100,8 @@ class FlatAffineValueConstraints : public FlatLinearValueConstraints { /// EQ bound, the bound map is expected to have exactly one result. In case /// of a LB/UB, the bound map may have more than one result, for each of which /// an inequality is added. - LogicalResult addBound(BoundType type, unsigned pos, AffineMap boundMap, - ValueRange operands); + LogicalResult addBound(presburger::BoundType type, unsigned pos, + AffineMap boundMap, ValueRange operands); using FlatLinearValueConstraints::addBound; /// Add the specified values as a dim or symbol var depending on its nature, diff --git a/mlir/lib/Analysis/FlatLinearValueConstraints.cpp b/mlir/lib/Analysis/FlatLinearValueConstraints.cpp index b89b2d11003af..24c8d871ff97c 100644 --- a/mlir/lib/Analysis/FlatLinearValueConstraints.cpp +++ b/mlir/lib/Analysis/FlatLinearValueConstraints.cpp @@ -308,8 +308,7 @@ static bool detectAsMod(const FlatLinearConstraints &cst, unsigned pos, // Express `var_r` as `var_n % divisor` and store the expression in `memo`. if (quotientCount >= 1) { - auto ub = cst.getConstantBound64(FlatLinearConstraints::BoundType::UB, - dimExpr.getPosition()); + auto ub = cst.getConstantBound64(BoundType::UB, dimExpr.getPosition()); // If `var_n` has an upperbound that is less than the divisor, mod can be // eliminated altogether. 
if (ub && *ub < divisor) diff --git a/mlir/lib/Analysis/Presburger/PWMAFunction.cpp b/mlir/lib/Analysis/Presburger/PWMAFunction.cpp index 64b9ba6bf7a0e..ce9e810069c48 100644 --- a/mlir/lib/Analysis/Presburger/PWMAFunction.cpp +++ b/mlir/lib/Analysis/Presburger/PWMAFunction.cpp @@ -231,14 +231,14 @@ MultiAffineFunction::getLexSet(OrderingKind comp, // outA - outB <= -1 // outA <= outB - 1 // outA < outB - levelSet.addBound(IntegerPolyhedron::BoundType::UB, subExpr, MPInt(-1)); + levelSet.addBound(BoundType::UB, subExpr, MPInt(-1)); break; case OrderingKind::GT: // For greater than, we add a lower bound of 1: // outA - outB >= 1 // outA > outB + 1 // outA > outB - levelSet.addBound(IntegerPolyhedron::BoundType::LB, subExpr, MPInt(1)); + levelSet.addBound(BoundType::LB, subExpr, MPInt(1)); break; case OrderingKind::GE: case OrderingKind::LE: diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp index d7720a052e0dd..da8f0883d7d5d 100644 --- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp @@ -445,12 +445,10 @@ static void computeDirectionVector( dependenceComponents->resize(numCommonLoops); for (unsigned j = 0; j < numCommonLoops; ++j) { (*dependenceComponents)[j].op = commonLoops[j].getOperation(); - auto lbConst = - dependenceDomain->getConstantBound64(IntegerPolyhedron::LB, j); + auto lbConst = dependenceDomain->getConstantBound64(BoundType::LB, j); (*dependenceComponents)[j].lb = lbConst.value_or(std::numeric_limits::min()); - auto ubConst = - dependenceDomain->getConstantBound64(IntegerPolyhedron::UB, j); + auto ubConst = dependenceDomain->getConstantBound64(BoundType::UB, j); (*dependenceComponents)[j].ub = ubConst.value_or(std::numeric_limits::max()); } diff --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp index db4fa354d4c2d..41a739d726ed5 100644 --- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp @@ -98,7 +98,7 @@ ComputationSliceState::getAsConstraints(FlatAffineValueConstraints *cst) { if (isValidSymbol(value)) { // Check if the symbol is a constant. if (auto cOp = value.getDefiningOp()) - cst->addBound(FlatAffineValueConstraints::EQ, value, cOp.value()); + cst->addBound(BoundType::EQ, value, cOp.value()); } else if (auto loop = getForInductionVarOwner(value)) { if (failed(cst->addAffineForOpDomain(loop))) return failure(); @@ -357,11 +357,11 @@ std::optional MemRefRegion::getConstantBoundingSizeAndShape( // that will need non-trivials means to eliminate. FlatAffineValueConstraints cstWithShapeBounds(cst); for (unsigned r = 0; r < rank; r++) { - cstWithShapeBounds.addBound(FlatAffineValueConstraints::LB, r, 0); + cstWithShapeBounds.addBound(BoundType::LB, r, 0); int64_t dimSize = memRefType.getDimSize(r); if (ShapedType::isDynamic(dimSize)) continue; - cstWithShapeBounds.addBound(FlatAffineValueConstraints::UB, r, dimSize - 1); + cstWithShapeBounds.addBound(BoundType::UB, r, dimSize - 1); } // Find a constant upper bound on the extent of this memref region along each @@ -516,7 +516,7 @@ LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth, // Check if the symbol is a constant. 
Value symbol = operand; if (auto constVal = getConstantIntValue(symbol)) - cst.addBound(FlatAffineValueConstraints::EQ, symbol, constVal.value()); + cst.addBound(BoundType::EQ, symbol, constVal.value()); } else { LLVM_DEBUG(llvm::dbgs() << "unknown affine dimensional value"); return failure(); @@ -580,11 +580,10 @@ LogicalResult MemRefRegion::compute(Operation *op, unsigned loopDepth, if (addMemRefDimBounds) { auto memRefType = memref.getType().cast(); for (unsigned r = 0; r < rank; r++) { - cst.addBound(FlatAffineValueConstraints::LB, /*pos=*/r, /*value=*/0); + cst.addBound(BoundType::LB, /*pos=*/r, /*value=*/0); if (memRefType.isDynamicDim(r)) continue; - cst.addBound(FlatAffineValueConstraints::UB, /*pos=*/r, - memRefType.getDimSize(r) - 1); + cst.addBound(BoundType::UB, /*pos=*/r, memRefType.getDimSize(r) - 1); } } cst.removeTrivialRedundancy(); @@ -695,7 +694,7 @@ LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp, continue; // Check for overflow: d_i >= memref dim size. - ucst.addBound(FlatAffineValueConstraints::LB, r, dimSize); + ucst.addBound(BoundType::LB, r, dimSize); outOfBounds = !ucst.isEmpty(); if (outOfBounds && emitError) { loadOrStoreOp.emitOpError() @@ -706,7 +705,7 @@ LogicalResult mlir::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp, FlatAffineValueConstraints lcst(*region.getConstraints()); std::fill(ineq.begin(), ineq.end(), 0); // d_i <= -1; - lcst.addBound(FlatAffineValueConstraints::UB, r, -1); + lcst.addBound(BoundType::UB, r, -1); outOfBounds = !lcst.isEmpty(); if (outOfBounds && emitError) { loadOrStoreOp.emitOpError() @@ -1403,9 +1402,8 @@ static void unpackOptionalValues(ArrayRef> source, /// Note: This function adds a new symbol column to the `constraints` for each /// dimension/symbol that exists in the affine map but not in `constraints`. static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints, - IntegerPolyhedron::BoundType type, - unsigned pos, AffineMap map, - ValueRange operands) { + BoundType type, unsigned pos, + AffineMap map, ValueRange operands) { SmallVector dims, syms, newSyms; unpackOptionalValues(constraints.getMaybeValues(VarKind::SetDim), dims); unpackOptionalValues(constraints.getMaybeValues(VarKind::Symbol), syms); @@ -1482,7 +1480,7 @@ mlir::simplifyConstrainedMinMaxOp(Operation *op, // Add an inequality for each result expr_i of map: // isMin: op <= expr_i, !isMin: op >= expr_i - auto boundType = isMin ? IntegerPolyhedron::UB : IntegerPolyhedron::LB; + auto boundType = isMin ? BoundType::UB : BoundType::LB; // Upper bounds are exclusive, so add 1. (`affine.min` ops are inclusive.) AffineMap mapLbUb = isMin ? addConstToResults(map, 1) : map; if (failed( @@ -1504,8 +1502,7 @@ mlir::simplifyConstrainedMinMaxOp(Operation *op, // Add an equality: Set dimOpBound to computed bound. // Add back dimension for op. (Was removed by `getSliceBounds`.) AffineMap alignedBoundMap = boundMap.shiftDims(/*shift=*/1, /*offset=*/dimOp); - if (failed(constraints.addBound(IntegerPolyhedron::EQ, dimOpBound, - alignedBoundMap))) + if (failed(constraints.addBound(BoundType::EQ, dimOpBound, alignedBoundMap))) return failure(); // If the constraint system is empty, there is an inconsistency. (E.g., this @@ -1530,7 +1527,7 @@ mlir::simplifyConstrainedMinMaxOp(Operation *op, // Note: These equalities could have been added earlier and used to express // minOp <= expr_i. However, then we run the risk that `getSliceBounds` // computes minOpUb in terms of r_i dims, which is not desired. 
- if (failed(alignAndAddBound(newConstr, IntegerPolyhedron::EQ, i, + if (failed(alignAndAddBound(newConstr, BoundType::EQ, i, map.getSubMap({i - resultDimStart}), operands))) return failure(); @@ -1557,7 +1554,7 @@ mlir::simplifyConstrainedMinMaxOp(Operation *op, // Skip unused operands and operands that are already constants. if (!newOperands[i] || getConstantIntValue(newOperands[i])) continue; - if (auto bound = constraints.getConstantBound64(IntegerPolyhedron::EQ, i)) { + if (auto bound = constraints.getConstantBound64(BoundType::EQ, i)) { AffineExpr expr = i < newMap.getNumDims() ? builder.getAffineDimExpr(i) diff --git a/mlir/lib/Dialect/Affine/TransformOps/AffineTransformOps.cpp b/mlir/lib/Dialect/Affine/TransformOps/AffineTransformOps.cpp index 99dfaa9dee1d2..999adfad2ab5b 100644 --- a/mlir/lib/Dialect/Affine/TransformOps/AffineTransformOps.cpp +++ b/mlir/lib/Dialect/Affine/TransformOps/AffineTransformOps.cpp @@ -97,11 +97,9 @@ SimplifyBoundedAffineOpsOp::apply(TransformResults &results, unsigned pos; if (!cstr.findVar(std::get<0>(it), &pos)) pos = cstr.appendSymbolVar(std::get<0>(it)); - cstr.addBound(FlatAffineValueConstraints::BoundType::LB, pos, - std::get<1>(it)); + cstr.addBound(presburger::BoundType::LB, pos, std::get<1>(it)); // Note: addBound bounds are inclusive, but specified UB is exclusive. - cstr.addBound(FlatAffineValueConstraints::BoundType::UB, pos, - std::get<2>(it) - 1); + cstr.addBound(presburger::BoundType::UB, pos, std::get<2>(it) - 1); } // Transform all targets. diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index 1e567a6db4108..a7f96dc0e08e2 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -2371,8 +2371,8 @@ static bool getFullMemRefAsRegion(Operation *op, unsigned numParamLoopIVs, for (unsigned d = 0; d < rank; d++) { auto dimSize = memRefType.getDimSize(d); assert(dimSize > 0 && "filtered dynamic shapes above"); - regionCst->addBound(IntegerPolyhedron::LB, d, 0); - regionCst->addBound(IntegerPolyhedron::UB, d, dimSize - 1); + regionCst->addBound(BoundType::LB, d, 0); + regionCst->addBound(BoundType::UB, d, dimSize - 1); } return true; } diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp index 50405953e05bd..d96b688d29ed5 100644 --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -1800,8 +1800,8 @@ MemRefType mlir::normalizeMemRefType(MemRefType memrefType, for (unsigned d = 0; d < rank; ++d) { // Use constraint system only in static dimensions. if (shape[d] > 0) { - fac.addBound(IntegerPolyhedron::LB, d, 0); - fac.addBound(IntegerPolyhedron::UB, d, shape[d] - 1); + fac.addBound(BoundType::LB, d, 0); + fac.addBound(BoundType::UB, d, shape[d] - 1); } else { memrefTypeDynDims.emplace_back(d); } @@ -1824,8 +1824,7 @@ MemRefType mlir::normalizeMemRefType(MemRefType memrefType, newShape[d] = ShapedType::kDynamic; } else { // The lower bound for the shape is always zero. - std::optional ubConst = - fac.getConstantBound64(IntegerPolyhedron::UB, d); + std::optional ubConst = fac.getConstantBound64(BoundType::UB, d); // For a static memref and an affine map with no symbols, this is // always bounded. However, when we have symbols, we may not be able to // obtain a constant upper bound. 
Also, mapping to a negative space is diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp index f3879f5dd9d12..75f818b1b275d 100644 --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -270,7 +270,7 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap, if (auto applyOp = dyn_cast(op)) { AffineMap map = constraints.computeAlignedMap(applyOp.getAffineMap(), applyOp.getOperands()); - if (failed(constraints.addBound(IntegerPolyhedron::EQ, + if (failed(constraints.addBound(BoundType::EQ, getPosition(applyOp.getResult()), map))) return; continue; @@ -279,7 +279,7 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap, auto minOp = cast(op); AffineMap map = constraints.computeAlignedMap(minOp.getAffineMap(), minOp.getOperands()); - if (failed(constraints.addBound(IntegerPolyhedron::UB, + if (failed(constraints.addBound(BoundType::UB, getPosition(minOp.getResult()), map, /*isClosedBound=*/true))) return; @@ -290,8 +290,7 @@ void getUpperBoundForIndex(Value value, AffineMap &boundMap, // of the terminals of the index computation. unsigned pos = getPosition(value); if (constantRequired) { - auto ubConst = constraints.getConstantBound64( - FlatAffineValueConstraints::BoundType::UB, pos); + auto ubConst = constraints.getConstantBound64(BoundType::UB, pos); if (!ubConst) return; diff --git a/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp b/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp index 6964747cdebb6..1c458eee44d1a 100644 --- a/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp @@ -98,9 +98,9 @@ LogicalResult scf::addLoopRangeConstraints(FlatAffineValueConstraints &cstr, std::optional lbInt = getConstantIntValue(lb); std::optional ubInt = getConstantIntValue(ub); if (lbInt) - cstr.addBound(IntegerPolyhedron::EQ, symLb, *lbInt); + cstr.addBound(BoundType::EQ, symLb, *lbInt); if (ubInt) - cstr.addBound(IntegerPolyhedron::EQ, symUb, *ubInt); + cstr.addBound(BoundType::EQ, symUb, *ubInt); // Lower bound: iv >= lb (equiv.: iv - lb >= 0) SmallVector ineqLb(cstr.getNumCols(), 0); @@ -131,7 +131,7 @@ LogicalResult scf::addLoopRangeConstraints(FlatAffineValueConstraints &cstr, /*dimCount=*/cstr.getNumDimVars(), /*symbolCount=*/cstr.getNumSymbolVars(), /*result=*/ivUb); - return cstr.addBound(IntegerPolyhedron::UB, dimIv, map); + return cstr.addBound(BoundType::UB, dimIv, map); } /// Canonicalize min/max operations in the context of for loops with a known @@ -202,9 +202,9 @@ LogicalResult scf::rewritePeeledMinMaxOp(RewriterBase &rewriter, Operation *op, constraints.appendDimVar({iv}); constraints.appendSymbolVar({ub, step}); if (auto constUb = getConstantIntValue(ub)) - constraints.addBound(IntegerPolyhedron::EQ, 1, *constUb); + constraints.addBound(BoundType::EQ, 1, *constUb); if (auto constStep = getConstantIntValue(step)) - constraints.addBound(IntegerPolyhedron::EQ, 2, *constStep); + constraints.addBound(BoundType::EQ, 2, *constStep); // Add loop peeling invariant. This is the main piece of knowledge that // enables AffineMinOp simplification. 
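In client code the rename is mechanical: the nested
IntegerPolyhedron::LB/UB/EQ spellings become presburger::BoundType members, as
in this minimal sketch (mirroring the unit tests below):

  IntegerPolyhedron poly(PresburgerSpace::getSetSpace(/*numDims=*/2));
  poly.addBound(BoundType::LB, /*pos=*/0, 0); // 0 <= d0
  poly.addBound(BoundType::UB, /*pos=*/0, 7); // d0 <= 7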
diff --git a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp index cc55b96d5b1a8..6beb9384c8bf2 100644 --- a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp +++ b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp @@ -594,12 +594,12 @@ TEST(IntegerPolyhedronTest, removeRedundantConstraintsTest) { TEST(IntegerPolyhedronTest, addConstantUpperBound) { IntegerPolyhedron poly(PresburgerSpace::getSetSpace(2)); - poly.addBound(IntegerPolyhedron::UB, 0, 1); + poly.addBound(BoundType::UB, 0, 1); EXPECT_EQ(poly.atIneq(0, 0), -1); EXPECT_EQ(poly.atIneq(0, 1), 0); EXPECT_EQ(poly.atIneq(0, 2), 1); - poly.addBound(IntegerPolyhedron::UB, {1, 2, 3}, 1); + poly.addBound(BoundType::UB, {1, 2, 3}, 1); EXPECT_EQ(poly.atIneq(1, 0), -1); EXPECT_EQ(poly.atIneq(1, 1), -2); EXPECT_EQ(poly.atIneq(1, 2), -2); @@ -607,12 +607,12 @@ TEST(IntegerPolyhedronTest, addConstantUpperBound) { TEST(IntegerPolyhedronTest, addConstantLowerBound) { IntegerPolyhedron poly(PresburgerSpace::getSetSpace(2)); - poly.addBound(IntegerPolyhedron::LB, 0, 1); + poly.addBound(BoundType::LB, 0, 1); EXPECT_EQ(poly.atIneq(0, 0), 1); EXPECT_EQ(poly.atIneq(0, 1), 0); EXPECT_EQ(poly.atIneq(0, 2), -1); - poly.addBound(IntegerPolyhedron::LB, {1, 2, 3}, 1); + poly.addBound(BoundType::LB, {1, 2, 3}, 1); EXPECT_EQ(poly.atIneq(1, 0), 1); EXPECT_EQ(poly.atIneq(1, 1), 2); EXPECT_EQ(poly.atIneq(1, 2), 2); From 0691bcb18024a28e82e8dd9a08ab0820b40c9a37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Mon, 20 Mar 2023 17:43:26 +0100 Subject: [PATCH 385/691] [clang][Interp][NFC] Add tests for __fp16 Differential Revision: https://reviews.llvm.org/D146436 --- clang/test/AST/Interp/floats.cpp | 91 ++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/clang/test/AST/Interp/floats.cpp b/clang/test/AST/Interp/floats.cpp index 7b9328c4d1182..b3c4dd4c19a84 100644 --- a/clang/test/AST/Interp/floats.cpp +++ b/clang/test/AST/Interp/floats.cpp @@ -78,3 +78,94 @@ namespace compound { } static_assert(f2() == __FLT_MAX__, ""); } + + +namespace FP16 { + constexpr int i = 2; + constexpr __fp16 f = 1.0f; + static_assert(f == 1.0f, ""); + + constexpr __fp16 f2 = 1u * f; + static_assert(f2 == 1.0f, ""); + + constexpr __fp16 f3 = 1.5; + constexpr int i3 = f3; + static_assert(i3 == 1, ""); + + constexpr bool b3 = f3; + static_assert(b3, ""); + + + static_assert(1.0f16 + 3u == 4, ""); + static_assert(4.0f16 / 1.0f16 == 4, ""); + static_assert(10.0f16 * false == 0, ""); + + constexpr __fp16 __fp16s[] = {1.0f16, 2.0f16, 3.0f16, 4.0f16}; + + constexpr __fp16 m = 5.0f16 / 0.0f16; // ref-error {{must be initialized by a constant expression}} \ + // ref-note {{division by zero}} \ + // expected-error {{must be initialized by a constant expression}} \ + // expected-note {{division by zero}} + + static_assert(~2.0f16 == 3, ""); // ref-error {{invalid argument type '_Float16' to unary expression}} \ + // expected-error {{invalid argument type '_Float16' to unary expression}} + + /// Initialized by a double. + constexpr __fp16 df = 0.0; + /// The other way around. 
+ constexpr double fd = 0.0f16; + + static_assert(0.0f == -0.0f, ""); + + const int k = 3 * (1.0f16 / 3.0f16); + static_assert(k == 1, ""); + + constexpr bool b = 1.0f16; + static_assert(b, ""); + + constexpr double db = true; + static_assert(db == 1.0f16, ""); + + constexpr __fp16 fa[] = {1.0f, 2.0, 1, false}; + constexpr double da[] = {1.0f, 2.0, 1, false}; + + constexpr __fp16 fm = __FLT16_MAX__; + constexpr int someInt = fm; + + constexpr float SomeFloat = __FLT_MAX__; + constexpr __fp16 halfFloat = SomeFloat; + + constexpr float fp16ptr() { + __fp16 f1 = 1.0f16; + __fp16 *f2 = &f1; + + *f2 = 3.0; + return f1; + } + static_assert(fp16ptr() == 3.0, ""); + + namespace compound { + constexpr float f1() { + __fp16 f = 0; + f += 3.0; + f -= 3.0f; + + f += 1; + f /= 1; + f /= 1.0; + f *= f; + + f *= 2.0; + return f; + } + static_assert(f1() == 2, ""); + + constexpr float f2() { + __fp16 f = __FLT16_MAX__; + f += 1.0; + return f; + } + static_assert(f2() == __FLT16_MAX__, ""); + } + +} From 49dcd08c3d963e79d0710faf0e4024eb9b84bc8b Mon Sep 17 00:00:00 2001 From: esmeyi Date: Thu, 23 Mar 2023 05:09:47 -0400 Subject: [PATCH 386/691] [XCOFF] support the ref directive for object generation. Summary: A R_REF relocation as a non-relocating reference is required to prevent garbage collection (by the binder) of the ref symbol in object generation. Reviewed By: shchenz Differential Revision: https://reviews.llvm.org/D144356 --- llvm/include/llvm/MC/MCStreamer.h | 2 +- llvm/include/llvm/MC/MCXCOFFStreamer.h | 5 +- llvm/lib/MC/MCAsmStreamer.cpp | 7 +-- llvm/lib/MC/MCStreamer.cpp | 2 +- llvm/lib/MC/MCXCOFFStreamer.cpp | 15 ++++++ llvm/lib/MC/XCOFFObjectWriter.cpp | 5 +- .../PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 9 ++++ .../PowerPC/MCTargetDesc/PPCFixupKinds.h | 3 +- .../MCTargetDesc/PPCXCOFFObjectWriter.cpp | 6 +++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 18 +++++--- .../test/CodeGen/PowerPC/pgo-ref-directive.ll | 46 +++++++++++++++++-- 11 files changed, 98 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index aa39954d62868..f5891b24ae4b4 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -645,7 +645,7 @@ class MCStreamer { /// relocation table for one or more symbols. /// /// \param Sym - The symbol on the .ref directive. - virtual void emitXCOFFRefDirective(StringRef Sym); + virtual void emitXCOFFRefDirective(const MCSymbol *Symbol); /// Emit an ELF .size directive. 
/// diff --git a/llvm/include/llvm/MC/MCXCOFFStreamer.h b/llvm/include/llvm/MC/MCXCOFFStreamer.h index a437faeccbff4..aea2a3265d572 100644 --- a/llvm/include/llvm/MC/MCXCOFFStreamer.h +++ b/llvm/include/llvm/MC/MCXCOFFStreamer.h @@ -31,10 +31,7 @@ class MCXCOFFStreamer : public MCObjectStreamer { void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol, MCSymbolAttr Linkage, MCSymbolAttr Visibility) override; - void emitXCOFFRefDirective(StringRef Name) override { - report_fatal_error("emitXCOFFRefDirective is not implemented yet on object" - "generation path"); - } + void emitXCOFFRefDirective(const MCSymbol *Symbol) override; void emitXCOFFRenameDirective(const MCSymbol *Name, StringRef Rename) override { report_fatal_error("emitXCOFFRenameDirective is not implemented yet on " diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 006f697b61875..fa1ab2717af2e 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -194,7 +194,7 @@ class MCAsmStreamer final : public MCStreamer { void emitXCOFFRenameDirective(const MCSymbol *Name, StringRef Rename) override; - void emitXCOFFRefDirective(StringRef Name) override; + void emitXCOFFRefDirective(const MCSymbol *Symbol) override; void emitXCOFFExceptDirective(const MCSymbol *Symbol, const MCSymbol *Trap, @@ -943,8 +943,9 @@ void MCAsmStreamer::emitXCOFFRenameDirective(const MCSymbol *Name, EmitEOL(); } -void MCAsmStreamer::emitXCOFFRefDirective(StringRef Name) { - OS << "\t.ref " << Name; +void MCAsmStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) { + OS << "\t.ref "; + Symbol->print(OS, MAI); EmitEOL(); } diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 517e258844156..4dd3163fd399d 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -1190,7 +1190,7 @@ void MCStreamer::emitXCOFFRenameDirective(const MCSymbol *Name, "XCOFF targets"); } -void MCStreamer::emitXCOFFRefDirective(StringRef Name) { +void MCStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) { llvm_unreachable("emitXCOFFRefDirective is only supported on XCOFF targets"); } diff --git a/llvm/lib/MC/MCXCOFFStreamer.cpp b/llvm/lib/MC/MCXCOFFStreamer.cpp index 25a678c68416e..d8ac07bc85b1b 100644 --- a/llvm/lib/MC/MCXCOFFStreamer.cpp +++ b/llvm/lib/MC/MCXCOFFStreamer.cpp @@ -81,6 +81,21 @@ void MCXCOFFStreamer::emitXCOFFSymbolLinkageWithVisibility( emitSymbolAttribute(Symbol, Visibility); } +void MCXCOFFStreamer::emitXCOFFRefDirective(const MCSymbol *Symbol) { + // Add a Fixup here to later record a relocation of type R_REF to prevent the + // ref symbol from being garbage collected (by the binder). + MCDataFragment *DF = getOrCreateDataFragment(); + const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext()); + std::optional MaybeKind = + getAssembler().getBackend().getFixupKind("R_REF"); + if (!MaybeKind) + report_fatal_error("failed to get fixup kind for R_REF relocation"); + + MCFixupKind Kind = *MaybeKind; + MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, Kind); + DF->getFixups().push_back(Fixup); +} + void MCXCOFFStreamer::emitXCOFFExceptDirective(const MCSymbol *Symbol, const MCSymbol *Trap, unsigned Lang, unsigned Reason, diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index c79bdeb2cac4c..6452050d5941e 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -663,7 +663,10 @@ void XCOFFObjectWriter::recordRelocation(MCAssembler &Asm, // instr address plus any constant value. 
FixedValue = SectionMap[SymASec]->Address - BRInstrAddress + Target.getConstant(); - } + } else if (Type == XCOFF::RelocationType::R_REF) + // The FixedValue should always be 0 since it specifies a nonrelocating + // reference. + FixedValue = 0; assert((Fixup.getOffset() <= MaxRawDataSize - Layout.getFragmentOffset(Fragment)) && diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 227bd59ba3a64..a814bb1b4c07e 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -238,6 +238,8 @@ class XCOFFPPCAsmBackend : public PPCAsmBackend { createObjectTargetWriter() const override { return createPPCXCOFFObjectWriter(TT.isArch64Bit()); } + + std::optional getFixupKind(StringRef Name) const override; }; } // end anonymous namespace @@ -272,6 +274,13 @@ ELFPPCAsmBackend::getFixupKind(StringRef Name) const { return std::nullopt; } +std::optional +XCOFFPPCAsmBackend::getFixupKind(StringRef Name) const { + return StringSwitch>(Name) + .Case("R_REF", (MCFixupKind)PPC::fixup_ppc_nofixup) + .Default(std::nullopt); +} + MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, const MCSubtargetInfo &STI, const MCRegisterInfo &MRI, diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index df0c666f5b113..9e8ee9f23107b 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -48,7 +48,8 @@ enum Fixups { /// Not a true fixup, but ties a symbol to a call to __tls_get_addr for the /// TLS general and local dynamic models, or inserts the thread-pointer - /// register number. + /// register number. It can also be used to tie the ref symbol to prevent it + /// from being garbage collected on AIX. 
fixup_ppc_nofixup, /// A 16-bit fixup corresponding to lo16(_foo) with implied 3 zero bits for diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp index 729cb35cbebcf..b6e749b781804 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFObjectWriter.cpp @@ -90,6 +90,12 @@ std::pair PPCXCOFFObjectWriter::getRelocTypeAndSignSize( return {XCOFF::RelocationType::R_RBR, EncodedSignednessIndicator | 25}; case PPC::fixup_ppc_br24abs: return {XCOFF::RelocationType::R_RBA, EncodedSignednessIndicator | 25}; + case PPC::fixup_ppc_nofixup: { + if (Modifier == MCSymbolRefExpr::VK_None) + return {XCOFF::RelocationType::R_REF, 0}; + else + llvm_unreachable("Unsupported Modifier"); + } break; case FK_Data_4: case FK_Data_8: const uint8_t SignAndSizeForFKData = diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 1ecaeabacf9f7..7c6fd3b85b1eb 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -2517,16 +2517,22 @@ void PPCAIXAsmPrinter::emitPGORefs() { OutStreamer->switchSection(CntsSection); if (OutContext.hasXCOFFSection( "__llvm_prf_data", - XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) - OutStreamer->emitXCOFFRefDirective("__llvm_prf_data[RW]"); + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) { + MCSymbol *S = OutContext.getOrCreateSymbol("__llvm_prf_data[RW]"); + OutStreamer->emitXCOFFRefDirective(S); + } if (OutContext.hasXCOFFSection( "__llvm_prf_names", - XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD))) - OutStreamer->emitXCOFFRefDirective("__llvm_prf_names[RO]"); + XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD))) { + MCSymbol *S = OutContext.getOrCreateSymbol("__llvm_prf_names[RO]"); + OutStreamer->emitXCOFFRefDirective(S); + } if (OutContext.hasXCOFFSection( "__llvm_prf_vnds", - XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) - OutStreamer->emitXCOFFRefDirective("__llvm_prf_vnds[RW]"); + XCOFF::CsectProperties(XCOFF::XMC_RW, XCOFF::XTY_SD))) { + MCSymbol *S = OutContext.getOrCreateSymbol("__llvm_prf_vnds[RW]"); + OutStreamer->emitXCOFFRefDirective(S); + } } } diff --git a/llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll b/llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll index 172affa4a2661..201af2f949618 100644 --- a/llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll +++ b/llvm/test/CodeGen/PowerPC/pgo-ref-directive.ll @@ -1,9 +1,22 @@ ; RUN: rm -rf %t && split-file %s %t -; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/no-ref.ll | FileCheck %s --check-prefixes=NOREF -; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/no-vnds.ll | FileCheck %s --check-prefixes=NOVNDS -; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff -xcoff-traceback-table=false < %t/with-vnds.ll | FileCheck %s --check-prefixes=WITHVNDS +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false < %t/no-ref.ll | FileCheck %s --check-prefixes=NOREF +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false --filetype=obj < %t/no-ref.ll -o %t/no-ref.o +; RUN: llvm-objdump %t/no-ref.o -r | FileCheck %s 
--check-prefix=NOREF-OBJ +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false < %t/no-vnds.ll | FileCheck %s --check-prefixes=NOVNDS +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false --filetype=obj < %t/no-vnds.ll -o %t/no-vnds.o +; RUN: llvm-objdump %t/no-vnds.o -r | FileCheck %s --check-prefix=NOVNDS-OBJ + +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false < %t/with-vnds.ll | FileCheck %s --check-prefixes=WITHVNDS +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mattr=-altivec -mtriple powerpc-ibm-aix-xcoff \ +; RUN: -xcoff-traceback-table=false --filetype=obj < %t/with-vnds.ll -o %t/with-vnds.o +; RUN: llvm-objdump %t/with-vnds.o -tr | FileCheck %s --check-prefix=WITHVNDS-OBJ ;--- no-ref.ll ; The absence of a __llvm_prf_cnts section should stop generating the .refs. @@ -27,6 +40,10 @@ entry: ; NOREF-NOT: .ref __llvm_prf_names ; NOREF-NOT: .ref __llvm_prf_vnds +; NOREF-OBJ-NOT: R_REF __llvm_prf_data +; NOREF-OBJ-NOT: R_REF __llvm_prf_names +; NOREF-OBJ-NOT: R_REF __llvm_prf_vnds + ;--- no-vnds.ll ; This is the most common case. When -fprofile-generate is used and there exists executable code, we generate the __llvm_prf_cnts, __llvm_prf_data, and __llvm_prf_names sections. ; @@ -56,6 +73,10 @@ entry: ; NOVNDS-NEXT: .ref __llvm_prf_names[RO] ; NOVNDS-NOT: .ref __llvm_prf_vnds +; NOVNDS-OBJ: 00000008 R_REF __llvm_prf_data +; NOVNDS-OBJ: 00000008 R_REF __llvm_prf_names +; NOVNDS-OBJ-NOT: R_REF __llvm_prf_vnds + ;--- with-vnds.ll ; When value profiling is needed, the PGO instrumentation generates variables in the __llvm_prf_vnds section, so we generate a .ref for them too. 
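; As a sketch of the expected output (directive spellings taken from the
; checks below, placement inferred from emitPGORefs, which switches to the
; counters section before emitting the refs):
;
;   .ref __llvm_prf_data[RW]
;   .ref __llvm_prf_names[RO]
;   .ref __llvm_prf_vnds[RW]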
; @@ -80,3 +101,22 @@ entry: ; WITHVNDS-NEXT: .ref __llvm_prf_data[RW] ; WITHVNDS-NEXT: .ref __llvm_prf_names[RO] ; WITHVNDS-NEXT: .ref __llvm_prf_vnds[RW] + +; WITHVNDS-OBJ: SYMBOL TABLE: +; WITHVNDS-OBJ-NEXT: 00000000 df *DEBUG* 00000000 +; WITHVNDS-OBJ-NEXT: 00000000 l .text 00000008 .text +; WITHVNDS-OBJ-NEXT: 00000000 g F .text (csect: .text) 00000000 .main +; WITHVNDS-OBJ-NEXT: 00000008 l .text 00000006 __llvm_prf_names +; WITHVNDS-OBJ-NEXT: 00000010 l O .data 00000008 __llvm_prf_cnts +; WITHVNDS-OBJ-NEXT: 00000018 l O .data 00000008 __llvm_prf_data +; WITHVNDS-OBJ-NEXT: 00000020 l O .data 000000f0 __llvm_prf_vnds +; WITHVNDS-OBJ-NEXT: 00000110 g O .data 0000000c main +; WITHVNDS-OBJ-NEXT: 0000011c l .data 00000000 TOC + +; WITHVNDS-OBJ: RELOCATION RECORDS FOR [.data]: +; WITHVNDS-OBJ-NEXT: OFFSET TYPE VALUE +; WITHVNDS-OBJ-NEXT: 00000008 R_REF __llvm_prf_data +; WITHVNDS-OBJ-NEXT: 00000008 R_REF __llvm_prf_names +; WITHVNDS-OBJ-NEXT: 00000008 R_REF __llvm_prf_vnds +; WITHVNDS-OBJ-NEXT: 00000100 R_POS .main +; WITHVNDS-OBJ-NEXT: 00000104 R_POS TOC From 8c10256734cd47274671fcabe94f24f15ecd6209 Mon Sep 17 00:00:00 2001 From: MarcoFalke <*~=`'#}+{/-|&$^_@721217.xyz> Date: Tue, 14 Mar 2023 14:23:07 +0100 Subject: [PATCH 387/691] clang-tidy: Detect use-after-move in CXXCtorInitializer Fixes https://github.com/llvm/llvm-project/issues/51844 Differential Revision: https://reviews.llvm.org/D146288 --- .../clang-tidy/bugprone/UseAfterMoveCheck.cpp | 82 ++++++++---- clang-tools-extra/docs/ReleaseNotes.rst | 4 + .../checkers/bugprone/use-after-move.cpp | 126 ++++++++++++++++++ 3 files changed, 184 insertions(+), 28 deletions(-) diff --git a/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp index b7eadb87b4fcd..c10c3652a153a 100644 --- a/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/UseAfterMoveCheck.cpp @@ -58,11 +58,11 @@ class UseAfterMoveFinder { public: UseAfterMoveFinder(ASTContext *TheContext); - // Within the given function body, finds the first use of 'MovedVariable' that + // Within the given code block, finds the first use of 'MovedVariable' that // occurs after 'MovingCall' (the expression that performs the move). If a // use-after-move is found, writes information about it to 'TheUseAfterMove'. // Returns whether a use-after-move was found. 
- bool find(Stmt *FunctionBody, const Expr *MovingCall, + bool find(Stmt *CodeBlock, const Expr *MovingCall, const ValueDecl *MovedVariable, UseAfterMove *TheUseAfterMove); private: @@ -104,7 +104,7 @@ static StatementMatcher inDecltypeOrTemplateArg() { UseAfterMoveFinder::UseAfterMoveFinder(ASTContext *TheContext) : Context(TheContext) {} -bool UseAfterMoveFinder::find(Stmt *FunctionBody, const Expr *MovingCall, +bool UseAfterMoveFinder::find(Stmt *CodeBlock, const Expr *MovingCall, const ValueDecl *MovedVariable, UseAfterMove *TheUseAfterMove) { // Generate the CFG manually instead of through an AnalysisDeclContext because @@ -118,12 +118,11 @@ bool UseAfterMoveFinder::find(Stmt *FunctionBody, const Expr *MovingCall, Options.AddImplicitDtors = true; Options.AddTemporaryDtors = true; std::unique_ptr<CFG> TheCFG = - CFG::buildCFG(nullptr, FunctionBody, Context, Options); + CFG::buildCFG(nullptr, CodeBlock, Context, Options); if (!TheCFG) return false; - Sequence = - std::make_unique<ExprSequence>(TheCFG.get(), FunctionBody, Context); + Sequence = std::make_unique<ExprSequence>(TheCFG.get(), CodeBlock, Context); BlockMap = std::make_unique<StmtToBlockMap>(TheCFG.get(), Context); Visited.clear(); @@ -398,20 +397,28 @@ static void emitDiagnostic(const Expr *MovingCall, const DeclRefExpr *MoveArg, } void UseAfterMoveCheck::registerMatchers(MatchFinder *Finder) { + // try_emplace is a common maybe-moving function that returns a + // bool to tell callers whether it moved. Ignore std::move inside + // try_emplace to avoid false positives as we don't track uses of + // the bool. + auto TryEmplaceMatcher = + cxxMemberCallExpr(callee(cxxMethodDecl(hasName("try_emplace")))); auto CallMoveMatcher = - callExpr(callee(functionDecl(hasName("::std::move"))), argumentCountIs(1), + callExpr(argumentCountIs(1), callee(functionDecl(hasName("::std::move"))), hasArgument(0, declRefExpr().bind("arg")), + unless(inDecltypeOrTemplateArg()), + unless(hasParent(TryEmplaceMatcher)), expr().bind("call-move"), anyOf(hasAncestor(compoundStmt( hasParent(lambdaExpr().bind("containing-lambda")))), - hasAncestor(functionDecl().bind("containing-func"))), - unless(inDecltypeOrTemplateArg()), - // try_emplace is a common maybe-moving function that returns a - // bool to tell callers whether it moved. Ignore std::move inside - // try_emplace to avoid false positives as we don't track uses of - // the bool.
- unless(hasParent(cxxMemberCallExpr( - callee(cxxMethodDecl(hasName("try_emplace"))))))) - .bind("call-move"); + hasAncestor(functionDecl(anyOf( + cxxConstructorDecl( + hasAnyConstructorInitializer(withInitializer( + expr(anyOf(equalsBoundNode("call-move"), + hasDescendant(expr( + equalsBoundNode("call-move"))))) + .bind("containing-ctor-init")))) + .bind("containing-ctor"), + functionDecl().bind("containing-func")))))); Finder->addMatcher( traverse( @@ -434,6 +441,10 @@ void UseAfterMoveCheck::registerMatchers(MatchFinder *Finder) { } void UseAfterMoveCheck::check(const MatchFinder::MatchResult &Result) { + const auto *ContainingCtor = + Result.Nodes.getNodeAs<CXXConstructorDecl>("containing-ctor"); + const auto *ContainingCtorInit = + Result.Nodes.getNodeAs<Expr>("containing-ctor-init"); const auto *ContainingLambda = Result.Nodes.getNodeAs<LambdaExpr>("containing-lambda"); const auto *ContainingFunc = @@ -445,23 +456,38 @@ void UseAfterMoveCheck::check(const MatchFinder::MatchResult &Result) { if (!MovingCall || !MovingCall->getExprLoc().isValid()) MovingCall = CallMove; - Stmt *FunctionBody = nullptr; - if (ContainingLambda) - FunctionBody = ContainingLambda->getBody(); - else if (ContainingFunc) - FunctionBody = ContainingFunc->getBody(); - else - return; - // Ignore the std::move if the variable that was passed to it isn't a local // variable. if (!Arg->getDecl()->getDeclContext()->isFunctionOrMethod()) return; - UseAfterMoveFinder Finder(Result.Context); - UseAfterMove Use; - if (Finder.find(FunctionBody, MovingCall, Arg->getDecl(), &Use)) - emitDiagnostic(MovingCall, Arg, Use, this, Result.Context); + // Collect all code blocks that could use the arg after move. + llvm::SmallVector<Stmt *> CodeBlocks{}; + if (ContainingCtor) { + CodeBlocks.push_back(ContainingCtor->getBody()); + if (ContainingCtorInit) { + // Collect the constructor initializer expressions. + bool BeforeMove{true}; + for (CXXCtorInitializer *Init : ContainingCtor->inits()) { + if (BeforeMove && Init->getInit()->IgnoreImplicit() == + ContainingCtorInit->IgnoreImplicit()) + BeforeMove = false; + if (!BeforeMove) + CodeBlocks.push_back(Init->getInit()); + } + } + } else if (ContainingLambda) { + CodeBlocks.push_back(ContainingLambda->getBody()); + } else if (ContainingFunc) { + CodeBlocks.push_back(ContainingFunc->getBody()); + } + + for (Stmt *CodeBlock : CodeBlocks) { + UseAfterMoveFinder Finder(Result.Context); + UseAfterMove Use; + if (Finder.find(CodeBlock, MovingCall, Arg->getDecl(), &Use)) + emitDiagnostic(MovingCall, Arg, Use, this, Result.Context); + } } } // namespace clang::tidy::bugprone diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 80f5b46681713..89419141cebbd 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -162,6 +162,10 @@ Changes in existing checks ` check. Global options of the same name should be used instead. +- Improved :doc:`bugprone-use-after-move + <clang-tidy/checks/bugprone/use-after-move>` check to also cover constructor + initializers. + - Deprecated check-local options `HeaderFileExtensions` in :doc:`google-build-namespaces <clang-tidy/checks/google/build-namespaces>` check.
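For illustration, the newly diagnosed pattern is a move in one constructor initializer followed by a use in a later initializer, roughly like this (a minimal sketch distilled from the tests below, not code from the check itself):

  struct S {
    S(std::string v)
        : s{std::move(v)}, // move occurs here
          b{v.empty()} {}  // 'v' used after it was moved
    std::string s;
    bool b;
  };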
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp index 45cef8abfd1f6..1e0831048dbd4 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/use-after-move.cpp @@ -369,6 +369,18 @@ void lambdas() { }; a.foo(); } + // Don't warn if 'a' is a copy inside a synchronous lambda + { + A a; + A copied{[a] mutable { return std::move(a); }()}; + a.foo(); + } + // False negative (should warn if 'a' is a ref inside a synchronous lambda) + { + A a; + A moved{[&a] mutable { return std::move(a); }()}; + a.foo(); + } // Warn if the use consists of a capture that happens after a move. { A a; @@ -1367,6 +1379,120 @@ void typeId() { } } // namespace UnevalContext +class CtorInit { +public: + CtorInit(std::string val) + : a{val.empty()}, // fine + s{std::move(val)}, + b{val.empty()} + // CHECK-NOTES: [[@LINE-1]]:11: warning: 'val' used after it was moved + // CHECK-NOTES: [[@LINE-3]]:9: note: move occurred here + {} + +private: + bool a; + std::string s; + bool b; +}; + +class CtorInitLambda { +public: + CtorInitLambda(std::string val) + : a{val.empty()}, // fine + s{std::move(val)}, + b{[&] { return val.empty(); }()}, + // CHECK-NOTES: [[@LINE-1]]:12: warning: 'val' used after it was moved + // CHECK-NOTES: [[@LINE-3]]:9: note: move occurred here + c{[] { + std::string str{}; + std::move(str); + return str.empty(); + // CHECK-NOTES: [[@LINE-1]]:18: warning: 'str' used after it was moved + // CHECK-NOTES: [[@LINE-3]]:11: note: move occurred here + }()} { + std::move(val); + // CHECK-NOTES: [[@LINE-1]]:15: warning: 'val' used after it was moved + // CHECK-NOTES: [[@LINE-13]]:9: note: move occurred here + std::string val2{}; + std::move(val2); + val2.empty(); + // CHECK-NOTES: [[@LINE-1]]:5: warning: 'val2' used after it was moved + // CHECK-NOTES: [[@LINE-3]]:5: note: move occurred here + } + +private: + bool a; + std::string s; + bool b; + bool c; + bool d{}; +}; + +class CtorInitOrder { +public: + CtorInitOrder(std::string val) + : a{val.empty()}, // fine + b{val.empty()}, + // CHECK-NOTES: [[@LINE-1]]:11: warning: 'val' used after it was moved + s{std::move(val)} {} // wrong order + // CHECK-NOTES: [[@LINE-1]]:9: note: move occurred here + // CHECK-NOTES: [[@LINE-4]]:11: note: the use happens in a later loop iteration than the move + +private: + bool a; + std::string s; + bool b; +}; + +struct Obj {}; +struct CtorD { + CtorD(Obj b); +}; + +struct CtorC { + CtorC(Obj b); +}; + +struct CtorB { + CtorB(Obj &b); +}; + +struct CtorA : CtorB, CtorC, CtorD { + CtorA(Obj b) : CtorB{b}, CtorC{std::move(b)}, CtorD{b} {} + // CHECK-NOTES: [[@LINE-1]]:55: warning: 'b' used after it was moved + // CHECK-NOTES: [[@LINE-2]]:34: note: move occurred here +}; + +struct Base { + Base(Obj b) : bb{std::move(b)} {} + template Base(Call &&c) : bb{c()} {}; + + Obj bb; +}; + +struct Derived : Base, CtorC { + Derived(Obj b) + : Base{[&] mutable { return std::move(b); }()}, + // False negative: The lambda/std::move was executed, so it should warn + // below + CtorC{b} {} +}; + +struct Derived2 : Base, CtorC { + Derived2(Obj b) + : Base{[&] mutable { return std::move(b); }}, + // This was a move, but it doesn't warn below, because it can't know if + // the lambda/std::move was actually called + CtorC{b} {} +}; + +struct Derived3 : Base, CtorC { + Derived3(Obj b) + : Base{[c = std::move(b)] mutable { return std::move(c); }}, CtorC{b} {} + // 
CHECK-NOTES: [[@LINE-1]]:74: warning: 'b' used after it was moved + // CHECK-NOTES: [[@LINE-2]]:19: note: move occurred here +}; + class PR38187 { public: PR38187(std::string val) : val_(std::move(val)) { From 814177e434d8daf70a3d67345c166d40457f68f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 23 Mar 2023 10:33:03 +0100 Subject: [PATCH 388/691] Revert "[clang][Interp][NFC] Add tests for __fp16" This reverts commit 0691bcb18024a28e82e8dd9a08ab0820b40c9a37. Looks like this breaks builders, e.g. https://lab.llvm.org/buildbot#builders/231/builds/9790 --- clang/test/AST/Interp/floats.cpp | 91 -------------------------------- 1 file changed, 91 deletions(-) diff --git a/clang/test/AST/Interp/floats.cpp b/clang/test/AST/Interp/floats.cpp index b3c4dd4c19a84..7b9328c4d1182 100644 --- a/clang/test/AST/Interp/floats.cpp +++ b/clang/test/AST/Interp/floats.cpp @@ -78,94 +78,3 @@ namespace compound { } static_assert(f2() == __FLT_MAX__, ""); } - - -namespace FP16 { - constexpr int i = 2; - constexpr __fp16 f = 1.0f; - static_assert(f == 1.0f, ""); - - constexpr __fp16 f2 = 1u * f; - static_assert(f2 == 1.0f, ""); - - constexpr __fp16 f3 = 1.5; - constexpr int i3 = f3; - static_assert(i3 == 1, ""); - - constexpr bool b3 = f3; - static_assert(b3, ""); - - - static_assert(1.0f16 + 3u == 4, ""); - static_assert(4.0f16 / 1.0f16 == 4, ""); - static_assert(10.0f16 * false == 0, ""); - - constexpr __fp16 __fp16s[] = {1.0f16, 2.0f16, 3.0f16, 4.0f16}; - - constexpr __fp16 m = 5.0f16 / 0.0f16; // ref-error {{must be initialized by a constant expression}} \ - // ref-note {{division by zero}} \ - // expected-error {{must be initialized by a constant expression}} \ - // expected-note {{division by zero}} - - static_assert(~2.0f16 == 3, ""); // ref-error {{invalid argument type '_Float16' to unary expression}} \ - // expected-error {{invalid argument type '_Float16' to unary expression}} - - /// Initialized by a double. - constexpr __fp16 df = 0.0; - /// The other way around. 
- constexpr double fd = 0.0f16; - - static_assert(0.0f == -0.0f, ""); - - const int k = 3 * (1.0f16 / 3.0f16); - static_assert(k == 1, ""); - - constexpr bool b = 1.0f16; - static_assert(b, ""); - - constexpr double db = true; - static_assert(db == 1.0f16, ""); - - constexpr __fp16 fa[] = {1.0f, 2.0, 1, false}; - constexpr double da[] = {1.0f, 2.0, 1, false}; - - constexpr __fp16 fm = __FLT16_MAX__; - constexpr int someInt = fm; - - constexpr float SomeFloat = __FLT_MAX__; - constexpr __fp16 halfFloat = SomeFloat; - - constexpr float fp16ptr() { - __fp16 f1 = 1.0f16; - __fp16 *f2 = &f1; - - *f2 = 3.0; - return f1; - } - static_assert(fp16ptr() == 3.0, ""); - - namespace compound { - constexpr float f1() { - __fp16 f = 0; - f += 3.0; - f -= 3.0f; - - f += 1; - f /= 1; - f /= 1.0; - f *= f; - - f *= 2.0; - return f; - } - static_assert(f1() == 2, ""); - - constexpr float f2() { - __fp16 f = __FLT16_MAX__; - f += 1.0; - return f; - } - static_assert(f2() == __FLT16_MAX__, ""); - } - -} From 25466efb532f2255c86ffa721a126e5e5c0edc18 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 23 Mar 2023 10:34:29 +0100 Subject: [PATCH 389/691] [gn] Port e655d8a54880 more --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index bab71dfafae19..8372efe72f7eb 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -554,7 +554,6 @@ if (current_toolchain == default_toolchain) { "__mutex/mutex.h", "__mutex/tag_types.h", "__mutex/unique_lock.h", - "__mutex_base", "__node_handle", "__numeric/accumulate.h", "__numeric/adjacent_difference.h", From 61b0a492ccb1ab62c8c8aa1b154ce4c7d69a6da2 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 22 Mar 2023 10:05:48 +0000 Subject: [PATCH 390/691] [llvm][ARM] Refactor isMnemonicVPTPredicable Fixes #61607 Several names were repeated in this giant list. I have refactored it and removed the duplicates. 
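The repeated chain of Mnemonic.startswith(...) comparisons becomes a single table scan. A rough sketch of the new shape (the full table lives in the diff below as predicable_prefixes):

  static const char *predicable_prefixes[] = {"vabav", "vabd", /* ... */};
  return std::any_of(std::begin(predicable_prefixes),
                     std::end(predicable_prefixes),
                     [&Mnemonic](const char *prefix) {
                       return Mnemonic.startswith(prefix);
                     });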
Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D146619 --- .../lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 104 +++++++----------- 1 file changed, 37 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index e0bc431fd4bd8..6cbb7120e2667 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -12887,71 +12887,41 @@ bool ARMAsmParser::isMnemonicVPTPredicable(StringRef Mnemonic, if (!hasMVE()) return false; - return Mnemonic.startswith("vabav") || Mnemonic.startswith("vaddv") || - Mnemonic.startswith("vaddlv") || Mnemonic.startswith("vminnmv") || - Mnemonic.startswith("vminnmav") || Mnemonic.startswith("vminv") || - Mnemonic.startswith("vminav") || Mnemonic.startswith("vmaxnmv") || - Mnemonic.startswith("vmaxnmav") || Mnemonic.startswith("vmaxv") || - Mnemonic.startswith("vmaxav") || Mnemonic.startswith("vmladav") || - Mnemonic.startswith("vrmlaldavh") || Mnemonic.startswith("vrmlalvh") || - Mnemonic.startswith("vmlsdav") || Mnemonic.startswith("vmlav") || - Mnemonic.startswith("vmlaldav") || Mnemonic.startswith("vmlalv") || - Mnemonic.startswith("vmaxnm") || Mnemonic.startswith("vminnm") || - Mnemonic.startswith("vmax") || Mnemonic.startswith("vmin") || - Mnemonic.startswith("vshlc") || Mnemonic.startswith("vmovlt") || - Mnemonic.startswith("vmovlb") || Mnemonic.startswith("vshll") || - Mnemonic.startswith("vrshrn") || Mnemonic.startswith("vshrn") || - Mnemonic.startswith("vqrshrun") || Mnemonic.startswith("vqshrun") || - Mnemonic.startswith("vqrshrn") || Mnemonic.startswith("vqshrn") || - Mnemonic.startswith("vbic") || Mnemonic.startswith("vrev64") || - Mnemonic.startswith("vrev32") || Mnemonic.startswith("vrev16") || - Mnemonic.startswith("vmvn") || Mnemonic.startswith("veor") || - Mnemonic.startswith("vorn") || Mnemonic.startswith("vorr") || - Mnemonic.startswith("vand") || Mnemonic.startswith("vmul") || - Mnemonic.startswith("vqrdmulh") || Mnemonic.startswith("vqdmulh") || - Mnemonic.startswith("vsub") || Mnemonic.startswith("vadd") || - Mnemonic.startswith("vqsub") || Mnemonic.startswith("vqadd") || - Mnemonic.startswith("vabd") || Mnemonic.startswith("vrhadd") || - Mnemonic.startswith("vhsub") || Mnemonic.startswith("vhadd") || - Mnemonic.startswith("vdup") || Mnemonic.startswith("vcls") || - Mnemonic.startswith("vclz") || Mnemonic.startswith("vneg") || - Mnemonic.startswith("vabs") || Mnemonic.startswith("vqneg") || - Mnemonic.startswith("vqabs") || - (Mnemonic.startswith("vrint") && Mnemonic != "vrintr") || - Mnemonic.startswith("vcmla") || Mnemonic.startswith("vfma") || - Mnemonic.startswith("vfms") || Mnemonic.startswith("vcadd") || - Mnemonic.startswith("vadd") || Mnemonic.startswith("vsub") || - Mnemonic.startswith("vshl") || Mnemonic.startswith("vqshl") || - Mnemonic.startswith("vqrshl") || Mnemonic.startswith("vrshl") || - Mnemonic.startswith("vsri") || Mnemonic.startswith("vsli") || - Mnemonic.startswith("vrshr") || Mnemonic.startswith("vshr") || - Mnemonic.startswith("vpsel") || Mnemonic.startswith("vcmp") || - Mnemonic.startswith("vqdmladh") || Mnemonic.startswith("vqrdmladh") || - Mnemonic.startswith("vqdmlsdh") || Mnemonic.startswith("vqrdmlsdh") || - Mnemonic.startswith("vcmul") || Mnemonic.startswith("vrmulh") || - Mnemonic.startswith("vqmovn") || Mnemonic.startswith("vqmovun") || - Mnemonic.startswith("vmovnt") || Mnemonic.startswith("vmovnb") || - Mnemonic.startswith("vmaxa") || 
Mnemonic.startswith("vmaxnma") || - Mnemonic.startswith("vhcadd") || Mnemonic.startswith("vadc") || - Mnemonic.startswith("vsbc") || Mnemonic.startswith("vrshr") || - Mnemonic.startswith("vshr") || Mnemonic.startswith("vstrb") || - Mnemonic.startswith("vldrb") || - (Mnemonic.startswith("vstrh") && Mnemonic != "vstrhi") || - (Mnemonic.startswith("vldrh") && Mnemonic != "vldrhi") || - Mnemonic.startswith("vstrw") || Mnemonic.startswith("vldrw") || - Mnemonic.startswith("vldrd") || Mnemonic.startswith("vstrd") || - Mnemonic.startswith("vqdmull") || Mnemonic.startswith("vbrsr") || - Mnemonic.startswith("vfmas") || Mnemonic.startswith("vmlas") || - Mnemonic.startswith("vmla") || Mnemonic.startswith("vqdmlash") || - Mnemonic.startswith("vqdmlah") || Mnemonic.startswith("vqrdmlash") || - Mnemonic.startswith("vqrdmlah") || Mnemonic.startswith("viwdup") || - Mnemonic.startswith("vdwdup") || Mnemonic.startswith("vidup") || - Mnemonic.startswith("vddup") || Mnemonic.startswith("vctp") || - Mnemonic.startswith("vpnot") || Mnemonic.startswith("vbic") || - Mnemonic.startswith("vrmlsldavh") || Mnemonic.startswith("vmlsldav") || - Mnemonic.startswith("vcvt") || - MS.isVPTPredicableCDEInstr(Mnemonic) || - (Mnemonic.startswith("vmov") && - !(ExtraToken == ".f16" || ExtraToken == ".32" || - ExtraToken == ".16" || ExtraToken == ".8")); + if (MS.isVPTPredicableCDEInstr(Mnemonic) || + (Mnemonic.startswith("vldrh") && Mnemonic != "vldrhi") || + (Mnemonic.startswith("vmov") && + !(ExtraToken == ".f16" || ExtraToken == ".32" || ExtraToken == ".16" || + ExtraToken == ".8")) || + (Mnemonic.startswith("vrint") && Mnemonic != "vrintr") || + (Mnemonic.startswith("vstrh") && Mnemonic != "vstrhi")) + return true; + + const char *predicable_prefixes[] = { + "vabav", "vabd", "vabs", "vadc", "vadd", + "vaddlv", "vaddv", "vand", "vbic", "vbrsr", + "vcadd", "vcls", "vclz", "vcmla", "vcmp", + "vcmul", "vctp", "vcvt", "vddup", "vdup", + "vdwdup", "veor", "vfma", "vfmas", "vfms", + "vhadd", "vhcadd", "vhsub", "vidup", "viwdup", + "vldrb", "vldrd", "vldrw", "vmax", "vmaxa", + "vmaxav", "vmaxnm", "vmaxnma", "vmaxnmav", "vmaxnmv", + "vmaxv", "vmin", "vminav", "vminnm", "vminnmav", + "vminnmv", "vminv", "vmla", "vmladav", "vmlaldav", + "vmlalv", "vmlas", "vmlav", "vmlsdav", "vmlsldav", + "vmovlb", "vmovlt", "vmovnb", "vmovnt", "vmul", + "vmvn", "vneg", "vorn", "vorr", "vpnot", + "vpsel", "vqabs", "vqadd", "vqdmladh", "vqdmlah", + "vqdmlash", "vqdmlsdh", "vqdmulh", "vqdmull", "vqmovn", + "vqmovun", "vqneg", "vqrdmladh", "vqrdmlah", "vqrdmlash", + "vqrdmlsdh", "vqrdmulh", "vqrshl", "vqrshrn", "vqrshrun", + "vqshl", "vqshrn", "vqshrun", "vqsub", "vrev16", + "vrev32", "vrev64", "vrhadd", "vrmlaldavh", "vrmlalvh", + "vrmlsldavh", "vrmulh", "vrshl", "vrshr", "vrshrn", + "vsbc", "vshl", "vshlc", "vshll", "vshr", + "vshrn", "vsli", "vsri", "vstrb", "vstrd", + "vstrw", "vsub"}; + + return std::any_of( + std::begin(predicable_prefixes), std::end(predicable_prefixes), + [&Mnemonic](const char *prefix) { return Mnemonic.startswith(prefix); }); } From 4f17d75b24ee3f75fb9755461c04a73ed2f018f8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Mar 2023 09:42:45 +0000 Subject: [PATCH 391/691] [X86] LowerVectorAllZero - early out if the type size is not pow2. NFC. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 6cf359d6d217a..e828fe4b9dd15 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24167,6 +24167,10 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC, return SDValue(); } + // Quit if not convertable to legal scalar or 128/256-bit vector. + if (!llvm::has_single_bit(VT.getSizeInBits())) + return SDValue(); + assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode"); X86CC = (CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE); @@ -24188,10 +24192,6 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC, DAG.getConstant(0, DL, IntVT)); } - // Quit if not splittable to 128/256-bit vector. - if (!llvm::has_single_bit(VT.getSizeInBits())) - return SDValue(); - // Split down to 128/256-bit vector. unsigned TestSize = Subtarget.hasAVX() ? 256 : 128; while (VT.getSizeInBits() > TestSize) { From 7a5b95732ade6c2de69b26f1038aa0a5afc39393 Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Thu, 23 Mar 2023 09:45:00 +0000 Subject: [PATCH 392/691] [clang-format] NFC Format.h and ClangFormatStyleOptions.rst are out of date Regenerate the style documentation, requires some minor sphinx changes to avoid warnings Differential Revision: https://reviews.llvm.org/D146704 --- clang/docs/ClangFormatStyleOptions.rst | 89 ++++++++++++++++++++++++++ clang/include/clang/Format/Format.h | 48 +++++++++++--- 2 files changed, 127 insertions(+), 10 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index fd8f2bbb54322..37500d7bff52d 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -3642,6 +3642,95 @@ the configuration (without a prefix: ``Auto``). **MacroBlockEnd** (``String``) :versionbadge:`clang-format 3.7` :ref:`¶ ` A regular expression matching macros that end a block. +.. _Macros: +<<<<<<< +======= + +**Macros** (``List of Strings``) :versionbadge:`clang-format 17.0` :ref:`¶ ` + A list of macros of the form ``=`` . + + Code will be parsed with macros expanded, in order to determine how to + interpret and format the macro arguments. + + For example, the code: + + .. code-block:: c++ + + A(a*b); + + will usually be interpreted as a call to a function A, and the + multiplication expression will be formatted as `a * b`. + + If we specify the macro definition: + + .. code-block:: yaml + + Macros: + - A(x)=x + + the code will now be parsed as a declaration of the variable b of type a*, + and formatted as `a* b` (depending on pointer-binding rules). + + Features and restrictions: + * Both function-like macros and object-like macros are supported. + * Macro arguments must be used exactly once in the expansion. + * No recursive expansion; macros referencing other macros will be + ignored. + * Overloading by arity is supported: for example, given the macro + definitions A=x, A()=y, A(a)=a: + + + .. code-block:: c++ + + A; -> x; + A(); -> y; + A(z); -> z; + A(a, b); // will not be expanded. + +.. _MaxEmptyLinesToKeep: +>>>>>>> + +**Macros** (``List of Strings``) :versionbadge:`clang-format 17.0` :ref:`¶ ` + A list of macros of the form ``=`` . + + Code will be parsed with macros expanded, in order to determine how to + interpret and format the macro arguments. + + For example, the code: + + .. 
code-block:: c++ + + A(a*b); + + will usually be interpreted as a call to a function A, and the + multiplication expression will be formatted as `a * b`. + + If we specify the macro definition: + + .. code-block:: yaml + + Macros: + - A(x)=x + + the code will now be parsed as a declaration of the variable b of type a*, + and formatted as `a* b` (depending on pointer-binding rules). + + Features and restrictions: + * Both function-like macros and object-like macros are supported. + * Macro arguments must be used exactly once in the expansion. + * No recursive expansion; macros referencing other macros will be + ignored. + * Overloading by arity is supported: for example, given the macro + definitions A=x, A()=y, A(a)=a: + + + .. code-block:: c++ + + A; -> x; + A(); -> y; + A(z); -> z; + A(a, b); // will not be expanded. + .. _MaxEmptyLinesToKeep: **MaxEmptyLinesToKeep** (``Unsigned``) :versionbadge:`clang-format 3.7` :ref:`¶ ` diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 66904a6a11232..e2709cca3967f 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2754,28 +2754,56 @@ struct FormatStyle { /// \code /// A(a*b); /// \endcode + /// /// will usually be interpreted as a call to a function A, and the /// multiplication expression will be formatted as `a * b`. /// /// If we specify the macro definition: - /// \code + /// \code{.yaml} /// Macros: /// - A(x)=x /// \endcode + /// /// the code will now be parsed as a declaration of the variable b of type a*, /// and formatted as `a* b` (depending on pointer-binding rules). /// /// Features and restrictions: - /// * Both function-like macros and object-like macros are supported. - /// * Macro arguments must be used exactly once in the expansion. - /// * No recursive expansion; macros referencing other macros will be +<<<<<<< +======= + /// * Both function-like macros and object-like macros are supported. + /// * Macro arguments must be used exactly once in the expansion. + /// * No recursive expansion; macros referencing other macros will be + /// ignored. + /// * Overloading by arity is supported: for example, given the macro + /// definitions A=x, A()=y, A(a)=a: + /// + /// \code + /// A; -> x; + /// A(); -> y; + /// A(z); -> z; + /// A(a, b); // will not be expanded. + /// \endcode + /// + /// \version 17.0 + std::vector Macros; + + /// The maximum number of consecutive empty lines to keep. +>>>>>>> + /// * Both function-like macros and object-like macros are supported. + /// * Macro arguments must be used exactly once in the expansion. + /// * No recursive expansion; macros referencing other macros will be /// ignored. - /// * Overloading by arity is supported: for example, given the macro - /// definitions A=x, A()=y, A(a)=a, - /// 'A;' -> 'x;' - /// 'A();' -> 'y;' - /// 'A(z);' -> 'z;' - /// 'A(a, b) will not be expanded. + /// * Overloading by arity is supported: for example, given the macro + /// definitions A=x, A()=y, A(a)=a: + /// + /// \code + /// A; -> x; + /// A(); -> y; + /// A(z); -> z; + /// A(a, b); // will not be expanded. + /// \endcode + /// + /// \version 17.0 std::vector Macros; /// The maximum number of consecutive empty lines to keep. From 7c928205c1f5a972f1f4dbeae83bd979c9a617d7 Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Thu, 23 Mar 2023 09:52:59 +0000 Subject: [PATCH 393/691] Revert "[clang-format] NFC Format.h and ClangFormatStyleOptions.rst are out of date" This reverts commit 7a5b95732ade6c2de69b26f1038aa0a5afc39393. 
--- clang/docs/ClangFormatStyleOptions.rst | 89 -------------------------- clang/include/clang/Format/Format.h | 48 +++----------- 2 files changed, 10 insertions(+), 127 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 37500d7bff52d..fd8f2bbb54322 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -3642,95 +3642,6 @@ the configuration (without a prefix: ``Auto``). **MacroBlockEnd** (``String``) :versionbadge:`clang-format 3.7` :ref:`¶ ` A regular expression matching macros that end a block. -.. _Macros: -<<<<<<< -======= - -**Macros** (``List of Strings``) :versionbadge:`clang-format 17.0` :ref:`¶ ` - A list of macros of the form ``=`` . - - Code will be parsed with macros expanded, in order to determine how to - interpret and format the macro arguments. - - For example, the code: - - .. code-block:: c++ - - A(a*b); - - will usually be interpreted as a call to a function A, and the - multiplication expression will be formatted as `a * b`. - - If we specify the macro definition: - - .. code-block:: yaml - - Macros: - - A(x)=x - - the code will now be parsed as a declaration of the variable b of type a*, - and formatted as `a* b` (depending on pointer-binding rules). - - Features and restrictions: - * Both function-like macros and object-like macros are supported. - * Macro arguments must be used exactly once in the expansion. - * No recursive expansion; macros referencing other macros will be - ignored. - * Overloading by arity is supported: for example, given the macro - definitions A=x, A()=y, A(a)=a: - - - .. code-block:: c++ - - A; -> x; - A(); -> y; - A(z); -> z; - A(a, b); // will not be expanded. - -.. _MaxEmptyLinesToKeep: ->>>>>>> - -**Macros** (``List of Strings``) :versionbadge:`clang-format 17.0` :ref:`¶ ` - A list of macros of the form ``=`` . - - Code will be parsed with macros expanded, in order to determine how to - interpret and format the macro arguments. - - For example, the code: - - .. code-block:: c++ - - A(a*b); - - will usually be interpreted as a call to a function A, and the - multiplication expression will be formatted as `a * b`. - - If we specify the macro definition: - - .. code-block:: yaml - - Macros: - - A(x)=x - - the code will now be parsed as a declaration of the variable b of type a*, - and formatted as `a* b` (depending on pointer-binding rules). - - Features and restrictions: - * Both function-like macros and object-like macros are supported. - * Macro arguments must be used exactly once in the expansion. - * No recursive expansion; macros referencing other macros will be - ignored. - * Overloading by arity is supported: for example, given the macro - definitions A=x, A()=y, A(a)=a: - - - .. code-block:: c++ - - A; -> x; - A(); -> y; - A(z); -> z; - A(a, b); // will not be expanded. - .. _MaxEmptyLinesToKeep: **MaxEmptyLinesToKeep** (``Unsigned``) :versionbadge:`clang-format 3.7` :ref:`¶ ` diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index e2709cca3967f..66904a6a11232 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2754,56 +2754,28 @@ struct FormatStyle { /// \code /// A(a*b); /// \endcode - /// /// will usually be interpreted as a call to a function A, and the /// multiplication expression will be formatted as `a * b`. 
/// /// If we specify the macro definition: - /// \code{.yaml} + /// \code /// Macros: /// - A(x)=x /// \endcode - /// /// the code will now be parsed as a declaration of the variable b of type a*, /// and formatted as `a* b` (depending on pointer-binding rules). /// /// Features and restrictions: -<<<<<<< -======= - /// * Both function-like macros and object-like macros are supported. - /// * Macro arguments must be used exactly once in the expansion. - /// * No recursive expansion; macros referencing other macros will be - /// ignored. - /// * Overloading by arity is supported: for example, given the macro - /// definitions A=x, A()=y, A(a)=a: - /// - /// \code - /// A; -> x; - /// A(); -> y; - /// A(z); -> z; - /// A(a, b); // will not be expanded. - /// \endcode - /// - /// \version 17.0 - std::vector<std::string> Macros; - - /// The maximum number of consecutive empty lines to keep. ->>>>>>> - /// * Both function-like macros and object-like macros are supported. - /// * Macro arguments must be used exactly once in the expansion. - /// * No recursive expansion; macros referencing other macros will be + /// * Both function-like macros and object-like macros are supported. + /// * Macro arguments must be used exactly once in the expansion. + /// * No recursive expansion; macros referencing other macros will be /// ignored. - /// * Overloading by arity is supported: for example, given the macro - /// definitions A=x, A()=y, A(a)=a: - /// - /// \code - /// A; -> x; - /// A(); -> y; - /// A(z); -> z; - /// A(a, b); // will not be expanded. - /// \endcode - /// - /// \version 17.0 + /// * Overloading by arity is supported: for example, given the macro + /// definitions A=x, A()=y, A(a)=a, + /// 'A;' -> 'x;' + /// 'A();' -> 'y;' + /// 'A(z);' -> 'z;' + /// 'A(a, b) will not be expanded. std::vector<std::string> Macros; /// The maximum number of consecutive empty lines to keep. From 26d954bd4004dd01771308a5061a865073993130 Mon Sep 17 00:00:00 2001 From: Yevgeny Rouban Date: Thu, 23 Mar 2023 16:19:19 +0700 Subject: [PATCH 394/691] [AsmParser] Avoid instantiating LLVMContext if not needed. Try 2. The deleted copy constructor LLVMContext(LLVMContext &) got its parameter changed to const to allow the latest clang compiler to instantiate template std::optional<LLVMContext>. Differential Revision: https://reviews.llvm.org/D142699 --- llvm/include/llvm/IR/LLVMContext.h | 2 +- llvm/lib/AsmParser/Parser.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h index a8095a5c2fd30..c13a783e86c7a 100644 --- a/llvm/include/llvm/IR/LLVMContext.h +++ b/llvm/include/llvm/IR/LLVMContext.h @@ -68,7 +68,7 @@ class LLVMContext { public: LLVMContextImpl *const pImpl; LLVMContext(); - LLVMContext(LLVMContext &) = delete; + LLVMContext(const LLVMContext &) = delete; LLVMContext &operator=(const LLVMContext &) = delete; ~LLVMContext(); diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp index 035eea81378e5..eded892f358a8 100644 --- a/llvm/lib/AsmParser/Parser.cpp +++ b/llvm/lib/AsmParser/Parser.cpp @@ -28,9 +28,9 @@ static bool parseAssemblyInto(MemoryBufferRef F, Module *M, std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(F); SM.AddNewSourceBuffer(std::move(Buf), SMLoc()); - LLVMContext Context; + std::optional<LLVMContext> OptContext; return LLParser(F.getBuffer(), SM, Err, M, Index, - M ?
M->getContext() : OptContext.emplace(), Slots) .Run(UpgradeDebugInfo, DataLayoutCallback); } From d25751779baa37356265b004edc7e55ee4a4c383 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Thu, 23 Mar 2023 17:45:16 +0800 Subject: [PATCH 395/691] Bump RV32E version to 2.0 RV32E was recently [ratified](https://github.com/riscv/riscv-isa-manual/commit/afd613691cb89ccd7584206e8a6d1866fe77ec88) so we should update the version as our MC-layer support is complete. Reviewed By: kito-cheng Differential Revision: https://reviews.llvm.org/D144384 --- llvm/lib/Support/RISCVISAInfo.cpp | 2 +- llvm/test/MC/RISCV/attribute-arch.s | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 603b1f3d64737..93cf66ff1f739 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -41,7 +41,7 @@ static constexpr StringLiteral AllStdExts = "mafdqlcbkjtpvnh"; static const RISCVSupportedExtension SupportedExtensions[] = { {"i", RISCVExtensionVersion{2, 0}}, - {"e", RISCVExtensionVersion{1, 9}}, + {"e", RISCVExtensionVersion{2, 0}}, {"m", RISCVExtensionVersion{2, 0}}, {"a", RISCVExtensionVersion{2, 0}}, {"f", RISCVExtensionVersion{2, 0}}, diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index 7856dcf94cd1c..af0b3fe0cdc29 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -12,6 +12,9 @@ .attribute arch, "rv32i2p0" # CHECK: attribute 5, "rv32i2p0" +.attribute arch, "rv32e" +# CHECK: attribute 5, "rv32e2p0" + .attribute arch, "rv32i2_m2" # CHECK: attribute 5, "rv32i2p0_m2p0" From c2de8ff92753acdb1ace7a27cc11cb09f28eb8fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Thu, 23 Mar 2023 11:10:39 +0100 Subject: [PATCH 396/691] [JITLink] Initial AArch32 backend This first version lays the foundations for AArch32 support in JITLink. ELFLinkGraphBuilder_aarch32 processes REL-type relocations and populates LinkGraphs from ELF object files for both big- and little-endian systems. The ArmCfg member controls subarchitecture-specific details throughout the linking process (i.e. it's passed to ELFJITLinker_aarch32). Relocation types follow the ABI documentation's division into classes: Data (endian-sensitive), Arm (32-bit little-endian) and Thumb (2x 16-bit little-endian, "Thumb32" in the docs). The implementation of instruction encoding/decoding for relocation resolution is implemented symmetrically and is testable in isolation (see AArch32 category in JITLinkTests). Callable Thumb functions are marked with a ThumbSymbol target-flag and stored in the LinkGraph with their real addresses. The thumb-bit is added back in when the owning JITDylib requests the address for such a symbol. The StubsManager can generate (absolute) Thumb-state stubs for branch range extensions on v7+ targets. Proper GOT/PLT handling is not yet implemented. This patch is based on the backend implementation in ez-clang and has just enough functionality to model the infrastructure and link a Thumb function `main()` that calls `printf()` to dump "Hello Arm!" on Armv7a. It was tested on Raspberry Pi with 32-bit Raspbian OS. 
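For readers unfamiliar with the convention: Arm encodes the instruction-set state of a callable address in its least significant bit. A hypothetical helper (not part of this patch) illustrates what adding the thumb-bit back in means:

  // Code addresses are at least 2-byte aligned, so bit 0 is free to carry
  // the Thumb-state flag when an address is handed out to callers.
  uint64_t makeCallableAddr(uint64_t Addr, bool IsThumbSymbol) {
    return IsThumbSymbol ? (Addr | 0x1) : Addr;
  }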
Reviewed By: lhames Differential Revision: https://reviews.llvm.org/D144083 --- .../ExecutionEngine/JITLink/ELF_aarch32.h | 38 ++ .../llvm/ExecutionEngine/JITLink/aarch32.h | 293 ++++++++++ .../ExecutionEngine/JITLink/CMakeLists.txt | 2 + llvm/lib/ExecutionEngine/JITLink/ELF.cpp | 9 + .../JITLink/ELFLinkGraphBuilder.h | 21 + .../ExecutionEngine/JITLink/ELF_aarch32.cpp | 299 ++++++++++ llvm/lib/ExecutionEngine/JITLink/aarch32.cpp | 514 ++++++++++++++++++ .../Orc/ObjectLinkingLayer.cpp | 6 +- .../JITLink/AArch32/ELF_thumbv7_printf.s | 46 ++ .../JITLink/AArch32/lit.local.cfg | 2 + .../ExecutionEngine/JITLink/AArch32Tests.cpp | 200 +++++++ .../ExecutionEngine/JITLink/CMakeLists.txt | 1 + 12 files changed, 1430 insertions(+), 1 deletion(-) create mode 100644 llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h create mode 100644 llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h create mode 100644 llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp create mode 100644 llvm/lib/ExecutionEngine/JITLink/aarch32.cpp create mode 100644 llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s create mode 100644 llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg create mode 100644 llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h new file mode 100644 index 0000000000000..25d1c3aac2c26 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h @@ -0,0 +1,38 @@ +//===---- ELF_aarch32.h - JIT link functions for arm/thumb -----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// jit-link functions for ELF/aarch32. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32 +#define LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32 + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/JITLink/aarch32.h" + +namespace llvm { +namespace jitlink { + +/// Create a LinkGraph from an ELF/arm relocatable object +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected> +createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer); + +/// jit-link the given object buffer, which must be an ELF arm/thumb object +/// file. +void link_ELF_aarch32(std::unique_ptr G, + std::unique_ptr Ctx); + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32 diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h new file mode 100644 index 0000000000000..8488b10278771 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h @@ -0,0 +1,293 @@ +//===------ aarch32.h - Generic JITLink arm/thumb utilities -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing arm/thumb objects. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_AARCH32 +#define LLVM_EXECUTIONENGINE_JITLINK_AARCH32 + +#include "TableManager.h" +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace jitlink { +namespace aarch32 { + +/// JITLink-internal AArch32 fixup kinds +enum EdgeKind_aarch32 : Edge::Kind { + + /// + /// Relocations of class Data + /// + FirstDataRelocation = Edge::FirstRelocation, + + /// Plain 32-bit value relocation in target endianness + Data_Delta32 = FirstDataRelocation, + + LastDataRelocation = Data_Delta32, + + /// + /// Relocations of class Arm (covers fixed-width 4-byte instruction subset) + /// + FirstArmRelocation, + + /// TODO: Arm_Call is here only as a placeholder for now. + Arm_Call = FirstArmRelocation, + + LastArmRelocation = Arm_Call, + + /// + /// Relocations of class Thumb16 and Thumb32 (covers Thumb instruction subset) + /// + FirstThumbRelocation, + + /// Write immediate value for PC-relative branch with link (can bridge between + /// Arm and Thumb). + Thumb_Call = FirstThumbRelocation, + + /// Write immediate value for (unconditional) PC-relative branch without link. + Thumb_Jump24, + + /// Write immediate value to the lower halfword of the destination register + Thumb_MovwAbsNC, + + /// Write immediate value to the top halfword of the destination register + Thumb_MovtAbs, + + LastThumbRelocation = Thumb_MovtAbs, +}; + +/// Flags enum for AArch32-specific symbol properties +enum TargetFlags_aarch32 : TargetFlagsType { + ThumbSymbol = 1 << 0, +}; + +/// Human-readable name for a given CPU architecture kind +const char *getCPUArchName(ARMBuildAttrs::CPUArch K); + +/// Get a human-readable name for the given AArch32 edge kind. +const char *getEdgeKindName(Edge::Kind K); + +/// AArch32 uses stubs for a number of purposes, like branch range extension +/// or interworking between Arm and Thumb instruction subsets. +/// +/// Stub implementations vary depending on CPU architecture (v4, v6, v7), +/// instruction subset and branch type (absolute/PC-relative). +/// +/// For each kind of stub, the StubsFlavor defines one concrete form that is +/// used throughout the LinkGraph. +/// +/// Stubs are often called "veneers" in the official docs and online. +/// +enum StubsFlavor { + Unsupported = 0, + Thumbv7, +}; + +/// JITLink sub-arch configuration for Arm CPU models +struct ArmConfig { + bool J1J2BranchEncoding = false; + StubsFlavor Stubs = Unsupported; +}; + +/// Obtain the sub-arch configuration for a given Arm CPU model. 
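+///
+/// For example (reading off the switch below): v7 and v8_A enable the J1/J2
+/// branch encoding and select Thumbv7 stubs, while all other CPU arches keep
+/// the defaults and only emit a debug-time warning.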
+inline ArmConfig getArmConfigForCPUArch(ARMBuildAttrs::CPUArch CPUArch) { + ArmConfig ArmCfg; + switch (CPUArch) { + case ARMBuildAttrs::v7: + case ARMBuildAttrs::v8_A: + ArmCfg.J1J2BranchEncoding = true; + ArmCfg.Stubs = Thumbv7; + break; + default: + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Warning: ARM config not defined for CPU architecture " + << getCPUArchName(CPUArch); + }); + break; + } + return ArmCfg; +} + +/// Immutable pair of halfwords, Hi and Lo, with overflow check +struct HalfWords { + constexpr HalfWords() : Hi(0), Lo(0) {} + constexpr HalfWords(uint32_t Hi, uint32_t Lo) : Hi(Hi), Lo(Lo) { + assert(isUInt<16>(Hi) && "Overflow in first half-word"); + assert(isUInt<16>(Lo) && "Overflow in second half-word"); + } + const uint16_t Hi; // First halfword + const uint16_t Lo; // Second halfword +}; + +/// Collection of named constants per fixup kind. It may contain but is not +/// limited to the following entries: +/// +/// Opcode - Values of the op-code bits in the instruction, with +/// unaffected bits nulled +/// OpcodeMask - Mask with all bits set that encode the op-code +/// ImmMask - Mask with all bits set that encode the immediate value +/// RegMask - Mask with all bits set that encode the register +/// +template struct FixupInfo {}; + +template <> struct FixupInfo { + static constexpr HalfWords Opcode{0xf000, 0x8000}; + static constexpr HalfWords OpcodeMask{0xf800, 0x8000}; + static constexpr HalfWords ImmMask{0x07ff, 0x2fff}; + static constexpr uint16_t LoBitConditional = 0x1000; +}; + +template <> struct FixupInfo { + static constexpr HalfWords Opcode{0xf000, 0xc000}; + static constexpr HalfWords OpcodeMask{0xf800, 0xc000}; + static constexpr HalfWords ImmMask{0x07ff, 0x2fff}; + static constexpr uint16_t LoBitH = 0x0001; + static constexpr uint16_t LoBitNoBlx = 0x1000; +}; + +template <> struct FixupInfo { + static constexpr HalfWords Opcode{0xf2c0, 0x0000}; + static constexpr HalfWords OpcodeMask{0xfbf0, 0x8000}; + static constexpr HalfWords ImmMask{0x040f, 0x70ff}; + static constexpr HalfWords RegMask{0x0000, 0x0f00}; +}; + +template <> +struct FixupInfo : public FixupInfo { + static constexpr HalfWords Opcode{0xf240, 0x0000}; +}; + +/// Helper function to read the initial addend for Data-class relocations. +Expected readAddendData(LinkGraph &G, Block &B, const Edge &E); + +/// Helper function to read the initial addend for Arm-class relocations. +Expected readAddendArm(LinkGraph &G, Block &B, const Edge &E); + +/// Helper function to read the initial addend for Thumb-class relocations. +Expected readAddendThumb(LinkGraph &G, Block &B, const Edge &E, + const ArmConfig &ArmCfg); + +/// Read the initial addend for a REL-type relocation. It's the value encoded +/// in the immediate field of the fixup location by the compiler. +inline Expected readAddend(LinkGraph &G, Block &B, const Edge &E, + const ArmConfig &ArmCfg) { + Edge::Kind Kind = E.getKind(); + if (Kind <= LastDataRelocation) + return readAddendData(G, B, E); + + if (Kind <= LastArmRelocation) + return readAddendArm(G, B, E); + + if (Kind <= LastThumbRelocation) + return readAddendThumb(G, B, E, ArmCfg); + + llvm_unreachable("Relocation must be of class Data, Arm or Thumb"); +} + +/// Helper function to apply the fixup for Data-class relocations. +Error applyFixupData(LinkGraph &G, Block &B, const Edge &E); + +/// Helper function to apply the fixup for Arm-class relocations. +Error applyFixupArm(LinkGraph &G, Block &B, const Edge &E); + +/// Helper function to apply the fixup for Thumb-class relocations. 
+Error applyFixupThumb(LinkGraph &G, Block &B, const Edge &E, + const ArmConfig &ArmCfg); + +/// Apply fixup expression for edge to block content. +inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, + const ArmConfig &ArmCfg) { + Edge::Kind Kind = E.getKind(); + + if (Kind <= LastDataRelocation) + return applyFixupData(G, B, E); + + if (Kind <= LastArmRelocation) + return applyFixupArm(G, B, E); + + if (Kind <= LastThumbRelocation) + return applyFixupThumb(G, B, E, ArmCfg); + + llvm_unreachable("Relocation must be of class Data, Arm or Thumb"); +} + +/// Stubs builder for a specific StubsFlavor +/// +/// Right now we only have one default stub kind, but we want to extend this +/// and allow creation of specific kinds in the future (e.g. branch range +/// extension or interworking). +/// +/// Let's keep it simple for the moment and not wire this through a GOT. +/// +template +class StubsManager : public TableManager> { +public: + StubsManager() = default; + + /// Name of the object file section that will contain all our stubs. + static StringRef getSectionName() { return "__llvm_jitlink_STUBS"; } + + /// Implements link-graph traversal via visitExistingEdges(). + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + if (E.getTarget().isDefined()) + return false; + + switch (E.getKind()) { + case Thumb_Call: + case Thumb_Jump24: { + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + E.setTarget(this->getEntryForTarget(G, E.getTarget())); + return true; + } + } + return false; + } + + /// Create a branch range extension stub for the class's flavor. + Symbol &createEntry(LinkGraph &G, Symbol &Target); + +private: + /// Create a new node in the link-graph for the given stub template. + template + Block &addStub(LinkGraph &G, const uint8_t (&Code)[Size], + uint64_t Alignment) { + ArrayRef Template(reinterpret_cast(Code), Size); + return G.createContentBlock(getStubsSection(G), Template, + orc::ExecutorAddr(), Alignment, 0); + } + + /// Get or create the object file section that will contain all our stubs. + Section &getStubsSection(LinkGraph &G) { + if (!StubsSection) + StubsSection = &G.createSection(getSectionName(), + orc::MemProt::Read | orc::MemProt::Exec); + return *StubsSection; + } + + Section *StubsSection = nullptr; +}; + +/// Create a branch range extension stub with Thumb encoding for v7 CPUs. 
+template <> +Symbol &StubsManager::createEntry(LinkGraph &G, Symbol &Target); + +} // namespace aarch32 +} // namespace jitlink +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_AARCH32 diff --git a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt index 52ff5e8370031..bc86f45d3c185 100644 --- a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_component_library(LLVMJITLink # ELF ELF.cpp ELFLinkGraphBuilder.cpp + ELF_aarch32.cpp ELF_aarch64.cpp ELF_i386.cpp ELF_loongarch.cpp @@ -33,6 +34,7 @@ add_llvm_component_library(LLVMJITLink COFF_x86_64.cpp # Architectures: + aarch32.cpp aarch64.cpp i386.cpp loongarch.cpp diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp index ef0f19a785712..340a0ce134475 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp @@ -13,6 +13,7 @@ #include "llvm/ExecutionEngine/JITLink/ELF.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/ExecutionEngine/JITLink/ELF_aarch32.h" #include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h" #include "llvm/ExecutionEngine/JITLink/ELF_i386.h" #include "llvm/ExecutionEngine/JITLink/ELF_loongarch.h" @@ -69,6 +70,8 @@ createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer) { switch (*TargetMachineArch) { case ELF::EM_AARCH64: return createLinkGraphFromELFObject_aarch64(ObjectBuffer); + case ELF::EM_ARM: + return createLinkGraphFromELFObject_aarch32(ObjectBuffer); case ELF::EM_LOONGARCH: return createLinkGraphFromELFObject_loongarch(ObjectBuffer); case ELF::EM_RISCV: @@ -90,6 +93,12 @@ void link_ELF(std::unique_ptr G, case Triple::aarch64: link_ELF_aarch64(std::move(G), std::move(Ctx)); return; + case Triple::arm: + case Triple::armeb: + case Triple::thumb: + case Triple::thumbeb: + link_ELF_aarch32(std::move(G), std::move(Ctx)); + return; case Triple::loongarch32: case Triple::loongarch64: link_ELF_loongarch(std::move(G), std::move(Ctx)); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index 9d2d4958dcf6c..1d98acf868695 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -127,6 +127,12 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase { Error graphifySections(); Error graphifySymbols(); + /// Override in derived classes to suppress certain sections in the link + /// graph. + virtual bool excludeSection(const typename ELFT::Shdr &Sect) const { + return false; + } + /// Traverse all matching ELFT::Rela relocation records in the given section. /// The handler function Func should be callable with this signature: /// Error(const typename ELFT::Rela &, @@ -321,6 +327,13 @@ template Error ELFLinkGraphBuilder::graphifySections() { auto Name = Obj.getSectionName(Sec, SectionStringTab); if (!Name) return Name.takeError(); + if (excludeSection(Sec)) { + LLVM_DEBUG({ + dbgs() << " " << SecIndex << ": Skipping section \"" << *Name + << "\" explicitly\n"; + }); + continue; + } // Skip null sections. 
if (Sec.sh_type == ELF::SHT_NULL) { @@ -564,6 +577,10 @@ Error ELFLinkGraphBuilder::forEachRelaRelocation( LLVM_DEBUG(dbgs() << " skipped (dwarf section)\n\n"); return Error::success(); } + if (excludeSection(**FixupSection)) { + LLVM_DEBUG(dbgs() << " skipped (fixup section excluded explicitly)\n\n"); + return Error::success(); + } // Lookup the link-graph node corresponding to the target section name. auto *BlockToFix = getGraphBlock(RelSect.sh_info); @@ -610,6 +627,10 @@ Error ELFLinkGraphBuilder::forEachRelRelocation( LLVM_DEBUG(dbgs() << " skipped (dwarf section)\n\n"); return Error::success(); } + if (excludeSection(**FixupSection)) { + LLVM_DEBUG(dbgs() << " skipped (fixup section excluded explicitly)\n\n"); + return Error::success(); + } // Lookup the link-graph node corresponding to the target section name. auto *BlockToFix = getGraphBlock(RelSect.sh_info); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp new file mode 100644 index 0000000000000..0010088fef1e7 --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp @@ -0,0 +1,299 @@ +//===----- ELF_aarch32.cpp - JIT linker implementation for arm/thumb ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// ELF/aarch32 jit-link implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/ELF_aarch32.h" + +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/JITLink/aarch32.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/TargetParser/ARMTargetParser.h" + +#include "ELFLinkGraphBuilder.h" +#include "JITLinkGeneric.h" + +#define DEBUG_TYPE "jitlink" + +using namespace llvm::object; + +namespace llvm { +namespace jitlink { + +/// Translate from ELF relocation type to JITLink-internal edge kind. +Expected getJITLinkEdgeKind(uint32_t ELFType) { + switch (ELFType) { + case ELF::R_ARM_REL32: + return aarch32::Data_Delta32; + case ELF::R_ARM_CALL: + return aarch32::Arm_Call; + case ELF::R_ARM_THM_CALL: + return aarch32::Thumb_Call; + case ELF::R_ARM_THM_JUMP24: + return aarch32::Thumb_Jump24; + case ELF::R_ARM_THM_MOVW_ABS_NC: + return aarch32::Thumb_MovwAbsNC; + case ELF::R_ARM_THM_MOVT_ABS: + return aarch32::Thumb_MovtAbs; + } + + return make_error( + "Unsupported aarch32 relocation " + formatv("{0:d}: ", ELFType) + + object::getELFRelocationTypeName(ELF::EM_ARM, ELFType)); +} + +/// Translate from JITLink-internal edge kind back to ELF relocation type. +Expected getELFRelocationType(Edge::Kind Kind) { + switch (static_cast(Kind)) { + case aarch32::Data_Delta32: + return ELF::R_ARM_REL32; + case aarch32::Arm_Call: + return ELF::R_ARM_CALL; + case aarch32::Thumb_Call: + return ELF::R_ARM_THM_CALL; + case aarch32::Thumb_Jump24: + return ELF::R_ARM_THM_JUMP24; + case aarch32::Thumb_MovwAbsNC: + return ELF::R_ARM_THM_MOVW_ABS_NC; + case aarch32::Thumb_MovtAbs: + return ELF::R_ARM_THM_MOVT_ABS; + } + + return make_error(formatv("Invalid aarch32 edge {0:d}: ", + Kind)); +} + +/// Get a human-readable name for the given ELF AArch32 edge kind. 
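+/// For the aarch32 edge kinds declared in aarch32.h this yields names such as
+/// "Data_Delta32" or "Thumb_Call" (via aarch32::getEdgeKindName); any other
+/// kind falls back to the generic JITLink edge kind names.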
+const char *getELFAArch32EdgeKindName(Edge::Kind R) {
+  // No ELF-specific edge kinds yet
+  return aarch32::getEdgeKindName(R);
+}
+
+class ELFJITLinker_aarch32 : public JITLinker<ELFJITLinker_aarch32> {
+  friend class JITLinker<ELFJITLinker_aarch32>;
+
+public:
+  ELFJITLinker_aarch32(std::unique_ptr<JITLinkContext> Ctx,
+                       std::unique_ptr<LinkGraph> G, PassConfiguration PassCfg,
+                       aarch32::ArmConfig ArmCfg)
+      : JITLinker(std::move(Ctx), std::move(G), std::move(PassCfg)),
+        ArmCfg(std::move(ArmCfg)) {}
+
+private:
+  aarch32::ArmConfig ArmCfg;
+
+  Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
+    return aarch32::applyFixup(G, B, E, ArmCfg);
+  }
+};
+
+template <support::endianness DataEndianness>
+class ELFLinkGraphBuilder_aarch32
+    : public ELFLinkGraphBuilder<ELFType<DataEndianness, false>> {
+private:
+  using ELFT = ELFType<DataEndianness, false>;
+  using Base = ELFLinkGraphBuilder<ELFT>;
+
+  bool excludeSection(const typename ELFT::Shdr &Sect) const override {
+    // TODO: An .ARM.exidx (Exception Index table) entry is 8 bytes in size and
+    // consists of 2 words. It might be sufficient to process only relocations
+    // in the second word (offset 4). Please find more details in: Exception
+    // Handling ABI for the Arm® Architecture -> Index table entries
+    if (Sect.sh_type == ELF::SHT_ARM_EXIDX)
+      return true;
+    return false;
+  }
+
+  Error addRelocations() override {
+    LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+    using Self = ELFLinkGraphBuilder_aarch32<DataEndianness>;
+    for (const auto &RelSect : Base::Sections) {
+      if (Error Err = Base::forEachRelRelocation(RelSect, this,
+                                                 &Self::addSingleRelRelocation))
+        return Err;
+    }
+    return Error::success();
+  }
+
+  Error addSingleRelRelocation(const typename ELFT::Rel &Rel,
+                               const typename ELFT::Shdr &FixupSect,
+                               Block &BlockToFix) {
+    uint32_t SymbolIndex = Rel.getSymbol(false);
+    auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
+    if (!ObjSymbol)
+      return ObjSymbol.takeError();
+
+    Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex);
+    if (!GraphSymbol)
+      return make_error<StringError>(
+          formatv("Could not find symbol at given index, did you add it to "
+                  "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}",
+                  SymbolIndex, (*ObjSymbol)->st_shndx,
+                  Base::GraphSymbols.size()),
+          inconvertibleErrorCode());
+
+    uint32_t Type = Rel.getType(false);
+    Expected<aarch32::EdgeKind_aarch32> Kind = getJITLinkEdgeKind(Type);
+    if (!Kind)
+      return Kind.takeError();
+
+    auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset;
+    Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress();
+    Edge E(*Kind, Offset, *GraphSymbol, 0);
+
+    Expected<int64_t> Addend =
+        aarch32::readAddend(*Base::G, BlockToFix, E, ArmCfg);
+    if (!Addend)
+      return Addend.takeError();
+
+    E.setAddend(*Addend);
+    LLVM_DEBUG({
+      dbgs() << "    ";
+      printEdge(dbgs(), BlockToFix, E, getELFAArch32EdgeKindName(*Kind));
+      dbgs() << "\n";
+    });
+
+    BlockToFix.addEdge(std::move(E));
+    return Error::success();
+  }
+
+  aarch32::ArmConfig ArmCfg;
+
+protected:
+  TargetFlagsType makeTargetFlags(const typename ELFT::Sym &Sym) override {
+    if (Sym.getValue() & 0x01)
+      return aarch32::ThumbSymbol;
+    return TargetFlagsType{};
+  }
+
+  orc::ExecutorAddrDiff getRawOffset(const typename ELFT::Sym &Sym,
+                                     TargetFlagsType Flags) override {
+    assert((makeTargetFlags(Sym) & Flags) == Flags);
+    static constexpr uint64_t ThumbBit = 0x01;
+    return Sym.getValue() & ~ThumbBit;
+  }
+
+public:
+  ELFLinkGraphBuilder_aarch32(StringRef FileName, const ELFFile<ELFT> &Obj,
+                              Triple TT, aarch32::ArmConfig ArmCfg)
+      : ELFLinkGraphBuilder<ELFT>(Obj, std::move(TT), FileName,
+                                  getELFAArch32EdgeKindName),
+        ArmCfg(std::move(ArmCfg)) {}
+};
+
+template <aarch32::StubsFlavor Flavor>
+Error buildTables_ELF_aarch32(LinkGraph &G) {
+  LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
+
+  aarch32::StubsManager<Flavor> PLT;
+  visitExistingEdges(G, PLT);
+  return Error::success();
+}
+
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer) {
+  LLVM_DEBUG({
+    dbgs() << "Building jitlink graph for new input "
+           << ObjectBuffer.getBufferIdentifier() << "...\n";
+  });
+
+  auto ELFObj = ObjectFile::createELFObjectFile(ObjectBuffer);
+  if (!ELFObj)
+    return ELFObj.takeError();
+
+  // Find out what exact AArch32 instruction set and features we target.
+  auto TT = (*ELFObj)->makeTriple();
+  ARM::ArchKind AK = ARM::parseArch(TT.getArchName());
+  if (AK == ARM::ArchKind::INVALID)
+    return make_error<JITLinkError>(
+        "Failed to build ELF link graph: Invalid ARM ArchKind");
+
+  // Resolve our internal configuration for the target. If at some point the
+  // CPUArch alone becomes too imprecise, we can find more details in the
+  // Tag_CPU_arch_profile.
+  aarch32::ArmConfig ArmCfg;
+  using namespace ARMBuildAttrs;
+  auto Arch = static_cast<CPUArch>(ARM::getArchAttr(AK));
+  switch (Arch) {
+  case v7:
+  case v8_A:
+    ArmCfg = aarch32::getArmConfigForCPUArch(Arch);
+    assert(ArmCfg.Stubs != aarch32::Unsupported &&
+           "Provide a config for each supported CPU");
+    break;
+  default:
+    return make_error<JITLinkError>(
+        "Failed to build ELF link graph: Unsupported CPU arch " +
+        StringRef(aarch32::getCPUArchName(Arch)));
+  }
+
+  // Populate the link-graph.
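+  // For example, a thumbv7-none-linux-gnueabi input takes the Triple::thumb
+  // path below and instantiates the little-endian flavor of the builder.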
+ switch (TT.getArch()) { + case Triple::arm: + case Triple::thumb: { + auto &ELFFile = cast>(**ELFObj).getELFFile(); + return ELFLinkGraphBuilder_aarch32( + (*ELFObj)->getFileName(), ELFFile, TT, ArmCfg) + .buildGraph(); + } + case Triple::armeb: + case Triple::thumbeb: { + auto &ELFFile = cast>(**ELFObj).getELFFile(); + return ELFLinkGraphBuilder_aarch32((*ELFObj)->getFileName(), + ELFFile, TT, ArmCfg) + .buildGraph(); + } + default: + return make_error( + "Failed to build ELF/aarch32 link graph: Invalid target triple " + + TT.getTriple()); + } +} + +void link_ELF_aarch32(std::unique_ptr G, + std::unique_ptr Ctx) { + const Triple &TT = G->getTargetTriple(); + + using namespace ARMBuildAttrs; + ARM::ArchKind AK = ARM::parseArch(TT.getArchName()); + auto CPU = static_cast(ARM::getArchAttr(AK)); + aarch32::ArmConfig ArmCfg = aarch32::getArmConfigForCPUArch(CPU); + + PassConfiguration PassCfg; + if (Ctx->shouldAddDefaultTargetPasses(TT)) { + // Add a mark-live pass. + if (auto MarkLive = Ctx->getMarkLivePass(TT)) + PassCfg.PrePrunePasses.push_back(std::move(MarkLive)); + else + PassCfg.PrePrunePasses.push_back(markAllSymbolsLive); + + switch (ArmCfg.Stubs) { + case aarch32::Thumbv7: + PassCfg.PostPrunePasses.push_back( + buildTables_ELF_aarch32); + break; + case aarch32::Unsupported: + llvm_unreachable("Check before building graph"); + } + } + + if (auto Err = Ctx->modifyPassConfig(*G, PassCfg)) + return Ctx->notifyFailed(std::move(Err)); + + ELFJITLinker_aarch32::link(std::move(Ctx), std::move(G), std::move(PassCfg), + std::move(ArmCfg)); +} + +} // namespace jitlink +} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp new file mode 100644 index 0000000000000..6f49a4578cf7c --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp @@ -0,0 +1,514 @@ +//===--------- aarch32.cpp - Generic JITLink arm/thumb utilities ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing arm/thumb objects. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/aarch32.h" + +#include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MathExtras.h" + +#define DEBUG_TYPE "jitlink" + +namespace llvm { +namespace jitlink { +namespace aarch32 { + +using namespace support; +using namespace support::endian; + +/// Encode 22-bit immediate value for branch instructions without J1J2 range +/// extension (formats B T4, BL T1 and BLX T2). +/// +/// 00000:Imm11H:Imm11L:0 -> [ 00000:Imm11H, 00000:Imm11L ] +/// J1^ ^J2 will always be 1 +/// +HalfWords encodeImmBT4BlT1BlxT2(int64_t Value) { + constexpr uint32_t J1J2 = 0x2800; + uint32_t Imm11H = (Value >> 12) & 0x07ff; + uint32_t Imm11L = (Value >> 1) & 0x07ff; + return HalfWords{Imm11H, Imm11L | J1J2}; +} + +/// Decode 22-bit immediate value for branch instructions without J1J2 range +/// extension (formats B T4, BL T1 and BLX T2). 
+/// +/// [ 00000:Imm11H, 00000:Imm11L ] -> 00000:Imm11H:Imm11L:0 +/// J1^ ^J2 will always be 1 +/// +int64_t decodeImmBT4BlT1BlxT2(uint32_t Hi, uint32_t Lo) { + uint32_t Imm11H = Hi & 0x07ff; + uint32_t Imm11L = Lo & 0x07ff; + return SignExtend64<22>(Imm11H << 12 | Imm11L << 1); +} + +/// Encode 25-bit immediate value for branch instructions with J1J2 range +/// extension (formats B T4, BL T1 and BLX T2). +/// +/// S:I1:I2:Imm10:Imm11:0 -> [ 00000:S:Imm10, 00:J1:0:J2:Imm11 ] +/// +HalfWords encodeImmBT4BlT1BlxT2_J1J2(int64_t Value) { + uint32_t S = (Value >> 14) & 0x0400; + uint32_t J1 = (((~(Value >> 10)) ^ (Value >> 11)) & 0x2000); + uint32_t J2 = (((~(Value >> 11)) ^ (Value >> 13)) & 0x0800); + uint32_t Imm10 = (Value >> 12) & 0x03ff; + uint32_t Imm11 = (Value >> 1) & 0x07ff; + return HalfWords{S | Imm10, J1 | J2 | Imm11}; +} + +/// Decode 25-bit immediate value for branch instructions with J1J2 range +/// extension (formats B T4, BL T1 and BLX T2). +/// +/// [ 00000:S:Imm10, 00:J1:0:J2:Imm11] -> S:I1:I2:Imm10:Imm11:0 +/// +int64_t decodeImmBT4BlT1BlxT2_J1J2(uint32_t Hi, uint32_t Lo) { + uint32_t S = Hi & 0x0400; + uint32_t I1 = ~((Lo ^ (Hi << 3)) << 10) & 0x00800000; + uint32_t I2 = ~((Lo ^ (Hi << 1)) << 11) & 0x00400000; + uint32_t Imm10 = Hi & 0x03ff; + uint32_t Imm11 = Lo & 0x07ff; + return SignExtend64<25>(S << 14 | I1 | I2 | Imm10 << 12 | Imm11 << 1); +} + +/// Encode 16-bit immediate value for move instruction formats MOVT T1 and +/// MOVW T3. +/// +/// Imm4:Imm1:Imm3:Imm8 -> [ 00000:i:000000:Imm4, 0:Imm3:0000:Imm8 ] +/// +HalfWords encodeImmMovtT1MovwT3(uint16_t Value) { + uint32_t Imm4 = (Value >> 12) & 0x0f; + uint32_t Imm1 = (Value >> 11) & 0x01; + uint32_t Imm3 = (Value >> 8) & 0x07; + uint32_t Imm8 = Value & 0xff; + return HalfWords{Imm1 << 10 | Imm4, Imm3 << 12 | Imm8}; +} + +/// Decode 16-bit immediate value from move instruction formats MOVT T1 and +/// MOVW T3. +/// +/// [ 00000:i:000000:Imm4, 0:Imm3:0000:Imm8 ] -> Imm4:Imm1:Imm3:Imm8 +/// +uint16_t decodeImmMovtT1MovwT3(uint32_t Hi, uint32_t Lo) { + uint32_t Imm4 = Hi & 0x0f; + uint32_t Imm1 = (Hi >> 10) & 0x01; + uint32_t Imm3 = (Lo >> 12) & 0x07; + uint32_t Imm8 = Lo & 0xff; + uint32_t Imm16 = Imm4 << 12 | Imm1 << 11 | Imm3 << 8 | Imm8; + assert(Imm16 <= 0xffff && "Decoded value out-of-range"); + return Imm16; +} + +/// Encode register ID for instruction formats MOVT T1 and MOVW T3. +/// +/// Rd4 -> [0000000000000000, 0000:Rd4:00000000] +/// +HalfWords encodeRegMovtT1MovwT3(int64_t Value) { + uint32_t Rd4 = (Value & 0x0f) << 8; + return HalfWords{0, Rd4}; +} + +/// Decode register ID from instruction formats MOVT T1 and MOVW T3. +/// +/// [0000000000000000, 0000:Rd4:00000000] -> Rd4 +/// +int64_t decodeRegMovtT1MovwT3(uint32_t Hi, uint32_t Lo) { + uint32_t Rd4 = (Lo >> 8) & 0x0f; + return Rd4; +} + +/// 32-bit Thumb instructions are stored as two little-endian halfwords. +/// An instruction at address A encodes bytes A+1, A in the first halfword (Hi), +/// followed by bytes A+3, A+2 in the second halfword (Lo). +struct WritableThumbRelocation { + /// Create a writable reference to a Thumb32 fixup. + WritableThumbRelocation(char *FixupPtr) + : Hi{*reinterpret_cast(FixupPtr)}, + Lo{*reinterpret_cast(FixupPtr + 2)} {} + + support::ulittle16_t &Hi; // First halfword + support::ulittle16_t &Lo; // Second halfword +}; + +struct ThumbRelocation { + /// Create a read-only reference to a Thumb32 fixup. 
+  ThumbRelocation(const char *FixupPtr)
+      : Hi{*reinterpret_cast<const support::ulittle16_t *>(FixupPtr)},
+        Lo{*reinterpret_cast<const support::ulittle16_t *>(FixupPtr + 2)} {}
+
+  /// Create a read-only Thumb32 fixup from a writeable one.
+  ThumbRelocation(WritableThumbRelocation &Writable)
+      : Hi{Writable.Hi}, Lo(Writable.Lo) {}
+
+  const support::ulittle16_t &Hi; // First halfword
+  const support::ulittle16_t &Lo; // Second halfword
+};
+
+Error makeUnexpectedOpcodeError(const LinkGraph &G, const ThumbRelocation &R,
+                                Edge::Kind Kind) {
+  return make_error<JITLinkError>(
+      formatv("Invalid opcode [ 0x{0:x4}, 0x{1:x4} ] for relocation: {2}", R.Hi,
+              R.Lo, G.getEdgeKindName(Kind)));
+}
+
+template <EdgeKind_aarch32 Kind> bool checkOpcode(const ThumbRelocation &R) {
+  uint16_t Hi = R.Hi & FixupInfo<Kind>::OpcodeMask.Hi;
+  uint16_t Lo = R.Lo & FixupInfo<Kind>::OpcodeMask.Lo;
+  return Hi == FixupInfo<Kind>::Opcode.Hi && Lo == FixupInfo<Kind>::Opcode.Lo;
+}
+
+template <EdgeKind_aarch32 Kind>
+bool checkRegister(const ThumbRelocation &R, HalfWords Reg) {
+  uint16_t Hi = R.Hi & FixupInfo<Kind>::RegMask.Hi;
+  uint16_t Lo = R.Lo & FixupInfo<Kind>::RegMask.Lo;
+  return Hi == Reg.Hi && Lo == Reg.Lo;
+}
+
+template <EdgeKind_aarch32 Kind>
+void writeRegister(WritableThumbRelocation &R, HalfWords Reg) {
+  static constexpr HalfWords Mask = FixupInfo<Kind>::RegMask;
+  assert((Mask.Hi & Reg.Hi) == Reg.Hi && (Mask.Lo & Reg.Lo) == Reg.Lo &&
+         "Value bits exceed bit range of given mask");
+  R.Hi = (R.Hi & ~Mask.Hi) | Reg.Hi;
+  R.Lo = (R.Lo & ~Mask.Lo) | Reg.Lo;
+}
+
+template <EdgeKind_aarch32 Kind>
+void writeImmediate(WritableThumbRelocation &R, HalfWords Imm) {
+  static constexpr HalfWords Mask = FixupInfo<Kind>::ImmMask;
+  assert((Mask.Hi & Imm.Hi) == Imm.Hi && (Mask.Lo & Imm.Lo) == Imm.Lo &&
+         "Value bits exceed bit range of given mask");
+  R.Hi = (R.Hi & ~Mask.Hi) | Imm.Hi;
+  R.Lo = (R.Lo & ~Mask.Lo) | Imm.Lo;
+}
+
+Expected<int64_t> readAddendData(LinkGraph &G, Block &B, const Edge &E) {
+  endianness Endian = G.getEndianness();
+  assert(Endian != native && "Declare as little or big explicitly");
+
+  Edge::Kind Kind = E.getKind();
+  const char *BlockWorkingMem = B.getContent().data();
+  const char *FixupPtr = BlockWorkingMem + E.getOffset();
+
+  switch (Kind) {
+  case Data_Delta32:
+    return SignExtend64<32>((Endian == little) ? read32<little>(FixupPtr)
+                                               : read32<big>(FixupPtr));
+  default:
+    return make_error<JITLinkError>(
+        "In graph " + G.getName() + ", section " + B.getSection().getName() +
+        " can not read implicit addend for aarch32 edge kind " +
+        G.getEdgeKindName(E.getKind()));
+  }
+}
+
+Expected<int64_t> readAddendArm(LinkGraph &G, Block &B, const Edge &E) {
+  Edge::Kind Kind = E.getKind();
+
+  switch (Kind) {
+  case Arm_Call:
+    return make_error<JITLinkError>(
+        "Addend extraction for relocation type not yet implemented: " +
+        StringRef(G.getEdgeKindName(Kind)));
+  default:
+    return make_error<JITLinkError>(
+        "In graph " + G.getName() + ", section " + B.getSection().getName() +
+        " can not read implicit addend for aarch32 edge kind " +
+        G.getEdgeKindName(E.getKind()));
+  }
+}
+
+Expected<int64_t> readAddendThumb(LinkGraph &G, Block &B, const Edge &E,
+                                  const ArmConfig &ArmCfg) {
+  ThumbRelocation R(B.getContent().data() + E.getOffset());
+  Edge::Kind Kind = E.getKind();
+
+  switch (Kind) {
+  case Thumb_Call:
+    if (!checkOpcode<Thumb_Call>(R))
+      return makeUnexpectedOpcodeError(G, R, Kind);
+    return LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)
+               ?
decodeImmBT4BlT1BlxT2_J1J2(R.Hi, R.Lo) + : decodeImmBT4BlT1BlxT2(R.Hi, R.Lo); + + case Thumb_Jump24: + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + if (R.Lo & FixupInfo::LoBitConditional) + return make_error("Relocation expects an unconditional " + "B.W branch instruction: " + + StringRef(G.getEdgeKindName(Kind))); + return LLVM_LIKELY(ArmCfg.J1J2BranchEncoding) + ? decodeImmBT4BlT1BlxT2_J1J2(R.Hi, R.Lo) + : decodeImmBT4BlT1BlxT2(R.Hi, R.Lo); + + case Thumb_MovwAbsNC: + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + // Initial addend is interpreted as a signed value + return SignExtend64<16>(decodeImmMovtT1MovwT3(R.Hi, R.Lo)); + + case Thumb_MovtAbs: + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + // Initial addend is interpreted as a signed value + return SignExtend64<16>(decodeImmMovtT1MovwT3(R.Hi, R.Lo)); + + default: + return make_error( + "In graph " + G.getName() + ", section " + B.getSection().getName() + + " can not read implicit addend for aarch32 edge kind " + + G.getEdgeKindName(E.getKind())); + } +} + +Error applyFixupData(LinkGraph &G, Block &B, const Edge &E) { + using namespace support; + + char *BlockWorkingMem = B.getAlreadyMutableContent().data(); + char *FixupPtr = BlockWorkingMem + E.getOffset(); + + auto Write32 = [FixupPtr, Endian = G.getEndianness()](int64_t Value) { + assert(Endian != native && "Must be explicit: little or big"); + assert(isInt<32>(Value) && "Must be in signed 32-bit range"); + uint32_t Imm = static_cast(Value); + if (LLVM_LIKELY(Endian == little)) + endian::write32(FixupPtr, Imm); + else + endian::write32(FixupPtr, Imm); + }; + + Edge::Kind Kind = E.getKind(); + uint64_t FixupAddress = (B.getAddress() + E.getOffset()).getValue(); + int64_t Addend = E.getAddend(); + Symbol &TargetSymbol = E.getTarget(); + uint64_t TargetAddress = TargetSymbol.getAddress().getValue(); + assert(!TargetSymbol.hasTargetFlags(ThumbSymbol)); + + // Regular data relocations have size 4, alignment 1 and write the full 32-bit + // result to the place; no need for overflow checking. 
There are three + // exceptions: R_ARM_ABS8, R_ARM_ABS16, R_ARM_PREL31 + switch (Kind) { + case Data_Delta32: { + int64_t Value = TargetAddress - FixupAddress + Addend; + if (!isInt<32>(Value)) + return makeTargetOutOfRangeError(G, B, E); + Write32(Value); + return Error::success(); + } + default: + return make_error( + "In graph " + G.getName() + ", section " + B.getSection().getName() + + " encountered unfixable aarch32 edge kind " + + G.getEdgeKindName(E.getKind())); + } +} + +Error applyFixupArm(LinkGraph &G, Block &B, const Edge &E) { + Edge::Kind Kind = E.getKind(); + + switch (Kind) { + case Arm_Call: + return make_error( + "Fix-up for relocation type not yet implemented: " + + StringRef(G.getEdgeKindName(Kind))); + default: + return make_error( + "In graph " + G.getName() + ", section " + B.getSection().getName() + + " encountered unfixable aarch32 edge kind " + + G.getEdgeKindName(E.getKind())); + } +} + +Error applyFixupThumb(LinkGraph &G, Block &B, const Edge &E, + const ArmConfig &ArmCfg) { + WritableThumbRelocation R(B.getAlreadyMutableContent().data() + + E.getOffset()); + + Edge::Kind Kind = E.getKind(); + uint64_t FixupAddress = (B.getAddress() + E.getOffset()).getValue(); + int64_t Addend = E.getAddend(); + Symbol &TargetSymbol = E.getTarget(); + uint64_t TargetAddress = TargetSymbol.getAddress().getValue(); + if (TargetSymbol.hasTargetFlags(ThumbSymbol)) + TargetAddress |= 0x01; + + switch (Kind) { + case Thumb_Jump24: { + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + if (R.Lo & FixupInfo::LoBitConditional) + return make_error("Relocation expects an unconditional " + "B.W branch instruction: " + + StringRef(G.getEdgeKindName(Kind))); + if (!(TargetSymbol.hasTargetFlags(ThumbSymbol))) + return make_error("Branch relocation needs interworking " + "stub when bridging to ARM: " + + StringRef(G.getEdgeKindName(Kind))); + + int64_t Value = TargetAddress - FixupAddress + Addend; + if (LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)) { + if (!isInt<25>(Value)) + return makeTargetOutOfRangeError(G, B, E); + writeImmediate(R, encodeImmBT4BlT1BlxT2_J1J2(Value)); + } else { + if (!isInt<22>(Value)) + return makeTargetOutOfRangeError(G, B, E); + writeImmediate(R, encodeImmBT4BlT1BlxT2(Value)); + } + + return Error::success(); + } + + case Thumb_Call: { + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + + int64_t Value = TargetAddress - FixupAddress + Addend; + + // The call instruction itself is Thumb. The call destination can either be + // Thumb or Arm. We use BL to stay in Thumb and BLX to change to Arm. + bool TargetIsArm = !TargetSymbol.hasTargetFlags(ThumbSymbol); + bool InstrIsBlx = (R.Lo & FixupInfo::LoBitNoBlx) == 0; + if (TargetIsArm != InstrIsBlx) { + if (LLVM_LIKELY(TargetIsArm)) { + // Change opcode BL -> BLX and fix range value (account for 4-byte + // aligned destination while instruction may only be 2-byte aligned + // and clear Thumb bit). 
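+        // For example, BLX T2 computes its target from Align(PC, 4), so when
+        // the instruction sits at a 2-byte boundary the raw branch distance
+        // can be 2 mod 4; alignTo() below rounds it up to the 4-byte-aligned
+        // Arm destination.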
+ R.Lo = R.Lo & ~FixupInfo::LoBitNoBlx; + R.Lo = R.Lo & ~FixupInfo::LoBitH; + Value = alignTo(Value, 4); + } else { + // Change opcode BLX -> BL and set Thumb bit + R.Lo = R.Lo & ~FixupInfo::LoBitNoBlx; + Value |= 0x01; + } + } + + if (LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)) { + if (!isInt<25>(Value)) + return makeTargetOutOfRangeError(G, B, E); + writeImmediate(R, encodeImmBT4BlT1BlxT2_J1J2(Value)); + } else { + if (!isInt<22>(Value)) + return makeTargetOutOfRangeError(G, B, E); + writeImmediate(R, encodeImmBT4BlT1BlxT2(Value)); + } + + assert(((R.Lo & FixupInfo::LoBitNoBlx) || + (R.Lo & FixupInfo::LoBitH) == 0) && + "Opcode BLX implies H bit is clear (avoid UB in BLX T2)"); + return Error::success(); + } + + case Thumb_MovwAbsNC: { + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + uint16_t Value = (TargetAddress + Addend) & 0xffff; + writeImmediate(R, encodeImmMovtT1MovwT3(Value)); + return Error::success(); + } + + case Thumb_MovtAbs: { + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + uint16_t Value = ((TargetAddress + Addend) >> 16) & 0xffff; + writeImmediate(R, encodeImmMovtT1MovwT3(Value)); + return Error::success(); + } + + default: + return make_error( + "In graph " + G.getName() + ", section " + B.getSection().getName() + + " encountered unfixable aarch32 edge kind " + + G.getEdgeKindName(E.getKind())); + } +} + +const uint8_t Thumbv7ABS[] = { + 0x40, 0xf2, 0x00, 0x0c, // movw r12, #0x0000 ; lower 16-bit + 0xc0, 0xf2, 0x00, 0x0c, // movt r12, #0x0000 ; upper 16-bit + 0x60, 0x47 // bx r12 +}; + +template <> +Symbol &StubsManager::createEntry(LinkGraph &G, Symbol &Target) { + constexpr uint64_t Alignment = 4; + Block &B = addStub(G, Thumbv7ABS, Alignment); + LLVM_DEBUG({ + const char *StubPtr = B.getContent().data(); + HalfWords Reg12 = encodeRegMovtT1MovwT3(12); + assert(checkRegister(StubPtr, Reg12) && + checkRegister(StubPtr + 4, Reg12) && + "Linker generated stubs may only corrupt register r12 (IP)"); + }); + B.addEdge(Thumb_MovwAbsNC, 0, Target, 0); + B.addEdge(Thumb_MovtAbs, 4, Target, 0); + Symbol &Stub = G.addAnonymousSymbol(B, 0, B.getSize(), true, false); + Stub.setTargetFlags(ThumbSymbol); + return Stub; +} + +const char *getEdgeKindName(Edge::Kind K) { +#define KIND_NAME_CASE(K) \ + case K: \ + return #K; + + switch (K) { + KIND_NAME_CASE(Data_Delta32) + KIND_NAME_CASE(Arm_Call) + KIND_NAME_CASE(Thumb_Call) + KIND_NAME_CASE(Thumb_Jump24) + KIND_NAME_CASE(Thumb_MovwAbsNC) + KIND_NAME_CASE(Thumb_MovtAbs) + default: + return getGenericEdgeKindName(K); + } +#undef KIND_NAME_CASE +} + +const char *getCPUArchName(ARMBuildAttrs::CPUArch K) { +#define CPUARCH_NAME_CASE(K) \ + case K: \ + return #K; + + using namespace ARMBuildAttrs; + switch (K) { + CPUARCH_NAME_CASE(Pre_v4) + CPUARCH_NAME_CASE(v4) + CPUARCH_NAME_CASE(v4T) + CPUARCH_NAME_CASE(v5T) + CPUARCH_NAME_CASE(v5TE) + CPUARCH_NAME_CASE(v5TEJ) + CPUARCH_NAME_CASE(v6) + CPUARCH_NAME_CASE(v6KZ) + CPUARCH_NAME_CASE(v6T2) + CPUARCH_NAME_CASE(v6K) + CPUARCH_NAME_CASE(v7) + CPUARCH_NAME_CASE(v6_M) + CPUARCH_NAME_CASE(v6S_M) + CPUARCH_NAME_CASE(v7E_M) + CPUARCH_NAME_CASE(v8_A) + CPUARCH_NAME_CASE(v8_R) + CPUARCH_NAME_CASE(v8_M_Base) + CPUARCH_NAME_CASE(v8_M_Main) + CPUARCH_NAME_CASE(v8_1_M_Main) + CPUARCH_NAME_CASE(v9_A) + } + llvm_unreachable("Missing CPUArch in switch?"); +#undef CPUARCH_NAME_CASE +} + +} // namespace aarch32 +} // namespace jitlink +} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp 
b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 2c270cd66285d..83a09b8d41e91 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -8,6 +8,7 @@ #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h" +#include "llvm/ExecutionEngine/JITLink/aarch32.h" #include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h" #include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h" #include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h" @@ -40,7 +41,10 @@ bool hasInitializerSection(jitlink::LinkGraph &G) { } JITTargetAddress getJITSymbolPtrForSymbol(Symbol &Sym) { - return Sym.getAddress().getValue(); + uint64_t CallableAddr = Sym.getAddress().getValue(); + if (Sym.isCallable() && Sym.hasTargetFlags(aarch32::ThumbSymbol)) + CallableAddr |= 0x01; // thumb bit + return CallableAddr; } JITSymbolFlags getJITSymbolFlagsForSymbol(Symbol &Sym) { diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s new file mode 100644 index 0000000000000..11a77c95cfa8f --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s @@ -0,0 +1,46 @@ +// RUN: llvm-mc -triple=thumbv7-none-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t.o %s +// RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb -slab-page-size 4096 -abs printf=0x76bbe880 -show-entry-es %t.o | FileCheck %s + +// Check that main is a thumb symbol (with LSB set) and printf is arm (with LSB clear) +// +// CHECK-LABEL: Symbol table: +// CHECK-NEXT: "main": 0x{{[0-9a-f]+[13579bdf]}} [Callable] Ready +// CHECK-NEXT: "printf": 0x76bbe880 [Data] Ready + + .globl main + .p2align 2 + .type main,%function + .code 16 + .thumb_func +main: + .fnstart + .save {r7, lr} + push {r7, lr} + .setfp r7, sp + mov r7, sp + .pad #8 + sub sp, #8 + movs r0, #0 + str r0, [sp] + str r0, [sp, #4] + ldr r0, .LCPI0_0 +.LPC0_0: + add r0, pc + bl printf + ldr r0, [sp] + add sp, #8 + pop {r7, pc} + + .p2align 2 +.LCPI0_0: + .long .L.str-(.LPC0_0+4) + + .size main, .-main + .cantunwind + .fnend + + .type .L.str,%object + .section .rodata.str1.1,"aMS",%progbits,1 +.L.str: + .asciz "Hello AArch32!\n" + .size .L.str, 12 diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg b/llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg new file mode 100644 index 0000000000000..20e19aeb06f9d --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'ARM' in config.root.targets: + config.unsupported = True diff --git a/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp b/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp new file mode 100644 index 0000000000000..0e41174040b68 --- /dev/null +++ b/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp @@ -0,0 +1,200 @@ +//===------- AArch32Tests.cpp - Unit tests for the AArch32 backend --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::jitlink; +using namespace llvm::jitlink::aarch32; +using namespace llvm::support; +using namespace llvm::support::endian; + +struct MutableHalfWords { + MutableHalfWords(HalfWords Preset) : Hi(Preset.Hi), Lo(Preset.Lo) {} + + void patch(HalfWords Value, HalfWords Mask) { + Hi = (Hi & ~Mask.Hi) | Value.Hi; + Lo = (Lo & ~Mask.Lo) | Value.Lo; + } + + uint16_t Hi; // First halfword + uint16_t Lo; // Second halfword +}; + +namespace llvm { +namespace jitlink { + +Expected getJITLinkEdgeKind(uint32_t ELFType); +Expected getELFRelocationType(Edge::Kind Kind); + +} // namespace jitlink +} // namespace llvm + +TEST(AArch32_ELF, EdgeKinds) { + // Fails: Invalid ELF type -> JITLink kind + Expected ErrKind = getJITLinkEdgeKind(ELF::R_ARM_NONE); + EXPECT_TRUE(errorToBool(ErrKind.takeError())); + + // Fails: Invalid JITLink kind -> ELF type + Expected ErrType = getELFRelocationType(Edge::Invalid); + EXPECT_TRUE(errorToBool(ErrType.takeError())); + + for (Edge::Kind K = FirstDataRelocation; K < LastThumbRelocation; K += 1) { + Expected ELFType = getELFRelocationType(K); + EXPECT_FALSE(errorToBool(ELFType.takeError())) + << "Failed to translate JITLink kind -> ELF type"; + + Expected JITLinkKind = getJITLinkEdgeKind(*ELFType); + EXPECT_FALSE(errorToBool(JITLinkKind.takeError())) + << "Failed to translate ELF type -> JITLink kind"; + + EXPECT_EQ(*JITLinkKind, K) << "Round-trip value inconsistent?"; + } +} + +namespace llvm { +namespace jitlink { +namespace aarch32 { + +HalfWords encodeImmBT4BlT1BlxT2(int64_t Value); +HalfWords encodeImmBT4BlT1BlxT2_J1J2(int64_t Value); +HalfWords encodeImmMovtT1MovwT3(uint16_t Value); +HalfWords encodeRegMovtT1MovwT3(int64_t Value); + +int64_t decodeImmBT4BlT1BlxT2(uint32_t Hi, uint32_t Lo); +int64_t decodeImmBT4BlT1BlxT2_J1J2(uint32_t Hi, uint32_t Lo); +uint16_t decodeImmMovtT1MovwT3(uint32_t Hi, uint32_t Lo); +int64_t decodeRegMovtT1MovwT3(uint32_t Hi, uint32_t Lo); + +} // namespace aarch32 +} // namespace jitlink +} // namespace llvm + +// Big-endian for v7 and v8 (and v6 unless in legacy backwards compatible mode +// be32) have little-endian instructions and big-endian data. In ELF relocatable +// objects big-endian instructions may still be encountered. A be8 supporting +// linker is expected to endian-reverse instructions for the executable. 
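+// For example, the "common" preset bytes { 0xff, 0xf7, 0xfe, 0xef } used by
+// the tests below are read as the halfwords Hi = 0xf7ff and Lo = 0xeffe when
+// interpreted little-endian (makeHalfWords<little>).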
+template <endianness Endian>
+static HalfWords makeHalfWords(std::array<uint8_t, 4> Mem) {
+  return HalfWords{read16<Endian>(Mem.data()), read16<Endian>(Mem.data() + 2)};
+}
+
+/// 25-bit branch with link (with J1J2 range extension)
+TEST(AArch32_Relocations, Thumb_Call_J1J2) {
+  static_assert(isInt<25>(16777215), "Max value");
+  static_assert(isInt<25>(-16777215), "Min value");
+  static_assert(!isInt<25>(16777217), "First overflow");
+  static_assert(!isInt<25>(-16777217), "First underflow");
+
+  constexpr HalfWords ImmMask = FixupInfo<Thumb_Call>::ImmMask;
+
+  static std::array<HalfWords, 3> MemPresets{
+      makeHalfWords<little>({0xff, 0xf7, 0xfe, 0xef}), // common
+      makeHalfWords<little>({0x00, 0x00, 0x00, 0x00}), // zeros
+      makeHalfWords<little>({0xff, 0xff, 0xff, 0xff}), // ones
+  };
+
+  auto EncodeDecode = [ImmMask](int64_t In, MutableHalfWords &Mem) {
+    Mem.patch(encodeImmBT4BlT1BlxT2_J1J2(In), ImmMask);
+    return decodeImmBT4BlT1BlxT2_J1J2(Mem.Hi, Mem.Lo);
+  };
+
+  for (MutableHalfWords Mem : MemPresets) {
+    HalfWords UnaffectedBits(Mem.Hi & ~ImmMask.Hi, Mem.Lo & ~ImmMask.Lo);
+
+    EXPECT_EQ(EncodeDecode(1, Mem), 0);                 // Zero value
+    EXPECT_EQ(EncodeDecode(0x41, Mem), 0x40);           // Common value
+    EXPECT_EQ(EncodeDecode(16777215, Mem), 16777214);   // Maximum value
+    EXPECT_EQ(EncodeDecode(-16777215, Mem), -16777216); // Minimum value
+    EXPECT_NE(EncodeDecode(16777217, Mem), 16777217);   // First overflow
+    EXPECT_NE(EncodeDecode(-16777217, Mem), -16777217); // First underflow
+
+    EXPECT_TRUE(UnaffectedBits.Hi == (Mem.Hi & ~ImmMask.Hi) &&
+                UnaffectedBits.Lo == (Mem.Lo & ~ImmMask.Lo))
+        << "Diff outside immediate field";
+  }
+}
+
+/// 22-bit branch with link (without J1J2 range extension)
+TEST(AArch32_Relocations, Thumb_Call_Bare) {
+  static_assert(isInt<22>(2097151), "Max value");
+  static_assert(isInt<22>(-2097151), "Min value");
+  static_assert(!isInt<22>(2097153), "First overflow");
+  static_assert(!isInt<22>(-2097153), "First underflow");
+
+  constexpr HalfWords ImmMask = FixupInfo<Thumb_Call>::ImmMask;
+
+  static std::array<HalfWords, 3> MemPresets{
+      makeHalfWords<little>({0xff, 0xf7, 0xfe, 0xef}), // common
+      makeHalfWords<little>({0x00, 0x00, 0x00, 0x00}), // zeros
+      makeHalfWords<little>({0xff, 0xff, 0xff, 0xff}), // ones
+  };
+
+  auto EncodeDecode = [ImmMask](int64_t In, MutableHalfWords &Mem) {
+    Mem.patch(encodeImmBT4BlT1BlxT2(In), ImmMask);
+    return decodeImmBT4BlT1BlxT2(Mem.Hi, Mem.Lo);
+  };
+
+  for (MutableHalfWords Mem : MemPresets) {
+    HalfWords UnaffectedBits(Mem.Hi & ~ImmMask.Hi, Mem.Lo & ~ImmMask.Lo);
+
+    EXPECT_EQ(EncodeDecode(1, Mem), 0);               // Zero value
+    EXPECT_EQ(EncodeDecode(0x41, Mem), 0x40);         // Common value
+    EXPECT_EQ(EncodeDecode(2097151, Mem), 2097150);   // Maximum value
+    EXPECT_EQ(EncodeDecode(-2097151, Mem), -2097152); // Minimum value
+    EXPECT_NE(EncodeDecode(2097153, Mem), 2097153);   // First overflow
+    EXPECT_NE(EncodeDecode(-2097153, Mem), -2097153); // First underflow
+
+    EXPECT_TRUE(UnaffectedBits.Hi == (Mem.Hi & ~ImmMask.Hi) &&
+                UnaffectedBits.Lo == (Mem.Lo & ~ImmMask.Lo))
+        << "Diff outside immediate field";
+  }
+}
+
+/// Write immediate value to the top halfword of the destination register
+TEST(AArch32_Relocations, Thumb_MovtAbs) {
+  static_assert(isUInt<16>(65535), "Max value");
+  static_assert(!isUInt<16>(65536), "First overflow");
+
+  constexpr HalfWords ImmMask = FixupInfo<Thumb_MovtAbs>::ImmMask;
+  constexpr HalfWords RegMask = FixupInfo<Thumb_MovtAbs>::RegMask;
+
+  static std::array<uint8_t, 3> Registers{0, 5, 12};
+  static std::array<HalfWords, 3> MemPresets{
+      makeHalfWords<little>({0xff, 0xf7, 0xfe, 0xef}), // common
+      makeHalfWords<little>({0x00, 0x00, 0x00, 0x00}), // zeros
+      makeHalfWords<little>({0xff, 0xff, 0xff, 0xff}), // ones
+  };
+
+  auto EncodeDecode = [ImmMask](uint32_t In, MutableHalfWords &Mem) {
+    Mem.patch(encodeImmMovtT1MovwT3(In), ImmMask);
+    return decodeImmMovtT1MovwT3(Mem.Hi, Mem.Lo);
+  };
+
+  for (MutableHalfWords Mem : MemPresets) {
+    for (uint8_t Reg : Registers) {
+      HalfWords UnaffectedBits(Mem.Hi & ~(ImmMask.Hi | RegMask.Hi),
+                               Mem.Lo & ~(ImmMask.Lo | RegMask.Lo));
+
+      Mem.patch(encodeRegMovtT1MovwT3(Reg), RegMask);
+      EXPECT_EQ(EncodeDecode(0x76bb, Mem), 0x76bb);   // Common value
+      EXPECT_EQ(EncodeDecode(0, Mem), 0);             // Minimum value
+      EXPECT_EQ(EncodeDecode(0xffff, Mem), 0xffff);   // Maximum value
+      EXPECT_NE(EncodeDecode(0x10000, Mem), 0x10000); // First overflow
+
+      // Destination register as well as unaffected bits should be intact
+      EXPECT_EQ(decodeRegMovtT1MovwT3(Mem.Hi, Mem.Lo), Reg);
+      EXPECT_TRUE(UnaffectedBits.Hi == (Mem.Hi & ~(ImmMask.Hi | RegMask.Hi)) &&
+                  UnaffectedBits.Lo == (Mem.Lo & ~(ImmMask.Lo | RegMask.Lo)))
+          << "Diff outside immediate/register field";
+    }
+  }
+}
diff --git a/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt
index 1a71a62d3756d..978914c748c63 100644
--- a/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt
+++ b/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt
@@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS
 )
 
 add_llvm_unittest(JITLinkTests
+  AArch32Tests.cpp
   EHFrameSupportTests.cpp
   LinkGraphTests.cpp
   )

From b7677846da66fd003a538f87fd8de948cfcc3d6a Mon Sep 17 00:00:00 2001
From: Carlos Galvez
Date: Thu, 23 Mar 2023 10:30:01 +0000
Subject: [PATCH 397/691] [clang-tidy][NFC] Fix broken link in Release Notes

---
 clang-tools-extra/docs/ReleaseNotes.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 89419141cebbd..a5f090045615c 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -238,8 +238,8 @@ Changes in existing checks
   string for ``Prefix`` or ``Suffix`` options could result in the style not
   being used.
 
-- Fixed an issue in :doc:`google-avoid-underscore-in-googletest-name
-  <clang-tidy/checks/google/avoid-underscore-in-googletest-name>` when using
+- Fixed an issue in :doc:`google-readability-avoid-underscore-in-googletest-name
+  <clang-tidy/checks/google/readability-avoid-underscore-in-googletest-name>` when using
   ``DISABLED_`` in the test suite name.
 
 Removed checks

From 61f33def1375a68afc5681627a62ce24446e45e2 Mon Sep 17 00:00:00 2001
From: Adam Paszke
Date: Thu, 23 Mar 2023 10:12:56 +0000
Subject: [PATCH 398/691] [mlir][Vector] Make sure that vector.contract
 preserves extra attributes while parsing

The old implementation parsed the optional attribute dict,
only to replace its contents by using `assign`.

Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D146707
---
 mlir/lib/Dialect/Vector/IR/VectorOps.cpp |  4 ++--
 mlir/test/Dialect/Vector/ops.mlir        | 12 ++++++++++++
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
index 9796693b4b6cd..21daff60c7e62 100644
--- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp
@@ -640,7 +640,7 @@ ParseResult ContractionOp::parse(OpAsmParser &parser, OperationState &result) {
   auto loc = parser.getCurrentLocation();
   DictionaryAttr dictAttr;
   // TODO: Unify linalg op attribute parsing.
-  if (parser.parseAttribute(dictAttr, "_", result.attributes) ||
+  if (parser.parseAttribute(dictAttr) ||
       parser.parseOperand(lhsInfo) || parser.parseComma() ||
       parser.parseOperand(rhsInfo) || parser.parseComma() ||
       parser.parseOperand(accInfo) ||
@@ -653,7 +653,7 @@ ParseResult ContractionOp::parse(OpAsmParser &parser, OperationState &result) {
       parser.resolveOperand(accInfo, resultType, result.operands) ||
       parser.addTypeToList(resultType, result.types))
     return failure();
-  result.attributes.assign(dictAttr.getValue().begin(),
+  result.attributes.append(dictAttr.getValue().begin(),
                            dictAttr.getValue().end());
 
   // Convert array of string into an array of IteratyType enums. This is needed,
diff --git a/mlir/test/Dialect/Vector/ops.mlir b/mlir/test/Dialect/Vector/ops.mlir
index 60e1507293f7e..4013d5daee8cc 100644
--- a/mlir/test/Dialect/Vector/ops.mlir
+++ b/mlir/test/Dialect/Vector/ops.mlir
@@ -291,6 +291,18 @@ func.func @contraction_to_scalar(%arg0: vector<10xf32>, %arg1: vector<10xf32>) -> f32 {
   return %0 : f32
 }
 
+// CHECK-LABEL: @contraction_extra_attrs
+func.func @contraction_extra_attrs(%arg0: vector<10xf32>, %arg1: vector<10xf32>) -> f32 {
+  // CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32
+  %f0 = arith.constant 0.0: f32
+  // CHECK: %[[X:.*]] = vector.contract {indexing_maps = [#{{.*}}, #{{.*}}, #{{.*}}], iterator_types = ["reduction"], kind = #vector.kind<add>} %{{.*}}, %{{.*}}, %[[C0]] {first_attr = 1 : i32, second_attr = "string"} : vector<10xf32>, vector<10xf32> into f32
+  %0 = vector.contract #contraction_to_scalar_trait %arg0, %arg1, %f0
+    {first_attr = 1 : i32, second_attr = "string"}
+    : vector<10xf32>, vector<10xf32> into f32
+  // CHECK: return %[[X]] : f32
+  return %0 : f32
+}
+
 #contraction_to_scalar_max_accesses = [
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,

From 8d16c6809a080947057ae21b9f6165105b4b2ad8 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Fri, 10 Feb 2023 11:03:55 +0000
Subject: [PATCH 399/691] [RISCV] Increase default vectorizer LMUL to 2

After some discussion and experimentation, we have seen that changing
the default number of vector register bits to LMUL=2 strikes a sweet
spot.

Whilst we could be clever here and make the vectorizer smarter about
dynamically selecting an LMUL that

a) Doesn't affect register pressure
b) Is suitable for the microarchitecture

we would need to teach its heuristics about RISC-V register grouping
specifics. Instead this just does the easy, pragmatic thing by changing
the default to a safe value that doesn't affect register pressure
significantly[1], but should increase throughput and unlock more
interleaving.
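For illustration (an editor's sketch, not part of the original commit
message): the i64 loops in the updated defaults.ll below now vectorize as
<vscale x 2 x i64> instead of <vscale x 1 x i64>, and the previous
behaviour remains reachable through the hidden flag, e.g.

  opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v \
      -riscv-v-register-bit-width-lmul=1 -S input.ll

where input.ll stands in for any candidate loop.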
[1] Register spilling when compiling sqlite at various levels of `-riscv-v-register-bit-width-lmul`: LMUL=1 2573 spills LMUL=2 2583 spills LMUL=4 2819 spills LMUL=8 3256 spills Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D143723 --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 2 +- .../LoopVectorize/RISCV/defaults.ll | 70 +- .../Transforms/LoopVectorize/RISCV/divrem.ll | 586 ++++++----- .../LoopVectorize/RISCV/illegal-type.ll | 30 +- .../LoopVectorize/RISCV/inloop-reduction.ll | 28 +- .../RISCV/interleaved-accesses.ll | 130 ++- .../Transforms/LoopVectorize/RISCV/lmul.ll | 2 +- .../LoopVectorize/RISCV/low-trip-count.ll | 18 +- .../LoopVectorize/RISCV/mask-index-type.ll | 40 +- .../RISCV/masked_gather_scatter.ll | 152 +-- .../LoopVectorize/RISCV/riscv-interleaved.ll | 4 +- .../LoopVectorize/RISCV/riscv-unroll.ll | 18 +- .../LoopVectorize/RISCV/safe-dep-distance.ll | 107 +- .../LoopVectorize/RISCV/scalable-basics.ll | 358 ++++--- .../RISCV/scalable-reductions.ll | 68 +- .../LoopVectorize/RISCV/scalable-tailfold.ll | 185 ++-- .../LoopVectorize/RISCV/short-trip-count.ll | 33 +- .../LoopVectorize/RISCV/uniform-load-store.ll | 961 +++++++++--------- .../Transforms/LoopVectorize/RISCV/zvl32b.ll | 37 +- .../RISCV/rvv-min-vector-size.ll | 28 +- 20 files changed, 1495 insertions(+), 1362 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index a6a216392de74..2f6b747140e59 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -25,7 +25,7 @@ static cl::opt RVVRegisterWidthLMUL( cl::desc( "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used " "by autovectorized code. Fractional LMULs are not supported."), - cl::init(1), cl::Hidden); + cl::init(2), cl::Hidden); static cl::opt SLPMaxVF( "riscv-v-slp-max-vf", diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll index d58ff5051c621..4b93ea30cf252 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll @@ -14,27 +14,30 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK-LABEL: @vector_add( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -49,7 +52,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v) { ; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; @@ -74,32 +77,35 @@ define i64 @vector_add_reduce(ptr noalias nocapture %a) { ; CHECK-LABEL: @vector_add_reduce( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; 
CHECK-NEXT: [[TMP5]] = add [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; CHECK-NEXT: [[TMP7]] = add [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64( [[TMP5]]) +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -111,7 +117,7 @@ define i64 @vector_add_reduce(ptr noalias nocapture %a) { ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index 7d079d13dc710..119f50df5b8e3 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -11,27 +11,30 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-LABEL: @vector_udiv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; CHECK-NEXT: 
[[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = udiv <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = udiv <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -54,26 +57,26 @@ define void @vector_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
 ; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
-; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[TMP6:%.*]] = udiv <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; FIXED-NEXT:    [[TMP7:%.*]] = udiv <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
-; FIXED-NEXT:    store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8
-; FIXED-NEXT:    store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
+; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[TMP6:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; FIXED-NEXT:    [[TMP7:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
+; FIXED-NEXT:    store <4 x i64> [[TMP6]], ptr [[TMP4]], align 8
+; FIXED-NEXT:    store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FIXED-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; FIXED:       middle.block:
@@ -115,27 +118,30 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-LABEL: @vector_sdiv(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = sdiv <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -158,26 +164,26 @@ define void @vector_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
 ; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
-; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[TMP6:%.*]] = sdiv <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; FIXED-NEXT:    [[TMP7:%.*]] = sdiv <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
-; FIXED-NEXT:    store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8
-; FIXED-NEXT:    store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
+; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[TMP6:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; FIXED-NEXT:    [[TMP7:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
+; FIXED-NEXT:    store <4 x i64> [[TMP6]], ptr [[TMP4]], align 8
+; FIXED-NEXT:    store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FIXED-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ; FIXED:       middle.block:
@@ -219,27 +225,30 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-LABEL: @vector_urem(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = urem <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = urem <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -262,26 +271,26 @@ define void @vector_urem(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
 ; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
-; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[TMP6:%.*]] = urem <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; FIXED-NEXT:    [[TMP7:%.*]] = urem <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
-; FIXED-NEXT:    store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8
-; FIXED-NEXT:    store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
+; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[TMP6:%.*]] = urem <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; FIXED-NEXT:    [[TMP7:%.*]] = urem <4 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
+; FIXED-NEXT:    store <4 x i64> [[TMP6]], ptr [[TMP4]], align 8
+; FIXED-NEXT:    store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FIXED-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; FIXED:       middle.block:
@@ -323,27 +332,30 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-LABEL: @vector_srem(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = srem <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = srem <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -366,26 +378,26 @@ define void @vector_srem(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
 ; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
-; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[TMP6:%.*]] = srem <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
-; FIXED-NEXT:    [[TMP7:%.*]] = srem <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
-; FIXED-NEXT:    store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8
-; FIXED-NEXT:    store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
+; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[TMP6:%.*]] = srem <4 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; FIXED-NEXT:    [[TMP7:%.*]] = srem <4 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
+; FIXED-NEXT:    store <4 x i64> [[TMP6]], ptr [[TMP4]], align 8
+; FIXED-NEXT:    store <4 x i64> [[TMP7]], ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FIXED-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; FIXED:       middle.block:
@@ -427,31 +439,34 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-LABEL: @predicated_udiv(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[BROADCAST_SPLAT]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP7:%.*]] = udiv <vscale x 1 x i64> [[WIDE_LOAD]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP7]], <vscale x 1 x i64> [[WIDE_LOAD]]
-; CHECK-NEXT:    store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP9:%.*]] = udiv <vscale x 2 x i64> [[WIDE_LOAD]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = xor <vscale x 2 x i1> [[TMP7]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[WIDE_LOAD]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -480,34 +495,34 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
 ; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
-; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[TMP6:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
-; FIXED-NEXT:    [[TMP7:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT3]], zeroinitializer
-; FIXED-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[BROADCAST_SPLAT]], <2 x i64> <i64 1, i64 1>
-; FIXED-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[BROADCAST_SPLAT3]], <2 x i64> <i64 1, i64 1>
-; FIXED-NEXT:    [[TMP10:%.*]] = udiv <2 x i64> [[WIDE_LOAD]], [[TMP8]]
-; FIXED-NEXT:    [[TMP11:%.*]] = udiv <2 x i64> [[WIDE_LOAD1]], [[TMP9]]
-; FIXED-NEXT:    [[TMP12:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
-; FIXED-NEXT:    [[TMP13:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>
-; FIXED-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP10]], <2 x i64> [[WIDE_LOAD]]
-; FIXED-NEXT:    [[PREDPHI4:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP11]], <2 x i64> [[WIDE_LOAD1]]
-; FIXED-NEXT:    store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
-; FIXED-NEXT:    store <2 x i64> [[PREDPHI4]], ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
+; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[TMP6:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer
+; FIXED-NEXT:    [[TMP7:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT3]], zeroinitializer
+; FIXED-NEXT:    [[TMP8:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[BROADCAST_SPLAT3]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT:    [[TMP10:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], [[TMP8]]
+; FIXED-NEXT:    [[TMP11:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP9]]
+; FIXED-NEXT:    [[TMP12:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; FIXED-NEXT:    [[TMP13:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; FIXED-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP10]], <4 x i64> [[WIDE_LOAD]]
+; FIXED-NEXT:    [[PREDPHI4:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP11]], <4 x i64> [[WIDE_LOAD1]]
+; FIXED-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
+; FIXED-NEXT:    store <4 x i64> [[PREDPHI4]], ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; FIXED:       middle.block:
@@ -561,31 +576,34 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; CHECK-LABEL: @predicated_sdiv(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[BROADCAST_SPLAT]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[BROADCAST_SPLAT]], <vscale x 1 x i64> shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 1, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP7:%.*]] = sdiv <vscale x 1 x i64> [[WIDE_LOAD]], [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP7]], <vscale x 1 x i64> [[WIDE_LOAD]]
-; CHECK-NEXT:    store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <vscale x 2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP9:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = xor <vscale x 2 x i1> [[TMP7]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> [[TMP9]], <vscale x 2 x i64> [[WIDE_LOAD]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP11]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -614,34 +632,34 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) {
 ; FIXED-NEXT:  entry:
 ; FIXED-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; FIXED:       vector.ph:
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
-; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0
-; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; FIXED-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0
+; FIXED-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT2]], <4 x i64> poison, <4 x i32> zeroinitializer
 ; FIXED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; FIXED:       vector.body:
 ; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
-; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[TMP6:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
-; FIXED-NEXT:    [[TMP7:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT3]], zeroinitializer
-; FIXED-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[BROADCAST_SPLAT]], <2 x i64> <i64 1, i64 1>
-; FIXED-NEXT:    [[TMP9:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[BROADCAST_SPLAT3]], <2 x i64> <i64 1, i64 1>
-; FIXED-NEXT:    [[TMP10:%.*]] = sdiv <2 x i64> [[WIDE_LOAD]], [[TMP8]]
-; FIXED-NEXT:    [[TMP11:%.*]] = sdiv <2 x i64> [[WIDE_LOAD1]], [[TMP9]]
-; FIXED-NEXT:    [[TMP12:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
-; FIXED-NEXT:    [[TMP13:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>
-; FIXED-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP10]], <2 x i64> [[WIDE_LOAD]]
-; FIXED-NEXT:    [[PREDPHI4:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP11]], <2 x i64> [[WIDE_LOAD1]]
-; FIXED-NEXT:    store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
-; FIXED-NEXT:    store <2 x i64> [[PREDPHI4]], ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
+; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[TMP6:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer
+; FIXED-NEXT:    [[TMP7:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT3]], zeroinitializer
+; FIXED-NEXT:    [[TMP8:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT:    [[TMP9:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[BROADCAST_SPLAT3]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>
+; FIXED-NEXT:    [[TMP10:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], [[TMP8]]
+; FIXED-NEXT:    [[TMP11:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP9]]
+; FIXED-NEXT:    [[TMP12:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; FIXED-NEXT:    [[TMP13:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; FIXED-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP10]], <4 x i64> [[WIDE_LOAD]]
+; FIXED-NEXT:    [[PREDPHI4:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP11]], <4 x i64> [[WIDE_LOAD1]]
+; FIXED-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
+; FIXED-NEXT:    store <4 x i64> [[PREDPHI4]], ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; FIXED:       middle.block:
@@ -695,28 +713,31 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-LABEL: @predicated_udiv_by_constant(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP6:%.*]] = udiv <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 27, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP7:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP6]], <vscale x 1 x i64> [[WIDE_LOAD]]
-; CHECK-NEXT:    store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 42, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP8:%.*]] = udiv <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 27, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <vscale x 2 x i1> [[TMP7]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> [[TMP8]], <vscale x 2 x i64> [[WIDE_LOAD]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -749,24 +770,24 @@ define void @predicated_udiv_by_constant(ptr noalias nocapture %a, i64 %n) {
 ; FIXED:       vector.body:
 ; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
-; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[TMP6:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
-; FIXED-NEXT:    [[TMP7:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD1]], <i64 42, i64 42>
-; FIXED-NEXT:    [[TMP8:%.*]] = udiv <2 x i64> [[WIDE_LOAD]], <i64 27, i64 27>
-; FIXED-NEXT:    [[TMP9:%.*]] = udiv <2 x i64> [[WIDE_LOAD1]], <i64 27, i64 27>
-; FIXED-NEXT:    [[TMP10:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
-; FIXED-NEXT:    [[TMP11:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>
-; FIXED-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[WIDE_LOAD]]
-; FIXED-NEXT:    [[PREDPHI2:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP9]], <2 x i64> [[WIDE_LOAD1]]
-; FIXED-NEXT:    store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
-; FIXED-NEXT:    store <2 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
+; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[TMP6:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], <i64 42, i64 42, i64 42, i64 42>
+; FIXED-NEXT:    [[TMP7:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], <i64 42, i64 42, i64 42, i64 42>
+; FIXED-NEXT:    [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD]], <i64 27, i64 27, i64 27, i64 27>
+; FIXED-NEXT:    [[TMP9:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], <i64 27, i64 27, i64 27, i64 27>
+; FIXED-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; FIXED-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; FIXED-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD]]
+; FIXED-NEXT:    [[PREDPHI2:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP9]], <4 x i64> [[WIDE_LOAD1]]
+; FIXED-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
+; FIXED-NEXT:    store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FIXED-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
 ; FIXED:       middle.block:
@@ -820,28 +841,31 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-LABEL: @predicated_sdiv_by_constant(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 42, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP6:%.*]] = sdiv <vscale x 1 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 1 x i64> insertelement (<vscale x 1 x i64> poison, i64 27, i64 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP7:%.*]] = xor <vscale x 1 x i1> [[TMP5]], shufflevector (<vscale x 1 x i1> insertelement (<vscale x 1 x i1> poison, i1 true, i64 0), <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer)
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 1 x i1> [[TMP5]], <vscale x 1 x i64> [[TMP6]], <vscale x 1 x i64> [[WIDE_LOAD]]
-; CHECK-NEXT:    store <vscale x 1 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
-; CHECK-NEXT:    [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 42, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP8:%.*]] = sdiv <vscale x 2 x i64> [[WIDE_LOAD]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 27, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP9:%.*]] = xor <vscale x 2 x i1> [[TMP7]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 2 x i1> [[TMP7]], <vscale x 2 x i64> [[TMP8]], <vscale x 2 x i64> [[WIDE_LOAD]]
+; CHECK-NEXT:    store <vscale x 2 x i64> [[PREDPHI]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -874,24 +898,24 @@ define void @predicated_sdiv_by_constant(ptr noalias nocapture %a, i64 %n) {
 ; FIXED:       vector.body:
 ; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
+; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
 ; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
 ; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
-; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
-; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
-; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[TMP6:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD]], <i64 42, i64 42>
-; FIXED-NEXT:    [[TMP7:%.*]] = icmp ne <2 x i64> [[WIDE_LOAD1]], <i64 42, i64 42>
-; FIXED-NEXT:    [[TMP8:%.*]] = sdiv <2 x i64> [[WIDE_LOAD]], <i64 27, i64 27>
-; FIXED-NEXT:    [[TMP9:%.*]] = sdiv <2 x i64> [[WIDE_LOAD1]], <i64 27, i64 27>
-; FIXED-NEXT:    [[TMP10:%.*]] = xor <2 x i1> [[TMP6]], <i1 true, i1 true>
-; FIXED-NEXT:    [[TMP11:%.*]] = xor <2 x i1> [[TMP7]], <i1 true, i1 true>
-; FIXED-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP6]], <2 x i64> [[TMP8]], <2 x i64> [[WIDE_LOAD]]
-; FIXED-NEXT:    [[PREDPHI2:%.*]] = select <2 x i1> [[TMP7]], <2 x i64> [[TMP9]], <2 x i64> [[WIDE_LOAD1]]
-; FIXED-NEXT:    store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
-; FIXED-NEXT:    store <2 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8
-; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
+; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4
+; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[TMP6:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD]], <i64 42, i64 42, i64 42, i64 42>
+; FIXED-NEXT:    [[TMP7:%.*]] = icmp ne <4 x i64> [[WIDE_LOAD1]], <i64 42, i64 42, i64 42, i64 42>
+; FIXED-NEXT:    [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD]], <i64 27, i64 27, i64 27, i64 27>
+; FIXED-NEXT:    [[TMP9:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], <i64 27, i64 27, i64 27, i64 27>
+; FIXED-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true>
+; FIXED-NEXT:    [[TMP11:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
+; FIXED-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD]]
+; FIXED-NEXT:    [[PREDPHI2:%.*]] = select <4 x i1> [[TMP7]], <4 x i64> [[TMP9]], <4 x i64> [[WIDE_LOAD1]]
+; FIXED-NEXT:    store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8
+; FIXED-NEXT:    store <4 x i64> [[PREDPHI2]], ptr [[TMP5]], align 8
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; FIXED-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; FIXED:       middle.block:
@@ -945,12 +969,12 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-LABEL: @predicated_sdiv_by_minus_one(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 8
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 16
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 16
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -959,15 +983,15 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[TMP5]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[TMP6]], align 1
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <vscale x 8 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -128, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP8:%.*]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 -1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i64 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP9:%.*]] = sdiv <vscale x 8 x i8> [[WIDE_LOAD]], [[TMP8]]
-; CHECK-NEXT:    [[TMP10:%.*]] = xor <vscale x 8 x i1> [[TMP7]], shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer)
-; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x i8> [[TMP9]], <vscale x 8 x i8> [[WIDE_LOAD]]
-; CHECK-NEXT:    store <vscale x 8 x i8> [[PREDPHI]], ptr [[TMP6]], align 1
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP6]], align 1
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -128, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP8:%.*]] = select <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 -1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP9:%.*]] = sdiv <vscale x 16 x i8> [[WIDE_LOAD]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = xor <vscale x 16 x i1> [[TMP7]], shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer)
+; CHECK-NEXT:    [[PREDPHI:%.*]] = select <vscale x 16 x i1> [[TMP7]], <vscale x 16 x i8> [[TMP9]], <vscale x 16 x i8> [[WIDE_LOAD]]
+; CHECK-NEXT:    store <vscale x 16 x i8> [[PREDPHI]], ptr [[TMP6]], align 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP11]], 8
+; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP11]], 16
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
@@ -1003,26 +1027,26 @@ define void @predicated_sdiv_by_minus_one(ptr noalias nocapture %a, i64 %n) {
 ; FIXED:       vector.body:
 ; FIXED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; FIXED-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 16
+; FIXED-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 32
 ; FIXED-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[TMP0]]
 ; FIXED-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP1]]
 ; FIXED-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0
-; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP4]], align 1
-; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 16
-; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <16 x i8>, ptr [[TMP5]], align 1
-; FIXED-NEXT:    [[TMP6:%.*]] = icmp ne <16 x i8> [[WIDE_LOAD]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
-; FIXED-NEXT:    [[TMP7:%.*]] = icmp ne <16 x i8> [[WIDE_LOAD1]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
-; FIXED-NEXT:    [[TMP8:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; FIXED-NEXT:    [[TMP9:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
-; FIXED-NEXT:    [[TMP10:%.*]] = sdiv <16 x i8> [[WIDE_LOAD]], [[TMP8]]
-; FIXED-NEXT:    [[TMP11:%.*]] = sdiv <16 x i8> [[WIDE_LOAD1]], [[TMP9]]
-; FIXED-NEXT:    [[TMP12:%.*]] = xor <16 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
-; FIXED-NEXT:    [[TMP13:%.*]] = xor <16 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
-; FIXED-NEXT:    [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i8> [[TMP10]], <16 x i8> [[WIDE_LOAD]]
-; FIXED-NEXT:    [[PREDPHI2:%.*]] = select <16 x i1> [[TMP7]], <16 x i8> [[TMP11]], <16 x i8> [[WIDE_LOAD1]]
-; FIXED-NEXT:    store <16 x i8> [[PREDPHI]], ptr [[TMP4]], align 1
-; FIXED-NEXT:    store <16 x i8> [[PREDPHI2]], ptr [[TMP5]], align 1
-; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
+; FIXED-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP4]], align 1
+; FIXED-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 32
+; FIXED-NEXT:    [[WIDE_LOAD1:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1
+; FIXED-NEXT:    [[TMP6:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+; FIXED-NEXT:    [[TMP7:%.*]] = icmp ne <32 x i8> [[WIDE_LOAD1]], <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>
+; FIXED-NEXT:    [[TMP8:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; FIXED-NEXT:    [[TMP9:%.*]] = select <32 x i1> [[TMP7]], <32 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <32 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; FIXED-NEXT:    [[TMP10:%.*]] = sdiv <32 x i8> [[WIDE_LOAD]], [[TMP8]]
+; FIXED-NEXT:    [[TMP11:%.*]] = sdiv <32 x i8> [[WIDE_LOAD1]], [[TMP9]]
+; FIXED-NEXT:    [[TMP12:%.*]] = xor <32 x i1> [[TMP6]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+; FIXED-NEXT:    [[TMP13:%.*]] = xor <32 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
+; FIXED-NEXT:    [[PREDPHI:%.*]] = select <32 x i1> [[TMP6]], <32 x i8> [[TMP10]], <32 x i8> [[WIDE_LOAD]]
+; FIXED-NEXT:    [[PREDPHI2:%.*]] = select <32 x i1> [[TMP7]], <32 x i8> [[TMP11]], <32 x i8> [[WIDE_LOAD1]]
+; FIXED-NEXT:    store <32 x i8> [[PREDPHI]], ptr [[TMP4]], align 1
+; FIXED-NEXT:    store <32 x i8> [[PREDPHI2]], ptr [[TMP5]], align 1
+; FIXED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
 ; FIXED-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; FIXED-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
 ; FIXED:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
index 56c8f901a668d..0f7600e9b2235 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/illegal-type.ll
@@ -102,31 +102,31 @@ define void @uniform_store_i1(ptr noalias %dst, ptr noalias %start, i64 %N) {
 ; CHECK-LABEL: @uniform_store_i1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 32
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 32
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 64
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[N_VEC]], 8
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr i8, ptr [[START:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x ptr> poison, ptr [[START]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT]], <16 x ptr> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <16 x ptr> poison, ptr [[START]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <16 x ptr> [[BROADCAST_SPLATINSERT2]], <16 x ptr> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x ptr> poison, ptr [[START]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x ptr> [[BROADCAST_SPLATINSERT]], <32 x ptr> poison, <32 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <32 x ptr> poison, ptr [[START]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <32 x ptr> [[BROADCAST_SPLATINSERT2]], <32 x ptr> poison, <32 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <16 x i64> <i64 0, i64 8, i64 16, i64 24, i64 32, i64 40, i64 48, i64 56, i64 64, i64 72, i64 80, i64 88, i64 96, i64 104, i64 112, i64 120>
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <16 x i64> <i64 128, i64 136, i64 144, i64 152, i64 160, i64 168, i64 176, i64 184, i64 192, i64 200, i64 208, i64 216, i64 224, i64 232, i64 240, i64 248>
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, <16 x ptr> [[TMP2]], i64 1
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, <16 x ptr> [[TMP3]], i64 1
-; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <16 x ptr> [[TMP4]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <16 x ptr> [[TMP5]], [[BROADCAST_SPLAT3]]
-; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <16 x i1> [[TMP7]], i32 15
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> <i64 0, i64 8, i64 16, i64 24, i64 32, i64 40, i64 48, i64 56, i64 64, i64 72, i64 80, i64 88, i64 96, i64 104, i64 112, i64 120, i64 128, i64 136, i64 144, i64 152, i64 160, i64 168, i64 176, i64 184, i64 192, i64 200, i64 208, i64 216, i64 224, i64 232, i64 240, i64 248>
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <32 x i64> <i64 256, i64 264, i64 272, i64 280, i64 288, i64 296, i64 304, i64 312, i64 320, i64 328, i64 336, i64 344, i64 352, i64 360, i64 368, i64 376, i64 384, i64 392, i64 400, i64 408, i64 416, i64 424, i64 432, i64 440, i64 448, i64 456, i64 464, i64 472, i64 480, i64 488, i64 496, i64 504>
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, <32 x ptr> [[TMP2]], i64 1
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, <32 x ptr> [[TMP3]], i64 1
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <32 x ptr> [[TMP4]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq <32 x ptr> [[TMP5]], [[BROADCAST_SPLAT3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <32 x i1> [[TMP7]], i32 31
 ; CHECK-NEXT:    store i1 [[TMP8]], ptr [[DST:%.*]], align 1
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
-; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 256
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
+; CHECK-NEXT:    [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 512
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ; CHECK:       middle.block:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 2eb8ac4086f78..1310ed3618b2c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -13,31 +13,31 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; OUTLOOP-NEXT:    br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; OUTLOOP:       for.body.preheader:
 ; OUTLOOP-NEXT:    [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; OUTLOOP-NEXT:    [[TMP1:%.*]] = mul i32 [[TMP0]], 2
+; OUTLOOP-NEXT:    [[TMP1:%.*]] = mul i32 [[TMP0]], 4
 ; OUTLOOP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], [[TMP1]]
 ; OUTLOOP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; OUTLOOP:       vector.ph:
 ; OUTLOOP-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; OUTLOOP-NEXT:    [[TMP3:%.*]] = mul i32 [[TMP2]], 2
+; OUTLOOP-NEXT:    [[TMP3:%.*]] = mul i32 [[TMP2]], 4
 ; OUTLOOP-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]]
 ; OUTLOOP-NEXT:    [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
 ; OUTLOOP-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; OUTLOOP:       vector.body:
 ; OUTLOOP-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; OUTLOOP-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
+; OUTLOOP-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
 ; OUTLOOP-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 0
 ; OUTLOOP-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP4]]
 ; OUTLOOP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
-; OUTLOOP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i16>, ptr [[TMP6]], align 2
-; OUTLOOP-NEXT:    [[TMP7:%.*]] = sext <vscale x 2 x i16> [[WIDE_LOAD]] to <vscale x 2 x i32>
-; OUTLOOP-NEXT:    [[TMP8]] = add <vscale x 2 x i32> [[VEC_PHI]], [[TMP7]]
+; OUTLOOP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP6]], align 2
+; OUTLOOP-NEXT:    [[TMP7:%.*]] = sext <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x i32>
+; OUTLOOP-NEXT:    [[TMP8]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP7]]
 ; OUTLOOP-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
-; OUTLOOP-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], 2
+; OUTLOOP-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], 4
 ; OUTLOOP-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP10]]
 ; OUTLOOP-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; OUTLOOP-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; OUTLOOP:       middle.block:
-; OUTLOOP-NEXT:    [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[TMP8]])
+; OUTLOOP-NEXT:    [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP8]])
 ; OUTLOOP-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]]
 ; OUTLOOP-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; OUTLOOP:       scalar.ph:
@@ -67,12 +67,12 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; INLOOP-NEXT:    br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
 ; INLOOP:       for.body.preheader:
 ; INLOOP-NEXT:    [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; INLOOP-NEXT:    [[TMP1:%.*]] = mul i32 [[TMP0]], 4
+; INLOOP-NEXT:    [[TMP1:%.*]] = mul i32 [[TMP0]], 8
 ; INLOOP-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], [[TMP1]]
 ; INLOOP-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; INLOOP:       vector.ph:
 ; INLOOP-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; INLOOP-NEXT:    [[TMP3:%.*]] = mul i32 [[TMP2]], 4
+; INLOOP-NEXT:    [[TMP3:%.*]] = mul i32 [[TMP2]], 8
 ; INLOOP-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP3]]
 ; INLOOP-NEXT:    [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]]
 ; INLOOP-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -82,12 +82,12 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
 ; INLOOP-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 0
 ; INLOOP-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP4]]
 ; INLOOP-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, ptr [[TMP5]], i32 0
-; INLOOP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP6]], align 2
-; INLOOP-NEXT:    [[TMP7:%.*]] = sext <vscale x 4 x i16> [[WIDE_LOAD]] to <vscale x 4 x i32>
-; INLOOP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP7]])
+; INLOOP-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i16>, ptr [[TMP6]], align 2
+; INLOOP-NEXT:    [[TMP7:%.*]] = sext <vscale x 8 x i16> [[WIDE_LOAD]] to <vscale x 8 x i32>
+; INLOOP-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP7]])
 ; INLOOP-NEXT:    [[TMP9]] = add i32 [[TMP8]], [[VEC_PHI]]
 ; INLOOP-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vscale.i32()
-; INLOOP-NEXT:    [[TMP11:%.*]] = mul i32 [[TMP10]], 4
+; INLOOP-NEXT:    [[TMP11:%.*]] = mul i32 [[TMP10]], 8
 ; INLOOP-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP11]]
 ; INLOOP-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; INLOOP-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
index d51f7becebeb5..827131ed19117 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
@@ -13,18 +13,18 @@ define void @load_store_factor2_i32(ptr %p) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 [[TMP0]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP3]], align 4
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT:    [[STRIDED_VEC1:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT:    [[TMP4:%.*]] = add <8 x i32> [[STRIDED_VEC]], <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
 ; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = add <4 x i32> [[STRIDED_VEC1]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[TMP7:%.*]] = add <8 x i32> [[STRIDED_VEC1]], <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
 ; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[TMP6]], i32 -1
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x i32> [[TMP9]], <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
-; CHECK-NEXT:    store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x i32> [[TMP4]], <8 x i32> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = shufflevector <16 x i32> [[TMP9]], <16 x i32> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+; CHECK-NEXT:    store <16 x i32> [[INTERLEAVED_VEC]], ptr [[TMP8]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
@@ -125,46 +125,46 @@ define void @load_store_factor3_i32(ptr %p) {
 ; CHECK-LABEL: @load_store_factor3_i32(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
-; CHECK-NEXT:    [[TMP5:%.*]] = add <vscale x 2 x i64> [[TMP4]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = mul <vscale x 2 x i64> [[TMP5]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP6]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = add <vscale x 4 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]]
 ; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 2
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 4
 ; CHECK-NEXT:    [[TMP9:%.*]] = mul i64 1, [[TMP8]]
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP9]], i64 0
-; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 2 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = mul <vscale x 2 x i64> [[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 3, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 2 x i64> [[TMP10]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[TMP11]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i32> poison)
-; CHECK-NEXT:    [[TMP12:%.*]] = add <vscale x 2 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 1, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> [[TMP12]], <vscale x 2 x ptr> [[TMP11]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; CHECK-NEXT:    [[TMP13:%.*]] = add <vscale x 2 x i64> [[TMP10]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i32, ptr [[P]], <vscale x 2 x i64> [[TMP13]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[TMP14]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i32> poison)
-; CHECK-NEXT:    [[TMP15:%.*]] = add <vscale x 2 x i32> [[WIDE_MASKED_GATHER1]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 2, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> [[TMP15]], <vscale x 2 x ptr> [[TMP14]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; CHECK-NEXT:    [[TMP16:%.*]] = add <vscale x 2 x i64> [[TMP13]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 1, i64 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i32, ptr [[P]], <vscale x 2 x i64> [[TMP16]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32.nxv2p0(<vscale x 2 x ptr> [[TMP17]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i32> poison)
-; CHECK-NEXT:    [[TMP18:%.*]] = add <vscale x 2 x i32> [[WIDE_MASKED_GATHER2]], shufflevector (<vscale x 2 x i32> insertelement (<vscale x 2 x i32> poison, i32 3, i64 0), <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer)
-; CHECK-NEXT:    call void @llvm.masked.scatter.nxv2i32.nxv2p0(<vscale x 2 x i32> [[TMP18]], <vscale x 2 x ptr> [[TMP17]], i32 4, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP10:%.*]] = mul <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[TMP10]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call
@llvm.masked.gather.nxv4i32.nxv4p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP12:%.*]] = add [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP12]], [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP13:%.*]] = add [[TMP10]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[P]], [[TMP13]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP15:%.*]] = add [[WIDE_MASKED_GATHER1]], shufflevector ( insertelement ( poison, i32 2, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP15]], [[TMP14]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; CHECK-NEXT: [[TMP16:%.*]] = add [[TMP13]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[P]], [[TMP16]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP17]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; CHECK-NEXT: [[TMP18:%.*]] = add [[WIDE_MASKED_GATHER2]], shufflevector ( insertelement ( poison, i32 3, i64 0), poison, zeroinitializer) +; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0( [[TMP18]], [[TMP17]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) ; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 +; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -404,28 +404,28 @@ define void @combine_load_factor2_i32(ptr noalias %p, ptr noalias %q) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[TMP5]], i32 0 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP6]], align 4 -; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: 
[[STRIDED_VEC3:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i32> [[STRIDED_VEC]], [[STRIDED_VEC3]]
-; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i32> [[STRIDED_VEC]], [[STRIDED_VEC3]]
+; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i32> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[Q:%.*]], i64 [[TMP0]]
 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[Q]], i64 [[TMP1]]
 ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
-; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP12]], align 4
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i32 4
-; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[TMP13]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: store <8 x i32> [[TMP8]], ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP10]], i32 8
+; CHECK-NEXT: store <8 x i32> [[TMP9]], ptr [[TMP13]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: middle.block:
@@ -479,21 +479,45 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
 ; CHECK-LABEL: @combine_load_factor2_i64(
 ; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC1]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
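Both factor-2 functions here follow the same shape once the interleave group is vectorized: one wide load pulls in both interleaved fields, two shufflevector masks split the even and odd lanes, and for stores a further shuffle re-interleaves the results. Restated as a self-contained sketch at the old VF of 4 (the function name @deinterleave2_add is illustrative, not from the test):

  define <4 x i32> @deinterleave2_add(ptr %p) {
    ; one wide load covers both fields of the interleaved i32 pair
    %wide = load <8 x i32>, ptr %p, align 4
    ; even lanes = first field, odd lanes = second field
    %even = shufflevector <8 x i32> %wide, <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    %odd  = shufflevector <8 x i32> %wide, <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
    %sum  = add <4 x i32> %even, %odd
    ret <4 x i32> %sum
  }

The regenerated checks are the same pattern with the lane count doubled.

+; CHECK: middle.block:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64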
1024, 1024 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[OFFSET0:%.*]] = shl i64 [[I]], 1 -; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET0]] +; CHECK-NEXT: [[Q0:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET0]] ; CHECK-NEXT: [[X0:%.*]] = load i64, ptr [[Q0]], align 4 ; CHECK-NEXT: [[OFFSET1:%.*]] = add i64 [[OFFSET0]], 1 ; CHECK-NEXT: [[Q1:%.*]] = getelementptr i64, ptr [[P]], i64 [[OFFSET1]] ; CHECK-NEXT: [[X1:%.*]] = load i64, ptr [[Q1]], align 4 ; CHECK-NEXT: [[RES:%.*]] = add i64 [[X0]], [[X1]] -; CHECK-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[I]] +; CHECK-NEXT: [[DST:%.*]] = getelementptr i64, ptr [[Q]], i64 [[I]] ; CHECK-NEXT: store i64 [[RES]], ptr [[DST]], align 4 ; CHECK-NEXT: [[NEXTI]] = add i64 [[I]], 1 ; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXTI]], 1024 -; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll index c456e0e1df7e8..9b3b90a7bc3b6 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/lmul.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=LMUL1 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S --riscv-v-register-bit-width-lmul=1 | FileCheck %s -check-prefix=LMUL1 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S --riscv-v-register-bit-width-lmul=2 | FileCheck %s -check-prefix=LMUL2 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S --riscv-v-register-bit-width-lmul=4 | FileCheck %s -check-prefix=LMUL4 ; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S --riscv-v-register-bit-width-lmul=8 | FileCheck %s -check-prefix=LMUL8 +; RUN: opt < %s -passes=loop-vectorize -mtriple riscv64 -mattr=+v -S | FileCheck %s -check-prefix=LMUL2 define void @load_store(ptr %p) { ; LMUL1-LABEL: @load_store( diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll index dc4522756c369..ace267d72dea0 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll @@ -9,9 +9,9 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8 +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 8 +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 16 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP4]] ; CHECK-NEXT: 
[[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] @@ -20,18 +20,18 @@ define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP5]], i64 5) +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP5]], i64 5) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, ptr [[SRC:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv8i8.p0(ptr [[TMP7]], i32 1, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = shl [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP7]], i32 1, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = shl [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i8 1, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv8i8.p0(ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP8]], [[WIDE_MASKED_LOAD1]] -; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP11]], ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call @llvm.masked.load.nxv16i8.p0(ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP11:%.*]] = add [[TMP8]], [[WIDE_MASKED_LOAD1]] +; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0( [[TMP11]], ptr [[TMP10]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 8 +; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[TMP12]], 16 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP13]] ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll index 3d580a5671783..34b06972dab06 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/mask-index-type.ll @@ -12,44 +12,44 @@ define void @test(ptr noalias nocapture %a, ptr noalias nocapture %b, i32 %v) { ; VLENUNK-LABEL: @test( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 ; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: ; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() -; VLENUNK-NEXT: [[TMP5:%.*]] = add [[TMP4]], 
zeroinitializer -; VLENUNK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; VLENUNK-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; VLENUNK-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer +; VLENUNK-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] ; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4 ; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] -; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 -; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; VLENUNK-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; VLENUNK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; VLENUNK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP11:%.*]] = icmp ult [[VEC_IND]], shufflevector ( insertelement ( poison, i64 512, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: [[TMP11:%.*]] = icmp ult [[VEC_IND]], shufflevector ( insertelement ( poison, i64 512, i64 0), poison, zeroinitializer) ; VLENUNK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP10]] ; VLENUNK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[TMP12]], i32 0 -; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i32.p0(ptr [[TMP13]], i32 4, [[TMP11]], poison) -; VLENUNK-NEXT: [[TMP14:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; VLENUNK-NEXT: [[PREDPHI:%.*]] = select [[TMP14]], zeroinitializer, [[WIDE_MASKED_LOAD]] -; VLENUNK-NEXT: [[TMP15:%.*]] = add [[PREDPHI]], [[BROADCAST_SPLAT]] +; VLENUNK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP13]], i32 4, [[TMP11]], poison) +; VLENUNK-NEXT: [[TMP14:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; VLENUNK-NEXT: [[PREDPHI:%.*]] = select [[TMP14]], zeroinitializer, [[WIDE_MASKED_LOAD]] +; VLENUNK-NEXT: [[TMP15:%.*]] = add [[PREDPHI]], [[BROADCAST_SPLAT]] ; VLENUNK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP10]] ; VLENUNK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 -; VLENUNK-NEXT: store [[TMP15]], ptr [[TMP17]], align 4 +; VLENUNK-NEXT: store [[TMP15]], ptr [[TMP17]], align 4 ; VLENUNK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() 
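The PREDPHI sequence checked in this hunk is the vectorizer's if-conversion idiom: load only the lanes whose index passed the icmp, then select zero for the masked-off lanes. In isolation it is roughly the following sketch, which reuses the @llvm.masked.load.nxv4i32.p0 intrinsic from the checks above; the function and parameter names are illustrative:

  declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)

  define <vscale x 4 x i32> @if_converted_load(ptr %a, <vscale x 4 x i1> %m) {
    ; masked-off lanes yield the poison passthru and are never dereferenced
    %ld = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr %a, i32 4, <vscale x 4 x i1> %m, <vscale x 4 x i32> poison)
    ; invert the mask with a splat-true constant, then pick zero on those lanes
    %not = xor <vscale x 4 x i1> %m, shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
    %res = select <vscale x 4 x i1> %not, <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> %ld
    ret <vscale x 4 x i32> %res
  }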
-; VLENUNK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; VLENUNK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] -; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; VLENUNK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] ; VLENUNK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLENUNK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index ac56579af2d26..00cabd58de913 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -17,8 +17,9 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-LABEL: @foo4( ; RV32-NEXT: entry: ; RV32-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 12, i64 [[TMP0]]) -; RV32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP1]] +; RV32-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; RV32-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) +; RV32-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP2]] ; RV32-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; RV32: vector.memcheck: ; RV32-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 79880 @@ -33,37 +34,40 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] ; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; RV32: vector.ph: -; RV32-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP2]] +; RV32-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; RV32-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV32-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] ; RV32-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 -; RV32-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; RV32-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer -; RV32-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] -; RV32-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[TMP7:%.*]] = mul i64 16, [[TMP6]] -; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; RV32-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; RV32-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; RV32-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; RV32-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; RV32-NEXT: [[TMP10:%.*]] = mul i64 16, [[TMP9]] +; RV32-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; RV32-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; RV32-NEXT: br label [[VECTOR_BODY:%.*]] ; RV32: vector.body: ; 
RV32-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV32-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV32-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] -; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i32.nxv1p0( [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 -; RV32-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[TMP10:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; RV32-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP10]] -; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv1f64.nxv1p0( [[TMP11]], i32 8, [[TMP9]], poison), !alias.scope !3 -; RV32-NEXT: [[TMP12:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to -; RV32-NEXT: [[TMP13:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP12]] -; RV32-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] -; RV32-NEXT: call void @llvm.masked.scatter.nxv1f64.nxv1p0( [[TMP13]], [[TMP14]], i32 8, [[TMP9]]), !alias.scope !5, !noalias !7 -; RV32-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] -; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; RV32-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; RV32-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; RV32-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; RV32-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] +; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 +; RV32-NEXT: [[TMP12:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[TMP13:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; RV32-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP13]] +; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( [[TMP14]], i32 8, [[TMP12]], poison), !alias.scope !3 +; RV32-NEXT: [[TMP15:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to +; RV32-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP15]] +; RV32-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] +; RV32-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP16]], [[TMP17]], i32 8, [[TMP12]]), !alias.scope !5, !noalias !7 +; RV32-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; RV32-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; RV32-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; RV32-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; RV32-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; RV32-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV32: middle.block: ; RV32-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, [[N_VEC]] ; RV32-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -73,30 +77,31 
@@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32: for.body: ; RV32-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; RV32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] -; RV32-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; RV32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP17]], 100 +; RV32-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; RV32-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP21]], 100 ; RV32-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; RV32: if.then: -; RV32-NEXT: [[TMP18:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 -; RV32-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP18]] -; RV32-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 -; RV32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP17]] to double -; RV32-NEXT: [[ADD:%.*]] = fadd double [[TMP19]], [[CONV]] +; RV32-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 +; RV32-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP22]] +; RV32-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 +; RV32-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP21]] to double +; RV32-NEXT: [[ADD:%.*]] = fadd double [[TMP23]], [[CONV]] ; RV32-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]] ; RV32-NEXT: store double [[ADD]], ptr [[ARRAYIDX7]], align 8 ; RV32-NEXT: br label [[FOR_INC]] ; RV32: for.inc: ; RV32-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16 ; RV32-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000 -; RV32-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP10:![0-9]+]] +; RV32-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP11:![0-9]+]] ; RV32: for.end: ; RV32-NEXT: ret void ; ; RV64-LABEL: @foo4( ; RV64-NEXT: entry: ; RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[TMP1:%.*]] = call i64 @llvm.umax.i64(i64 12, i64 [[TMP0]]) -; RV64-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP1]] +; RV64-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; RV64-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 16, i64 [[TMP1]]) +; RV64-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 625, [[TMP2]] ; RV64-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] ; RV64: vector.memcheck: ; RV64-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 79880 @@ -111,37 +116,40 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] ; RV64-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ; RV64: vector.ph: -; RV64-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP2]] +; RV64-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2 +; RV64-NEXT: [[N_MOD_VF:%.*]] = urem i64 625, [[TMP4]] ; RV64-NEXT: [[N_VEC:%.*]] = sub i64 625, [[N_MOD_VF]] ; RV64-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 16 -; RV64-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; RV64-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer -; RV64-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] -; RV64-NEXT: 
[[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[TMP7:%.*]] = mul i64 16, [[TMP6]] -; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; RV64-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; RV64-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; RV64-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 16, i64 0), poison, zeroinitializer) +; RV64-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; RV64-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; RV64-NEXT: [[TMP10:%.*]] = mul i64 16, [[TMP9]] +; RV64-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; RV64-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer ; RV64-NEXT: br label [[VECTOR_BODY:%.*]] ; RV64: vector.body: ; RV64-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV64-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; RV64-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] -; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i32.nxv1p0( [[TMP8]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 -; RV64-NEXT: [[TMP9:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[TMP10:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; RV64-NEXT: [[TMP11:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP10]] -; RV64-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv1f64.nxv1p0( [[TMP11]], i32 8, [[TMP9]], poison), !alias.scope !3 -; RV64-NEXT: [[TMP12:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to -; RV64-NEXT: [[TMP13:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP12]] -; RV64-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] -; RV64-NEXT: call void @llvm.masked.scatter.nxv1f64.nxv1p0( [[TMP13]], [[TMP14]], i32 8, [[TMP9]]), !alias.scope !5, !noalias !7 -; RV64-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() -; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] -; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; RV64-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; RV64-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; RV64-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; RV64-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], [[VEC_IND]] +; RV64-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i32.nxv2p0( [[TMP11]], i32 4, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison), !alias.scope !0 +; RV64-NEXT: [[TMP12:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], shufflevector ( insertelement ( poison, i32 100, i64 0), poison, zeroinitializer) +; RV64-NEXT: [[TMP13:%.*]] = shl nuw nsw [[VEC_IND]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; RV64-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[B]], [[TMP13]] +; RV64-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.masked.gather.nxv2f64.nxv2p0( 
[[TMP14]], i32 8, [[TMP12]], poison), !alias.scope !3 +; RV64-NEXT: [[TMP15:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to +; RV64-NEXT: [[TMP16:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP15]] +; RV64-NEXT: [[TMP17:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] +; RV64-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0( [[TMP16]], [[TMP17]], i32 8, [[TMP12]]), !alias.scope !5, !noalias !7 +; RV64-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; RV64-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 2 +; RV64-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP19]] +; RV64-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; RV64-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; RV64-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV64: middle.block: ; RV64-NEXT: [[CMP_N:%.*]] = icmp eq i64 625, [[N_VEC]] ; RV64-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -151,22 +159,22 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV64: for.body: ; RV64-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ] ; RV64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER]], i64 [[INDVARS_IV]] -; RV64-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; RV64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP17]], 100 +; RV64-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; RV64-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP21]], 100 ; RV64-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] ; RV64: if.then: -; RV64-NEXT: [[TMP18:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 -; RV64-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP18]] -; RV64-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 -; RV64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP17]] to double -; RV64-NEXT: [[ADD:%.*]] = fadd double [[TMP19]], [[CONV]] +; RV64-NEXT: [[TMP22:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 1 +; RV64-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[TMP22]] +; RV64-NEXT: [[TMP23:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 +; RV64-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP21]] to double +; RV64-NEXT: [[ADD:%.*]] = fadd double [[TMP23]], [[CONV]] ; RV64-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]] ; RV64-NEXT: store double [[ADD]], ptr [[ARRAYIDX7]], align 8 ; RV64-NEXT: br label [[FOR_INC]] ; RV64: for.inc: ; RV64-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16 ; RV64-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000 -; RV64-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP10:![0-9]+]] +; RV64-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP11:![0-9]+]] ; RV64: for.end: ; RV64-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll index 5fc46c203167f..7f4eb387a1ece 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll @@ -5,8 +5,8 @@ ; CHECK-LABEL: foo ; CHECK: LV: IC is 2 -; CHECK: %{{.*}} = add <4 x i32> %{{.*}}, -; CHECK: %{{.*}} = add {{.*}}, 8 +; CHECK: %{{.*}} = add <8 x i32> %{{.*}}, +; CHECK: %{{.*}} = add {{.*}}, 16 ; Function Attrs: nofree norecurse nosync nounwind writeonly define dso_local void @foo(i32 signext %n, ptr 
nocapture %A) local_unnamed_addr #0 { diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll index 4e9ec86df7ca0..e8d5dc6211b70 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll @@ -14,10 +14,10 @@ define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture r ; LMUL1-NEXT: [[TMP0:%.*]] = add i32 [[SIZE]], -1 ; LMUL1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ; LMUL1-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; LMUL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4 +; LMUL1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8 ; LMUL1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; LMUL1: vector.ph: -; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4 +; LMUL1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8 ; LMUL1-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]] ; LMUL1-NEXT: br label [[VECTOR_BODY:%.*]] ; LMUL1: vector.body: @@ -25,15 +25,15 @@ define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture r ; LMUL1-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 ; LMUL1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]] ; LMUL1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 -; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4 +; LMUL1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[TMP5]], align 4 ; LMUL1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]] ; LMUL1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; LMUL1-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP7]], align 4 -; LMUL1-NEXT: [[TMP8:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; LMUL1-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i32>, ptr [[TMP7]], align 4 +; LMUL1-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] ; LMUL1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP3]] ; LMUL1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 -; LMUL1-NEXT: store <4 x i32> [[TMP8]], ptr [[TMP10]], align 4 -; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; LMUL1-NEXT: store <8 x i32> [[TMP8]], ptr [[TMP10]], align 4 +; LMUL1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; LMUL1-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; LMUL1-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; LMUL1: middle.block: @@ -54,7 +54,7 @@ define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture r ; LMUL1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; LMUL1-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; LMUL1-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[SIZE]] -; LMUL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; LMUL1-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; LMUL1: for.end.loopexit: ; LMUL1-NEXT: br label [[FOR_END]] ; LMUL1: for.end: @@ -108,7 +108,7 @@ define ptr @array_add(ptr noalias nocapture readonly %a, ptr noalias nocapture r ; LMUL2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; LMUL2-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 ; LMUL2-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[SIZE]] -; LMUL2-NEXT: 
br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] +; LMUL2-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; LMUL2: for.end.loopexit: ; LMUL2-NEXT: br label [[FOR_END]] ; LMUL2: for.end: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll index bc4e60425ac4c..a266ae643c1af 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/safe-dep-distance.ll @@ -11,27 +11,30 @@ define void @test(ptr %p) { ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 32 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 200 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP7]], align 32 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 32 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 200 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0 +; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP9]], align 32 +; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] @@ -47,7 +50,7 @@ define void @test(ptr %p) { ; CHECK-NEXT: store i64 [[V]], ptr [[A2]], align 32 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: 
[[CMP:%.*]] = icmp ne i64 [[IV]], 199 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -82,12 +85,12 @@ define void @test_may_clobber(ptr %p) { ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[TMP1]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 32 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP0]], 100 ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: store <2 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: store <4 x i64> [[WIDE_LOAD]], ptr [[TMP5]], align 32 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200 ; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: @@ -132,27 +135,30 @@ define void @trivial_due_max_vscale(ptr %p) { ; CHECK-LABEL: @trivial_due_max_vscale( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]] ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP3]], i32 0 -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 32 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 8192 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0 -; CHECK-NEXT: store [[WIDE_LOAD]], ptr [[TMP7]], align 32 -; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 32 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 8192 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 
0
+; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP9]], align 32
+; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
@@ -195,27 +201,30 @@ define void @no_high_lmul_or_interleave(ptr %p) {
 ; CHECK-LABEL: @no_high_lmul_or_interleave(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 200, [[TMP1]]
 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 200, [[TMP3]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 200, [[N_MOD_VF]]
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[TMP3]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 32
-; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], 1024
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP6]], i32 0
-; CHECK-NEXT: store <vscale x 1 x i64> [[WIDE_LOAD]], ptr [[TMP7]], align 32
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 32
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP4]], 1024
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i64, ptr [[TMP8]], i32 0
+; CHECK-NEXT: store <vscale x 2 x i64> [[WIDE_LOAD]], ptr [[TMP9]], align 32
+; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP11:%.*]] = mul i64 [[TMP10]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 200, [[N_VEC]]
 ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
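Each function in safe-dep-distance.ll stores at a fixed positive offset from the load, so the dependence distance in elements is an upper bound on VF * vscale. A sketch of the loop shape under test, with an illustrative name @store_forward_1024 and the trip count and offset taken from no_high_lmul_or_interleave above:

  define void @store_forward_1024(ptr %p) {
  entry:
    br label %loop
  loop:
    %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
    %src = getelementptr i64, ptr %p, i64 %i
    %v = load i64, ptr %src, align 32
    %off = add i64 %i, 1024            ; distance of 1024 i64 elements between load and store
    %dst = getelementptr i64, ptr %p, i64 %off
    store i64 %v, ptr %dst, align 32
    %i.next = add i64 %i, 1
    %done = icmp ne i64 %i.next, 200
    br i1 %done, label %loop, label %exit
  exit:
    ret void
  }

With the default now at LMUL 2, the loop still fits well under that bound, hence the plain mul-by-2 step in the updated checks.

diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll index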
53e00ad0fee2b..f7bc4bd35f377 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-basics.ll @@ -16,27 +16,30 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLENUNK-LABEL: @vector_add( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: -; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP5:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; VLENUNK-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; VLENUNK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLENUNK-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -58,27 +61,30 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLEN128-LABEL: @vector_add( ; VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: 
[[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: -; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; VLEN128-NEXT: store [[TMP5]], ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; VLEN128-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLEN128-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLEN128-NEXT: store [[TMP7]], ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -121,27 +127,27 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { ; VLENUNK-LABEL: @vector_add_i32( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 ; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: ; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = 
sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]] ; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; VLENUNK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; VLENUNK-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 +; VLENUNK-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLENUNK-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 ; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -166,27 +172,27 @@ define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) { ; VLEN128-LABEL: @vector_add_i32( ; VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 ; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: ; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 ; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[V:%.*]], i64 0 +; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]] ; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 -; VLEN128-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] -; VLEN128-NEXT: store [[TMP7]], ptr [[TMP6]], align 4 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 4 +; VLEN128-NEXT: [[TMP7:%.*]] = add [[WIDE_LOAD]], [[BROADCAST_SPLAT]] +; VLEN128-NEXT: store [[TMP7]], ptr [[TMP6]], 
align 4 ; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4 ; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] ; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -287,27 +293,30 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLENUNK-LABEL: @indexed_store( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: -; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] -; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] -; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; VLENUNK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; VLENUNK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] +; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; 
VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -329,27 +338,30 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; VLEN128-LABEL: @indexed_store( ; VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: -; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] -; VLEN128-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) -; VLEN128-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; VLEN128-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLEN128-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] +; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLEN128-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer)) +; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: 
[[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -389,34 +401,37 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLENUNK-LABEL: @indexed_load( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: -; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] -; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] -; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; VLENUNK-NEXT: [[TMP6]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; VLENUNK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VLENUNK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] +; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; VLENUNK-NEXT: [[TMP8]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; VLENUNK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VLENUNK: middle.block: -; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64( [[TMP6]]) +; VLENUNK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP8]]) ; VLENUNK-NEXT: 
[[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VLENUNK: scalar.ph: ; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VLENUNK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; VLENUNK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] ; VLENUNK-NEXT: br label [[FOR_BODY:%.*]] ; VLENUNK: for.body: ; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -430,40 +445,43 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VLENUNK: for.end: -; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] ; VLENUNK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] ; ; VLEN128-LABEL: @indexed_load( ; VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: -; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP2]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] -; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[TMP5]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) -; VLEN128-NEXT: [[TMP6]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP7]] -; VLEN128-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLEN128-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]] +; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = 
load , ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_LOAD]] +; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP7]], i32 8, shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), poison) +; VLEN128-NEXT: [[TMP8]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; VLEN128-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]] +; VLEN128-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; VLEN128: middle.block: -; VLEN128-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64( [[TMP6]]) +; VLEN128-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP8]]) ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; VLEN128: scalar.ph: ; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] ; VLEN128-NEXT: br label [[FOR_BODY:%.*]] ; VLEN128: for.body: ; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -477,7 +495,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; VLEN128: for.end: -; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ] +; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] ; VLEN128-NEXT: ret i64 [[SUM_NEXT_LCSSA]] ; entry: @@ -503,25 +521,28 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLENUNK-LABEL: @splat_int( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: -; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; VLENUNK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -541,25 +562,28 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; VLEN128-LABEL: @splat_int( ; VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: -; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP5:%.*]] 
= getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -595,25 +619,28 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { ; VLENUNK-LABEL: @splat_ptr( ; VLENUNK-NEXT: entry: ; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLENUNK: vector.ph: -; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 -; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 +; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]] ; VLENUNK: vector.body: ; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0 -; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 +; VLENUNK-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; VLENUNK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VLENUNK: middle.block: ; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label 
[[SCALAR_PH]] @@ -633,25 +660,28 @@ define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) { ; VLEN128-LABEL: @splat_ptr( ; VLEN128-NEXT: entry: ; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; VLEN128: vector.ph: -; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 -; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[V:%.*]], i64 0 +; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]] ; VLEN128: vector.body: ; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0 -; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0 +; VLEN128-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; VLEN128: middle.block: ; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll index 8ed7b6444ec6e..c553977a83626 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll @@ -227,17 +227,17 @@ for.end: } ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
-; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2) +; CHECK-REMARK: vectorized loop (vectorization width: 16, interleaved count: 2) define bfloat @fadd_fast_bfloat(ptr noalias nocapture readonly %a, i64 %n) { ; CHECK-LABEL: @fadd_fast_bfloat ; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <8 x bfloat> -; CHECK: %[[LOAD2:.*]] = load <8 x bfloat> -; CHECK: %[[FADD1:.*]] = fadd fast <8 x bfloat> %[[LOAD1]] -; CHECK: %[[FADD2:.*]] = fadd fast <8 x bfloat> %[[LOAD2]] +; CHECK: %[[LOAD1:.*]] = load <16 x bfloat> +; CHECK: %[[LOAD2:.*]] = load <16 x bfloat> +; CHECK: %[[FADD1:.*]] = fadd fast <16 x bfloat> %[[LOAD1]] +; CHECK: %[[FADD2:.*]] = fadd fast <16 x bfloat> %[[LOAD2]] ; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = fadd fast <8 x bfloat> %[[FADD2]], %[[FADD1]] -; CHECK: call fast bfloat @llvm.vector.reduce.fadd.v8bf16(bfloat 0xR8000, <8 x bfloat> %[[RDX]]) +; CHECK: %[[RDX:.*]] = fadd fast <16 x bfloat> %[[FADD2]], %[[FADD1]] +; CHECK: call fast bfloat @llvm.vector.reduce.fadd.v16bf16(bfloat 0xR8000, <16 x bfloat> %[[RDX]]) entry: br label %for.body @@ -328,17 +328,17 @@ for.end: ; MUL ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. -; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2) +; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2) define i32 @mul(ptr nocapture %a, ptr nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @mul ; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <4 x i32> -; CHECK: %[[LOAD2:.*]] = load <4 x i32> -; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD1]] -; CHECK: %[[MUL2:.*]] = mul <4 x i32> %[[LOAD2]] +; CHECK: %[[LOAD1:.*]] = load <8 x i32> +; CHECK: %[[LOAD2:.*]] = load <8 x i32> +; CHECK: %[[MUL1:.*]] = mul <8 x i32> %[[LOAD1]] +; CHECK: %[[MUL2:.*]] = mul <8 x i32> %[[LOAD2]] ; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = mul <4 x i32> %[[MUL2]], %[[MUL1]] -; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[RDX]]) +; CHECK: %[[RDX:.*]] = mul <8 x i32> %[[MUL2]], %[[MUL1]] +; CHECK: call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %[[RDX]]) entry: br label %for.body @@ -358,21 +358,21 @@ for.end: ; preds = %for.body, %entry ; Note: This test was added to ensure we always check the legality of reductions (and emit a warning if necessary) before checking for memory dependencies ; CHECK-REMARK: Scalable vectorization not supported for the reduction operations found in this loop. 
-; CHECK-REMARK: vectorized loop (vectorization width: 4, interleaved count: 2) +; CHECK-REMARK: vectorized loop (vectorization width: 8, interleaved count: 2) define i32 @memory_dependence(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, i64 %n) { ; CHECK-LABEL: @memory_dependence ; CHECK: vector.body: -; CHECK: %[[LOAD1:.*]] = load <4 x i32> -; CHECK: %[[LOAD2:.*]] = load <4 x i32> -; CHECK: %[[LOAD3:.*]] = load <4 x i32> -; CHECK: %[[LOAD4:.*]] = load <4 x i32> -; CHECK: %[[ADD1:.*]] = add nsw <4 x i32> %[[LOAD3]], %[[LOAD1]] -; CHECK: %[[ADD2:.*]] = add nsw <4 x i32> %[[LOAD4]], %[[LOAD2]] -; CHECK: %[[MUL1:.*]] = mul <4 x i32> %[[LOAD3]] -; CHECK: %[[MUL2:.*]] = mul <4 x i32> %[[LOAD4]] +; CHECK: %[[LOAD1:.*]] = load <8 x i32> +; CHECK: %[[LOAD2:.*]] = load <8 x i32> +; CHECK: %[[LOAD3:.*]] = load <8 x i32> +; CHECK: %[[LOAD4:.*]] = load <8 x i32> +; CHECK: %[[ADD1:.*]] = add nsw <8 x i32> %[[LOAD3]], %[[LOAD1]] +; CHECK: %[[ADD2:.*]] = add nsw <8 x i32> %[[LOAD4]], %[[LOAD2]] +; CHECK: %[[MUL1:.*]] = mul <8 x i32> %[[LOAD3]] +; CHECK: %[[MUL2:.*]] = mul <8 x i32> %[[LOAD4]] ; CHECK: middle.block: -; CHECK: %[[RDX:.*]] = mul <4 x i32> %[[MUL2]], %[[MUL1]] -; CHECK: call i32 @llvm.vector.reduce.mul.v4i32(<4 x i32> %[[RDX]]) +; CHECK: %[[RDX:.*]] = mul <8 x i32> %[[MUL2]], %[[MUL1]] +; CHECK: call i32 @llvm.vector.reduce.mul.v8i32(<8 x i32> %[[RDX]]) entry: br label %for.body @@ -396,19 +396,19 @@ for.end: ret i32 %mul } -; CHECK-REMARK: vectorized loop (vectorization width: vscale x 2, interleaved count: 2) +; CHECK-REMARK: vectorized loop (vectorization width: vscale x 4, interleaved count: 2) define float @fmuladd(ptr %a, ptr %b, i64 %n) { ; CHECK-LABEL: @fmuladd( ; CHECK: vector.body: -; CHECK: [[WIDE_LOAD:%.*]] = load -; CHECK: [[WIDE_LOAD2:%.*]] = load -; CHECK: [[WIDE_LOAD3:%.*]] = load -; CHECK: [[WIDE_LOAD4:%.*]] = load -; CHECK: [[MULADD1:%.*]] = call reassoc @llvm.fmuladd.nxv2f32( [[WIDE_LOAD]], [[WIDE_LOAD3]], -; CHECK: [[MULADD2:%.*]] = call reassoc @llvm.fmuladd.nxv2f32( [[WIDE_LOAD2]], [[WIDE_LOAD4]], +; CHECK: [[WIDE_LOAD:%.*]] = load +; CHECK: [[WIDE_LOAD2:%.*]] = load +; CHECK: [[WIDE_LOAD3:%.*]] = load +; CHECK: [[WIDE_LOAD4:%.*]] = load +; CHECK: [[MULADD1:%.*]] = call reassoc @llvm.fmuladd.nxv4f32( [[WIDE_LOAD]], [[WIDE_LOAD3]], +; CHECK: [[MULADD2:%.*]] = call reassoc @llvm.fmuladd.nxv4f32( [[WIDE_LOAD2]], [[WIDE_LOAD4]], ; CHECK: middle.block: -; CHECK: [[BIN_RDX:%.*]] = fadd reassoc [[MULADD2]], [[MULADD1]] -; CHECK: call reassoc float @llvm.vector.reduce.fadd.nxv2f32(float -0.000000e+00, [[BIN_RDX]]) +; CHECK: [[BIN_RDX:%.*]] = fadd reassoc [[MULADD2]], [[MULADD1]] +; CHECK: call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, [[BIN_RDX]]) ; entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll index 6d057f378d199..5e231da7e7b57 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -12,27 +12,30 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 
[[N_RND_UP]], [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
 ; CHECK: vector.body:
 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024)
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i64> @llvm.masked.load.nxv1i64.p0(ptr [[TMP5]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i64> poison)
-; CHECK-NEXT: [[TMP6:%.*]] = add <vscale x 1 x i64> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0(<vscale x 1 x i64> [[TMP6]], ptr [[TMP5]], i32 8, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024)
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0
+; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i64> poison)
+; CHECK-NEXT: [[TMP8:%.*]] = add <vscale x 2 x i64> [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP8]], ptr [[TMP7]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK: middle.block:
 ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
 ; CHECK: scalar.ph:
@@ -75,27 +78,30 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i
 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK: vector.ph:
 ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+;
CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv1i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -136,34 +142,37 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: 
[[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv1i64.p0(ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP7]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] -; CHECK-NEXT: [[TMP8:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP7]], [[VEC_PHI]] -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] -; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP9]] = add [[VEC_PHI]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[TMP11]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP12]] +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.nxv1i64( [[TMP8]]) +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64( [[TMP10]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 
[[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -177,7 +186,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end: -; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] ; entry: @@ -205,25 +214,28 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; 
CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -260,26 +272,29 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) ; CHECK-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; CHECK-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll index 0d6ef7c00def8..4c994772643ef 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll @@ -57,33 +57,22 @@ define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_rang ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 2 -; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP4]] -; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] -; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i32(i32 [[TMP5]], i32 4) -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP5]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i32.p0(ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]], poison) -; CHECK-NEXT: [[TMP8:%.*]] = add nsw [[WIDE_MASKED_LOAD]], shufflevector ( insertelement ( poison, i32 1, i64 0), poison, zeroinitializer) -; CHECK-NEXT: call void @llvm.masked.store.nxv2i32.p0( [[TMP8]], ptr [[TMP7]], i32 4, [[ACTIVE_LANE_MASK]]) -; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 2 -; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP10]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], +; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr [[TMP2]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 4, 4 +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 7b6e29388c759..0bdcf5b1efd01 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ 
b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -13,26 +13,29 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; SCALABLE-LABEL: @uniform_load( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 -; SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B:%.*]], align 8 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 +; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -58,17 +61,17 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8 -; 
FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FIXEDLEN: middle.block: @@ -93,26 +96,29 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = load i64, ptr [[B:%.*]], align 8 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 
[[TMP7]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 8 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -138,12 +144,12 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -183,26 +189,29 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-LABEL: @uniform_load_outside_use( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 
@llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 -; SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B:%.*]], align 8 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 +; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -218,7 +227,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; SCALABLE: for.end: -; SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] +; SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; SCALABLE-NEXT: ret i64 [[V_LCSSA]] ; ; FIXEDLEN-LABEL: @uniform_load_outside_use( @@ -229,17 +238,17 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8 -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = 
insertelement <2 x i64> poison, i64 [[TMP2]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FIXEDLEN: middle.block: @@ -284,12 +293,12 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -331,39 +340,43 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-LABEL: @conditional_uniform_load( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: 
[[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; SCALABLE-NEXT: [[TMP3:%.*]] = add [[TMP2]], zeroinitializer -; SCALABLE-NEXT: [[TMP4:%.*]] = mul [[TMP3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP4]] -; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP5]] -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 -; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], i32 8, [[TMP8]], poison) -; SCALABLE-NEXT: [[TMP9:%.*]] = xor [[TMP8]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP8]], [[WIDE_MASKED_GATHER]], zeroinitializer -; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] -; SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; SCALABLE-NEXT: store [[PREDPHI]], ptr [[TMP11]], align 8 -; SCALABLE-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] -; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ 
[[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], i32 8, [[TMP11]], poison) +; SCALABLE-NEXT: [[TMP12:%.*]] = xor [[TMP11]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP11]], [[WIDE_MASKED_GATHER]], zeroinitializer +; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] +; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 +; SCALABLE-NEXT: store [[PREDPHI]], ptr [[TMP14]], align 8 +; SCALABLE-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]] +; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -391,33 +404,33 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: entry: ; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXEDLEN: vector.ph: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT2]], <2 x ptr> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT2]], <4 x ptr> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 -; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], -; FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp ugt <2 x i64> [[STEP_ADD]], -; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[BROADCAST_SPLAT]], i32 8, 
<2 x i1> [[TMP2]], <2 x i64> poison) -; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[BROADCAST_SPLAT3]], i32 8, <2 x i1> [[TMP3]], <2 x i64> poison) -; FIXEDLEN-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], -; FIXEDLEN-NEXT: [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], -; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i64> [[WIDE_MASKED_GATHER]], <2 x i64> zeroinitializer -; FIXEDLEN-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP3]], <2 x i64> [[WIDE_MASKED_GATHER4]], <2 x i64> zeroinitializer +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison) +; FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT3]], i32 8, <4 x i1> [[TMP3]], <4 x i64> poison) +; FIXEDLEN-NEXT: [[TMP4:%.*]] = xor <4 x i1> [[TMP2]], +; FIXEDLEN-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], +; FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer +; FIXEDLEN-NEXT: [[PREDPHI5:%.*]] = select <4 x i1> [[TMP3]], <4 x i64> [[WIDE_MASKED_GATHER4]], <4 x i64> zeroinitializer ; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP8]], align 8 -; FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[PREDPHI5]], ptr [[TMP9]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP8]], align 8 +; FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI5]], ptr [[TMP9]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], ; FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FIXEDLEN: middle.block: @@ -448,42 +461,46 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], 
[[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]] +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP8]], i64 1024) -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer -; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], i32 8, [[TMP10]], poison) -; TF-SCALABLE-NEXT: [[TMP11:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[TMP12:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP11]], zeroinitializer -; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP10]], [[WIDE_MASKED_GATHER]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP8]] -; TF-SCALABLE-NEXT: [[TMP14:%.*]] = or [[TMP10]], [[TMP12]] -; TF-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP13]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[PREDPHI]], ptr [[TMP15]], i32 8, [[TMP14]]) -; TF-SCALABLE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]] -; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp 
eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP12]], zeroinitializer +; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], i32 8, [[TMP13]], poison) +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = xor [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP14]], zeroinitializer +; TF-SCALABLE-NEXT: [[PREDPHI:%.*]] = select [[TMP13]], [[WIDE_MASKED_GATHER]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP16:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] +; TF-SCALABLE-NEXT: [[TMP17:%.*]] = or [[TMP13]], [[TMP15]] +; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP16]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[PREDPHI]], ptr [[TMP18]], i32 8, [[TMP17]]) +; TF-SCALABLE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -510,22 +527,22 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-FIXEDLEN-NEXT: entry: ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-FIXEDLEN: vector.ph: -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT]], <4 x ptr> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[BROADCAST_SPLAT]], i32 8, <2 x i1> [[TMP1]], <2 x i64> poison) -; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = xor 
<2 x i1> [[TMP1]], -; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[WIDE_MASKED_GATHER]], <2 x i64> zeroinitializer +; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP1]], <4 x i64> poison) +; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], +; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: store <4 x i64> [[PREDPHI]], ptr [[TMP4]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -578,26 +595,29 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; SCALABLE-LABEL: @uniform_load_unaligned( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP3:%.*]] = load i64, ptr [[B:%.*]], align 1 -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP3]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 -; SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]] -; SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP5:%.*]] = load i64, ptr [[B:%.*]], align 1 +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP5]], i64 0 +; SCALABLE-NEXT: 
[[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP7]], align 8 +; SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]] +; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -623,17 +643,17 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 1 -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP2]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP6]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; FIXEDLEN: middle.block: @@ -658,26 +678,29 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 
[[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = load i64, ptr [[B:%.*]], align 1 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP6]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP7]] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 1 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP8]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -703,12 +726,12 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 1 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP1]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> 
[[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -748,26 +771,29 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; SCALABLE-LABEL: @uniform_store( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: 
middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -789,23 +815,23 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; FIXEDLEN-NEXT: entry: ; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXEDLEN: vector.ph: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; FIXEDLEN: middle.block: @@ -830,26 +856,29 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; 
TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -870,8 +899,8 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-FIXEDLEN-NEXT: entry: ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-FIXEDLEN: vector.ph: -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 
@@ -879,8 +908,8 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -920,35 +949,40 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-LABEL: @uniform_store_of_loop_varying( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 -; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[TMP3:%.*]] = add zeroinitializer, [[TMP2]] -; SCALABLE-NEXT: [[TMP4:%.*]] = mul [[TMP3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[TMP5:%.*]] = add [[DOTSPLAT]], [[TMP4]] -; SCALABLE-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP7:%.*]] = call i32 @llvm.vscale.i32() -; SCALABLE-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 -; SCALABLE-NEXT: [[TMP9:%.*]] = extractelement [[TMP5]], i32 [[TMP8]] -; SCALABLE-NEXT: store i64 [[TMP9]], ptr [[B:%.*]], align 8 -; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP6]] -; SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP10]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP11]], align 8 -; SCALABLE-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]] -; SCALABLE-NEXT: [[TMP13:%.*]] = icmp eq i64 
[[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP5:%.*]] = add zeroinitializer, [[TMP4]] +; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[TMP7:%.*]] = add [[DOTSPLAT]], [[TMP6]] +; SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; SCALABLE-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32() +; SCALABLE-NEXT: [[TMP11:%.*]] = mul i32 [[TMP10]], 2 +; SCALABLE-NEXT: [[TMP12:%.*]] = sub i32 [[TMP11]], 1 +; SCALABLE-NEXT: [[TMP13:%.*]] = extractelement [[TMP7]], i32 [[TMP12]] +; SCALABLE-NEXT: store i64 [[TMP13]], ptr [[B:%.*]], align 8 +; SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP8]] +; SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP15]], align 8 +; SCALABLE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP17:%.*]] = mul i64 [[TMP16]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP17]] +; SCALABLE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -970,33 +1004,31 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; FIXEDLEN-NEXT: entry: ; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXEDLEN: vector.ph: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT2]], <2 x ptr> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT4]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT6]], <2 x i64> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: br label 
[[VECTOR_BODY:%.*]] ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 -; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[VEC_IND]], <2 x ptr> [[BROADCAST_SPLAT]], i32 8, <2 x i1> ) -; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[STEP_ADD]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 8, <2 x i1> ) -; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] -; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT5]], ptr [[TMP4]], align 8 -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT7]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], -; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; FIXEDLEN-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 5 +; FIXEDLEN-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 6 +; FIXEDLEN-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 7 +; FIXEDLEN-NEXT: store i64 [[TMP7]], ptr [[B:%.*]], align 8 +; FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP4]] +; FIXEDLEN-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP10]], align 8 +; FIXEDLEN-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP11]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; FIXEDLEN-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; FIXEDLEN-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; FIXEDLEN: middle.block: ; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 ; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1019,38 +1051,42 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; 
TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]] +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP8]], i64 1024) -; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[VEC_IND]], [[BROADCAST_SPLAT]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP8]] -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT2]], ptr [[TMP10]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP11]] -; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add 
[[VEC_IND]], [[DOTSPLAT]] -; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1024) +; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[VEC_IND]], [[BROADCAST_SPLAT]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] +; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT2]], ptr [[TMP13]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP15]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -1071,23 +1107,22 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-FIXEDLEN-NEXT: entry: ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-FIXEDLEN: vector.ph: -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[VEC_IND]], <2 x ptr> [[BROADCAST_SPLAT]], i32 8, <2 x i1> ) -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP2]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label 
[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 +; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 +; TF-FIXEDLEN-NEXT: store i64 [[TMP3]], ptr [[B:%.*]], align 8 +; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; TF-FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-FIXEDLEN: middle.block: ; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 ; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1125,39 +1160,43 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-LABEL: @conditional_uniform_store( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; SCALABLE-NEXT: [[TMP2:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; SCALABLE-NEXT: [[TMP3:%.*]] = add [[TMP2]], zeroinitializer -; SCALABLE-NEXT: [[TMP4:%.*]] = mul [[TMP3]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP4]] -; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP5]] -; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 -; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; SCALABLE-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer +; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP6]] +; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]] +; SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP9]], i64 0 +; SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector 
[[DOTSPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 0 -; SCALABLE-NEXT: [[TMP8:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) -; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP8]]) -; SCALABLE-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP7]] -; SCALABLE-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP9]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP10]], align 8 -; SCALABLE-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]] -; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; SCALABLE-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; SCALABLE-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP11:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) +; SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP11]]) +; SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP10]] +; SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP13]], align 8 +; SCALABLE-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1184,33 +1223,33 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: entry: ; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXEDLEN: vector.ph: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; FIXEDLEN-NEXT: 
[[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT2]], <2 x ptr> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT4]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x ptr> poison, ptr [[B]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT6]], <2 x ptr> poison, <2 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT2]], <4 x ptr> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT4]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x ptr> poison, ptr [[B]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT6]], <4 x ptr> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; FIXEDLEN-NEXT: [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 -; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], -; FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp ugt <2 x i64> [[STEP_ADD]], -; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT]], <2 x ptr> [[BROADCAST_SPLAT3]], i32 8, <2 x i1> [[TMP2]]) -; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT5]], <2 x ptr> [[BROADCAST_SPLAT7]], i32 8, <2 x i1> [[TMP3]]) +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT3]], i32 8, <4 x i1> [[TMP2]]) +; FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT5]], <4 x ptr> [[BROADCAST_SPLAT7]], i32 8, <4 x i1> [[TMP3]]) ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 -; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 2 -; FIXEDLEN-NEXT: 
store <2 x i64> [[BROADCAST_SPLAT5]], ptr [[TMP7]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT5]], ptr [[TMP7]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], ; FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; FIXEDLEN: middle.block: @@ -1240,43 +1279,47 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = call @llvm.experimental.stepvector.nxv1i64() -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = add [[TMP3]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = mul [[TMP4]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP5]] -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul i64 1, [[TMP6]] -; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP7]], i64 0 -; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = add [[TMP5]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = mul [[TMP6]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP7]] +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = mul i64 1, [[TMP9]] +; TF-SCALABLE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP10]], i64 0 +; TF-SCALABLE-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, ptr [[B:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP8]], i64 1024) -; TF-SCALABLE-NEXT: [[TMP9:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[TMP10:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP9]], zeroinitializer -; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv1i64.nxv1p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP10]]) -; TF-SCALABLE-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP8]] -; TF-SCALABLE-NEXT: [[TMP12:%.*]] = xor [[TMP9]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) -; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP12]], zeroinitializer -; TF-SCALABLE-NEXT: [[TMP14:%.*]] = or [[TMP10]], [[TMP13]] -; TF-SCALABLE-NEXT: [[TMP15:%.*]] = getelementptr inbounds i64, ptr [[TMP11]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP15]], i32 8, [[TMP14]]) -; TF-SCALABLE-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]] -; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; TF-SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP12]], zeroinitializer +; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP13]]) +; TF-SCALABLE-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] +; TF-SCALABLE-NEXT: [[TMP15:%.*]] = xor [[TMP12]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; TF-SCALABLE-NEXT: [[TMP16:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP15]], zeroinitializer +; TF-SCALABLE-NEXT: [[TMP17:%.*]] = or [[TMP13]], [[TMP16]] +; TF-SCALABLE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP14]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP18]], i32 8, [[TMP17]]) +; TF-SCALABLE-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP20:%.*]] = mul 
i64 [[TMP19]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP20]] +; TF-SCALABLE-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; TF-SCALABLE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -1302,22 +1345,22 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-FIXEDLEN-NEXT: entry: ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-FIXEDLEN: vector.ph: -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x ptr> poison, ptr [[B:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT1]], <2 x ptr> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x ptr> poison, ptr [[B:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x ptr> [[BROADCAST_SPLATINSERT1]], <4 x ptr> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <2 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT]], <2 x ptr> [[BROADCAST_SPLAT2]], i32 8, <2 x i1> [[TMP1]]) +; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP1]]) ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 -; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], ; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TF-FIXEDLEN: middle.block: @@ -1368,26 +1411,29 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; 
SCALABLE-LABEL: @uniform_store_unaligned( ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]] +; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: -; SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] ; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] -; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; SCALABLE-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0 +; SCALABLE-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0 ; SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 -; SCALABLE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]] -; SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; SCALABLE-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() -; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; SCALABLE-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; SCALABLE-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]] +; SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; SCALABLE-NEXT: store [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8 +; SCALABLE-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64() +; SCALABLE-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2 +; SCALABLE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]] +; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; SCALABLE: middle.block: ; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1409,23 +1455,23 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: entry: ; FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FIXEDLEN: vector.ph: -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer -; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i64 0 -; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x 
i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[V]], i64 0 +; FIXEDLEN-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXEDLEN: vector.body: ; FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2 +; FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4 ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 ; FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]] ; FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 -; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2 -; FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 -; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8 +; FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 4 +; FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8 +; FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; FIXEDLEN: middle.block: @@ -1450,26 +1496,29 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-SCALABLE: vector.ph: ; TF-SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP2]] -; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]] +; TF-SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 -; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-SCALABLE-NEXT: [[TMP3:%.*]] = add 
i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv1i1.i64(i64 [[TMP3]], i64 1024) +; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 -; TF-SCALABLE-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP3]] -; TF-SCALABLE-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv1i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP5]], i32 8, [[ACTIVE_LANE_MASK]]) -; TF-SCALABLE-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() -; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; TF-SCALABLE-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; TF-SCALABLE-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; TF-SCALABLE-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; TF-SCALABLE-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64() +; TF-SCALABLE-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2 +; TF-SCALABLE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]] +; TF-SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; TF-SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TF-SCALABLE: middle.block: ; TF-SCALABLE-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-SCALABLE: scalar.ph: @@ -1490,8 +1539,8 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-FIXEDLEN-NEXT: entry: ; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TF-FIXEDLEN: vector.ph: -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 +; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -1499,8 +1548,8 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -; TF-FIXEDLEN-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; TF-FIXEDLEN: middle.block: 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll
index 0e2f916ac08b6..fb2167b7f5c33 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/zvl32b.ll
@@ -12,33 +12,23 @@ define void @vector_add_i16(ptr noalias nocapture %a, i16 %v, i64 %n) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i16> poison, i16 [[V:%.*]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT]], <2 x i16> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i16> poison, i16 [[V]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i16> [[BROADCAST_SPLATINSERT4]], <2 x i16> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[V:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <2 x i64> [[VEC_IND]]
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A]], <2 x i64> [[STEP_ADD]]
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i16, ptr [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP1]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2
-; CHECK-NEXT:    [[WIDE_VEC2:%.*]] = load <4 x i16>, ptr [[TMP5]], align 2
-; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <4 x i16> [[WIDE_VEC]], <4 x i16> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <4 x i16> [[WIDE_VEC2]], <4 x i16> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i16> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT:    [[TMP7:%.*]] = add <2 x i16> [[STRIDED_VEC3]], [[BROADCAST_SPLAT5]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> [[TMP6]], <2 x ptr> [[TMP0]], i32 2, <2 x i1> <i1 true, i1 true>)
-; CHECK-NEXT:    call void @llvm.masked.scatter.v2i16.v2p0(<2 x i16> [[TMP7]], <2 x ptr> [[TMP1]], i32 2, <2 x i1> <i1 true, i1 true>)
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <4 x i64> [[VEC_IND]]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x ptr> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 0
+; CHECK-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP2]], align 2
+; CHECK-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i16> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> [[TMP3]], <4 x ptr> [[TMP0]], i32 2, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
-; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020
-; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1020
+; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -56,6 +46,7 @@ define void @vector_add_i16(ptr noalias nocapture %a, i16 %v, i64 %n) {
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
 ;
+
 entry:
   br label %for.body
 
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
index cb017795077f1..08bac7f788c77 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
@@ -10,26 +10,10 @@ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
 target triple = "riscv64"
 
 define void @foo(ptr nocapture writeonly %da) {
-; CHECK-128-LABEL: @foo(
-; CHECK-128-NEXT:  entry:
-; CHECK-128-NEXT:    store i64 0, ptr [[DA:%.*]], align 8
-; CHECK-128-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 1
-; CHECK-128-NEXT:    store i64 0, ptr [[ARRAYIDX1]], align 8
-; CHECK-128-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 2
-; CHECK-128-NEXT:    store i64 0, ptr [[ARRAYIDX2]], align 8
-; CHECK-128-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[DA]], i64 3
-; CHECK-128-NEXT:    store i64 0, ptr [[ARRAYIDX3]], align 8
-; CHECK-128-NEXT:    ret void
-;
-; CHECK-256-LABEL: @foo(
-; CHECK-256-NEXT:  entry:
-; CHECK-256-NEXT:    store <4 x i64> zeroinitializer, ptr [[DA:%.*]], align 8
-; CHECK-256-NEXT:    ret void
-;
-; CHECK-512-LABEL: @foo(
-; CHECK-512-NEXT:  entry:
-; CHECK-512-NEXT:    store <4 x i64> zeroinitializer, ptr [[DA:%.*]], align 8
-; CHECK-512-NEXT:    ret void
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store <4 x i64> zeroinitializer, ptr [[DA:%.*]], align 8
+; CHECK-NEXT:    ret void
 ;
 entry:
   store i64 0, ptr %da, align 8
@@ -58,3 +42,7 @@ entry:
   %arrayidx2 = getelementptr inbounds i8, ptr %da, i8 2
   ret void
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-128: {{.*}}
+; CHECK-256: {{.*}}
+; CHECK-512: {{.*}}

From 48f97e5751372b4a64144605c5e9f7e5e13e382a Mon Sep 17 00:00:00 2001
From: Sam McCall
Date: Fri, 24 Feb 2023 14:50:00 +0100
Subject: [PATCH 400/691] [FlowSensitive] Log analysis progress for debugging
 purposes

The goal is to be able to understand how the analysis executes, and what its
incremental and final findings are, by enabling logging and reading the logs.
This should include both framework and analysis-specific information.

Ad-hoc printf-debugging doesn't seem sufficient for my understanding, at
least. Being able to check in logging, turn it on in a production binary, and
quickly find particular analysis steps within complex functions seems
important.

This can be enabled programmatically through DataflowAnalysisOptions, or via
the flag -dataflow-log. (Works in unittests, clang-tidy, standalone tools...)

Important missing pieces here:
- a logger implementation that produces an interactive report (HTML file)
  which can be navigated via timeline/code/CFG. (I think the Logger interface
  is sufficient for this, but need to prototype).
- display of the application-specific lattice
- more useful display for the built-in environment (e.g. meaningful &
  consistent names for values, hiding redundant variables in the flow
  condition, hiding unreachable expressions)

Differential Revision: https://reviews.llvm.org/D144730
---
 .../FlowSensitive/DataflowAnalysisContext.h   |  16 +-
 .../FlowSensitive/DataflowEnvironment.h       |   5 +-
 .../clang/Analysis/FlowSensitive/Logger.h     |  85 ++++++++++
 .../lib/Analysis/FlowSensitive/CMakeLists.txt |   1 +
 .../FlowSensitive/DataflowAnalysisContext.cpp |  28 ++++
 clang/lib/Analysis/FlowSensitive/Logger.cpp   | 108 +++++++++++++
 .../TypeErasedDataflowAnalysis.cpp            |  15 +-
 .../Analysis/FlowSensitive/CMakeLists.txt     |   1 +
 .../Analysis/FlowSensitive/LoggerTest.cpp     | 152 ++++++++++++++++++
 9 files changed, 403 insertions(+), 8 deletions(-)
 create mode 100644 clang/include/clang/Analysis/FlowSensitive/Logger.h
 create mode 100644 clang/lib/Analysis/FlowSensitive/Logger.cpp
 create mode 100644 clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
index 702aaff9c7e71..a044f477ce1b5 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
@@ -34,6 +34,7 @@
 namespace clang {
 namespace dataflow {
+class Logger;
 
 /// Skip past nodes that the CFG does not emit. These nodes are invisible to
 /// flow-sensitive analysis, and should be ignored as they will effectively not
@@ -67,6 +68,11 @@ class DataflowAnalysisContext {
     /// fundamentally limited: some constructs, such as recursion, are
     /// explicitly unsupported.
     std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
+
+    /// If provided, analysis details will be recorded here.
+    /// (This is always non-null within an AnalysisContext, the framework
+    /// provides a fallback no-op logger).
+    Logger *Log = nullptr;
   };
 
   /// Constructs a dataflow analysis context.
@@ -76,11 +82,9 @@ class DataflowAnalysisContext {
   /// `S` must not be null.
   DataflowAnalysisContext(std::unique_ptr<Solver> S,
                           Options Opts = Options{
-                              /*ContextSensitiveOpts=*/std::nullopt})
-      : S(std::move(S)), TrueVal(createAtomicBoolValue()),
-        FalseVal(createAtomicBoolValue()), Opts(Opts) {
-    assert(this->S != nullptr);
-  }
+                              /*ContextSensitiveOpts=*/std::nullopt,
+                              /*Logger=*/nullptr});
+  ~DataflowAnalysisContext();
 
   /// Takes ownership of `Loc` and returns a reference to it.
   ///
@@ -393,6 +397,8 @@ class DataflowAnalysisContext {
 
   // Fields modeled by environments covered by this context.
   llvm::DenseSet<const FieldDecl *> ModeledFields;
+
+  std::unique_ptr<Logger> LogOwner; // If created via flags.
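+
+  // A minimal usage sketch (illustrative only, not part of the original
+  // patch): enabling logging programmatically through the new Options field.
+  // Assumes the stock WatchedLiteralsSolver from
+  // clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h; the braced
+  // initializer mirrors the two Options fields declared above.
+  //
+  //   auto Log = Logger::textual(llvm::errs());
+  //   DataflowAnalysisContext DACtx(
+  //       std::make_unique<WatchedLiteralsSolver>(),
+  //       {/*ContextSensitiveOpts=*/std::nullopt, /*Log=*/Log.get()});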
}; } // namespace dataflow diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index e457430a5e646..678e5b871cc83 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -22,6 +22,7 @@ #include "clang/Analysis/FlowSensitive/ControlFlowContext.h" #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/StorageLocation.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/DenseMap.h" @@ -177,10 +178,12 @@ class Environment { /// with a symbolic representation of the `this` pointee. Environment(DataflowAnalysisContext &DACtx, const DeclContext &DeclCtx); - const DataflowAnalysisContext::Options &getAnalysisOptions() { + const DataflowAnalysisContext::Options &getAnalysisOptions() const { return DACtx->getOptions(); } + Logger &logger() const { return *DACtx->getOptions().Log; } + /// Creates and returns an environment to use for an inline analysis of the /// callee. Uses the storage location from each argument in the `Call` as the /// storage location for the corresponding parameter in the callee. diff --git a/clang/include/clang/Analysis/FlowSensitive/Logger.h b/clang/include/clang/Analysis/FlowSensitive/Logger.h new file mode 100644 index 0000000000000..903dfbc30d40d --- /dev/null +++ b/clang/include/clang/Analysis/FlowSensitive/Logger.h @@ -0,0 +1,85 @@ +//===-- Logger.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_LOGGER_H +#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_LOGGER_H + +#include "clang/Analysis/CFG.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace clang::dataflow { +// Forward declarations so we can use Logger anywhere in the framework. +class ControlFlowContext; +class TypeErasedDataflowAnalysis; +struct TypeErasedDataflowAnalysisState; + +/// A logger is notified as the analysis progresses. +/// It can produce a report of the analysis's findings and how it came to them. +/// +/// The framework reports key structural events (e.g. traversal of blocks). +/// The specific analysis can add extra details to be presented in context. +class Logger { +public: + /// Returns a dummy logger that does nothing. + static Logger &null(); + /// A logger that simply writes messages to the specified ostream in real + /// time. + static std::unique_ptr textual(llvm::raw_ostream &); + + virtual ~Logger() = default; + + /// Called by the framework as we start analyzing a new function or statement. + /// Forms a pair with endAnalysis(). + virtual void beginAnalysis(const ControlFlowContext &, + TypeErasedDataflowAnalysis &) {} + virtual void endAnalysis() {} + + // At any time during the analysis, we're computing the state for some target + // program point. + + /// Called when we start (re-)processing a block in the CFG. + /// The target program point is the entry to the specified block. + /// Calls to log() describe transferBranch(), join() etc. 
+ virtual void enterBlock(const CFGBlock &) {} + /// Called when we start processing an element in the current CFG block. + /// The target program point is after the specified element. + /// Calls to log() describe the transfer() function. + virtual void enterElement(const CFGElement &) {} + + /// Records the analysis state computed for the current program point. + virtual void recordState(TypeErasedDataflowAnalysisState &) {} + /// Records that the analysis state for the current block is now final. + virtual void blockConverged() {} + + /// Called by the framework or user code to report some event. + /// The event is associated with the current context (program point). + /// The Emit function produces the log message. It may or may not be called, + /// depending on if the logger is interested; it should have no side effects. + void log(llvm::function_ref Emit) { + if (!ShouldLogText) + return; + std::string S; + llvm::raw_string_ostream OS(S); + Emit(OS); + logText(S); + } + +protected: + /// ShouldLogText should be false for trivial loggers that ignore logText(). + /// This allows log() to skip evaluating its Emit function. + Logger(bool ShouldLogText = true) : ShouldLogText(ShouldLogText) {} + +private: + bool ShouldLogText; + virtual void logText(llvm::StringRef) {} +}; + +} // namespace clang::dataflow + +#endif diff --git a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt index 1a49998c39c20..a3216518f4dba 100644 --- a/clang/lib/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/lib/Analysis/FlowSensitive/CMakeLists.txt @@ -2,6 +2,7 @@ add_clang_library(clangAnalysisFlowSensitive ControlFlowContext.cpp DataflowAnalysisContext.cpp DataflowEnvironment.cpp + Logger.cpp Transfer.cpp TypeErasedDataflowAnalysis.cpp Value.cpp diff --git a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp index a1b813982502b..57169baccbd4a 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp @@ -15,13 +15,20 @@ #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" #include "clang/AST/ExprCXX.h" #include "clang/Analysis/FlowSensitive/DebugSupport.h" +#include "clang/Analysis/FlowSensitive/Logger.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "llvm/ADT/SetOperations.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include #include #include +static llvm::cl::opt + DataflowLog("dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, + llvm::cl::desc("Emit log of dataflow analysis. With no arg, " + "writes textual log to stderr.")); + namespace clang { namespace dataflow { @@ -375,6 +382,27 @@ DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { return nullptr; } +DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr S, + Options Opts) + : S(std::move(S)), TrueVal(createAtomicBoolValue()), + FalseVal(createAtomicBoolValue()), Opts(Opts) { + assert(this->S != nullptr); + // If the -dataflow-log command-line flag was set, synthesize a logger. + // This is ugly but provides a uniform method for ad-hoc debugging dataflow- + // based tools. + if (Opts.Log == nullptr) { + if (DataflowLog.getNumOccurrences() > 0) { + LogOwner = Logger::textual(llvm::errs()); + this->Opts.Log = LogOwner.get(); + // FIXME: if the flag is given a value, write an HTML log to a file. 
+ } else {
+ this->Opts.Log = &Logger::null();
+ }
+ }
+}
+
+DataflowAnalysisContext::~DataflowAnalysisContext() = default;
+
 } // namespace dataflow
 } // namespace clang
diff --git a/clang/lib/Analysis/FlowSensitive/Logger.cpp b/clang/lib/Analysis/FlowSensitive/Logger.cpp
new file mode 100644
index 0000000000000..469fea338e451
--- /dev/null
+++ b/clang/lib/Analysis/FlowSensitive/Logger.cpp
@@ -0,0 +1,108 @@
+//===-- Logger.cpp --------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/FlowSensitive/Logger.h"
+#include "clang/Analysis/FlowSensitive/ControlFlowContext.h"
+#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
+#include "llvm/Support/WithColor.h"
+
+namespace clang::dataflow {
+
+Logger &Logger::null() {
+ struct NullLogger final : Logger {};
+ static auto *Instance = new NullLogger();
+ return *Instance;
+}
+
+namespace {
+struct TextualLogger final : Logger {
+ llvm::raw_ostream &OS;
+ const CFG *CurrentCFG;
+ const CFGBlock *CurrentBlock;
+ const CFGElement *CurrentElement;
+ unsigned CurrentElementIndex;
+ bool ShowColors;
+ llvm::DenseMap VisitCount;
+ TypeErasedDataflowAnalysis *CurrentAnalysis;
+
+ TextualLogger(llvm::raw_ostream &OS)
+ : OS(OS), ShowColors(llvm::WithColor::defaultAutoDetectFunction()(OS)) {}
+
+ virtual void beginAnalysis(const ControlFlowContext &CFG,
+ TypeErasedDataflowAnalysis &Analysis) override {
+ {
+ llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true);
+ OS << "=== Beginning data flow analysis ===\n";
+ }
+ if (auto *D = CFG.getDecl()) {
+ D->print(OS);
+ OS << "\n";
+ D->dump(OS);
+ }
+ CurrentCFG = &CFG.getCFG();
+ CurrentCFG->print(OS, Analysis.getASTContext().getLangOpts(), ShowColors);
+ CurrentAnalysis = &Analysis;
+ }
+ virtual void endAnalysis() override {
+ llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true);
+ unsigned Blocks = 0, Steps = 0;
+ for (const auto &E : VisitCount) {
+ ++Blocks;
+ Steps += E.second;
+ }
+ llvm::errs() << "=== Finished analysis: " << Blocks << " blocks in "
+ << Steps << " total steps ===\n";
+ }
+ virtual void enterBlock(const CFGBlock &Block) override {
+ unsigned Count = ++VisitCount[&Block];
+ {
+ llvm::WithColor Header(OS, llvm::raw_ostream::Colors::RED, /*Bold=*/true);
+ OS << "=== Entering block B" << Block.getBlockID() << " (iteration "
+ << Count << ") ===\n";
+ }
+ Block.print(OS, CurrentCFG, CurrentAnalysis->getASTContext().getLangOpts(),
+ ShowColors);
+ CurrentBlock = &Block;
+ CurrentElement = nullptr;
+ CurrentElementIndex = 0;
+ }
+ virtual void enterElement(const CFGElement &Element) override {
+ ++CurrentElementIndex;
+ CurrentElement = &Element;
+ {
+ llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN,
+ /*Bold=*/true);
+ OS << "Processing element B" << CurrentBlock->getBlockID() << "."
+ << CurrentElementIndex << ": ";
+ Element.dumpToStream(OS);
+ }
+ }
+ void recordState(TypeErasedDataflowAnalysisState &State) override {
+ {
+ llvm::WithColor Subheader(OS, llvm::raw_ostream::Colors::CYAN,
+ /*Bold=*/true);
+ OS << "Computed state for B" << CurrentBlock->getBlockID() << "."
+ << CurrentElementIndex << ":\n";
+ }
+ // FIXME: currently the environment dump is verbose and unenlightening.
+ // FIXME: dump the user-defined lattice, too. + State.Env.dump(OS); + OS << "\n"; + } + void blockConverged() override { + OS << "B" << CurrentBlock->getBlockID() << " has converged!\n"; + } + virtual void logText(llvm::StringRef S) override { OS << S << "\n"; } +}; +} // namespace + +std::unique_ptr Logger::textual(llvm::raw_ostream &OS) { + return std::make_unique(OS); +} + +} // namespace clang::dataflow diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp index d94b547ca17de..08bcd5e65e379 100644 --- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp @@ -191,7 +191,10 @@ struct AnalysisContext { llvm::ArrayRef> BlockStates) : CFCtx(CFCtx), Analysis(Analysis), InitEnv(InitEnv), - BlockStates(BlockStates) {} + Log(InitEnv.logger()), BlockStates(BlockStates) { + Log.beginAnalysis(CFCtx, Analysis); + } + ~AnalysisContext() { Log.endAnalysis(); } /// Contains the CFG being analyzed. const ControlFlowContext &CFCtx; @@ -199,6 +202,7 @@ struct AnalysisContext { TypeErasedDataflowAnalysis &Analysis; /// Initial state to start the analysis. const Environment &InitEnv; + Logger &Log; /// Stores the state of a CFG block if it has been evaluated by the analysis. /// The indices correspond to the block IDs. llvm::ArrayRef> BlockStates; @@ -368,8 +372,11 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC, std::function PostVisitCFG = nullptr) { + AC.Log.enterBlock(Block); auto State = computeBlockInputState(Block, AC); + AC.Log.recordState(State); for (const auto &Element : Block) { + AC.Log.enterElement(Element); // Built-in analysis if (AC.Analysis.builtinOptions()) { builtinTransfer(Element, State, AC); @@ -382,6 +389,7 @@ transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC, if (PostVisitCFG) { PostVisitCFG(Element, State); } + AC.Log.recordState(State); } return State; } @@ -462,15 +470,18 @@ runTypeErasedDataflowAnalysis( LatticeJoinEffect Effect2 = NewBlockState.Env.widen(OldBlockState->Env, Analysis); if (Effect1 == LatticeJoinEffect::Unchanged && - Effect2 == LatticeJoinEffect::Unchanged) + Effect2 == LatticeJoinEffect::Unchanged) { // The state of `Block` didn't change from widening so there's no need // to revisit its successors. + AC.Log.blockConverged(); continue; + } } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice, NewBlockState.Lattice) && OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { // The state of `Block` didn't change after transfer so there's no need // to revisit its successors. 
+ AC.Log.blockConverged(); continue; } } diff --git a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt index ed38a515be270..c77aeaca90959 100644 --- a/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt +++ b/clang/unittests/Analysis/FlowSensitive/CMakeLists.txt @@ -9,6 +9,7 @@ add_clang_unittest(ClangAnalysisFlowSensitiveTests DataflowAnalysisContextTest.cpp DataflowEnvironmentTest.cpp DebugSupportTest.cpp + LoggerTest.cpp MapLatticeTest.cpp MatchSwitchTest.cpp MultiVarConstantPropagationTest.cpp diff --git a/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp new file mode 100644 index 0000000000000..eab37045c393e --- /dev/null +++ b/clang/unittests/Analysis/FlowSensitive/LoggerTest.cpp @@ -0,0 +1,152 @@ +#include "TestingSupport.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Analysis/FlowSensitive/DataflowAnalysis.h" +#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" +#include "clang/Analysis/FlowSensitive/DataflowLattice.h" +#include "llvm/Testing/Support/Error.h" +#include "gtest/gtest.h" +#include + +namespace clang::dataflow::test { +namespace { + +struct TestLattice { + int Elements = 0; + int Branches = 0; + int Joins = 0; + + LatticeJoinEffect join(const TestLattice &Other) { + if (Joins < 3) { + ++Joins; + Elements += Other.Elements; + Branches += Other.Branches; + return LatticeJoinEffect::Changed; + } + return LatticeJoinEffect::Unchanged; + } + friend bool operator==(const TestLattice &LHS, const TestLattice &RHS) { + return std::tie(LHS.Elements, LHS.Branches, LHS.Joins) == + std::tie(RHS.Elements, RHS.Branches, RHS.Joins); + } +}; + +class TestAnalysis : public DataflowAnalysis { +public: + using DataflowAnalysis::DataflowAnalysis; + + static TestLattice initialElement() { return TestLattice{}; } + void transfer(const CFGElement &, TestLattice &L, Environment &E) { + E.logger().log([](llvm::raw_ostream &OS) { OS << "transfer()"; }); + ++L.Elements; + } + void transferBranch(bool Branch, const Stmt *S, TestLattice &L, + Environment &E) { + E.logger().log([&](llvm::raw_ostream &OS) { + OS << "transferBranch(" << Branch << ")"; + }); + ++L.Branches; + } +}; + +class TestLogger : public Logger { +public: + TestLogger(std::string &S) : OS(S) {} + +private: + llvm::raw_string_ostream OS; + + void beginAnalysis(const ControlFlowContext &, + TypeErasedDataflowAnalysis &) override { + logText("beginAnalysis()"); + } + void endAnalysis() override { logText("\nendAnalysis()"); } + + void enterBlock(const CFGBlock &B) override { + OS << "\nenterBlock(" << B.BlockID << ")\n"; + } + void enterElement(const CFGElement &E) override { + // we don't want the trailing \n + std::string S; + llvm::raw_string_ostream SS(S); + E.dumpToStream(SS); + + OS << "enterElement(" << llvm::StringRef(S).trim() << ")\n"; + } + void recordState(TypeErasedDataflowAnalysisState &S) override { + const TestLattice &L = llvm::any_cast(S.Lattice.Value); + OS << "recordState(Elements=" << L.Elements << ", Branches=" << L.Branches + << ", Joins=" << L.Joins << ")\n"; + } + /// Records that the analysis state for the current block is now final. + void blockConverged() override { logText("blockConverged()"); } + + void logText(llvm::StringRef Text) override { OS << Text << "\n"; } +}; + +TEST(LoggerTest, Sequence) { + const char *Code = R"cpp( +int target(bool b, int p, int q) { + return b ? 
p : q;
+}
+)cpp";
+
+ auto Inputs = AnalysisInputs(
+ Code, ast_matchers::hasName("target"),
+ [](ASTContext &C, Environment &) { return TestAnalysis(C); });
+ std::vector Args = {
+ "-fsyntax-only", "-fno-delayed-template-parsing", "-std=c++17"};
+ Inputs.ASTBuildArgs = Args;
+ std::string Log;
+ TestLogger Logger(Log);
+ Inputs.BuiltinOptions.Log = &Logger;
+
+ ASSERT_THAT_ERROR(checkDataflow(std::move(Inputs),
+ [](const AnalysisOutputs &) {}),
+ llvm::Succeeded());
+
+ EXPECT_EQ(Log, R"(beginAnalysis()
+
+enterBlock(4)
+recordState(Elements=0, Branches=0, Joins=0)
+enterElement(b)
+transfer()
+recordState(Elements=1, Branches=0, Joins=0)
+enterElement(b (ImplicitCastExpr, LValueToRValue, _Bool))
+transfer()
+recordState(Elements=2, Branches=0, Joins=0)
+
+enterBlock(3)
+transferBranch(0)
+recordState(Elements=2, Branches=1, Joins=0)
+enterElement(q)
+transfer()
+recordState(Elements=3, Branches=1, Joins=0)
+
+enterBlock(2)
+transferBranch(1)
+recordState(Elements=2, Branches=1, Joins=0)
+enterElement(p)
+transfer()
+recordState(Elements=3, Branches=1, Joins=0)
+
+enterBlock(1)
+recordState(Elements=6, Branches=2, Joins=1)
+enterElement(b ? p : q)
+transfer()
+recordState(Elements=7, Branches=2, Joins=1)
+enterElement(b ? p : q (ImplicitCastExpr, LValueToRValue, int))
+transfer()
+recordState(Elements=8, Branches=2, Joins=1)
+enterElement(return b ? p : q;)
+transfer()
+recordState(Elements=9, Branches=2, Joins=1)
+
+enterBlock(0)
+recordState(Elements=9, Branches=2, Joins=1)
+
+endAnalysis()
+)");
+}
+
+} // namespace
+} // namespace clang::dataflow::test
From 002c4b7b955b1fc8825b4d6b46bb079390bce812 Mon Sep 17 00:00:00 2001
From: Haojian Wu
Date: Fri, 17 Mar 2023 10:33:07 +0100
Subject: [PATCH 401/691] [clangd] Extend CollectMainFileMacros.

Extend the existing MainFileMacros structure:
- record more information (InConditionalDirective) in MacroOccurrence
- collect macro references inside macro bodies (fix a long-time FIXME)

So that MainFileMacros preserves enough information, which allows a
just-in-time conversion to interoperate with include-cleaner::Macro for
include-cleaner features.

See the context in https://reviews.llvm.org/D146017.
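As a rough illustration of the new behavior (a hypothetical input, not
one of this patch's test cases), consider a main file such as:

  #define BAR 1
  #define FOO BAR   // the BAR reference inside FOO's body is now collected

  #ifdef FOO        // recorded with InConditionalDirective = true
  int x = FOO;      // ordinary expansion, InConditionalDirective = false
  #endif

The collector now records the occurrence of BAR inside the definition of
FOO (previously skipped), and distinguishes the #ifdef use of FOO from
its expansion on the following line.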
Differential Revision: https://reviews.llvm.org/D146279 --- clang-tools-extra/clangd/CollectMacros.cpp | 66 +++++++++++++- clang-tools-extra/clangd/CollectMacros.h | 50 ++++------- clang-tools-extra/clangd/ParsedAST.cpp | 15 ++-- clang-tools-extra/clangd/Preamble.cpp | 10 ++- .../clangd/unittests/CollectMacrosTests.cpp | 85 ++++++++++++------- .../unittests/SemanticHighlightingTests.cpp | 4 +- 6 files changed, 149 insertions(+), 81 deletions(-) diff --git a/clang-tools-extra/clangd/CollectMacros.cpp b/clang-tools-extra/clangd/CollectMacros.cpp index 687f86e0a77eb..c0ed8b68ea481 100644 --- a/clang-tools-extra/clangd/CollectMacros.cpp +++ b/clang-tools-extra/clangd/CollectMacros.cpp @@ -9,12 +9,13 @@ #include "CollectMacros.h" #include "AST.h" #include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/STLExtras.h" namespace clang { namespace clangd { void CollectMainFileMacros::add(const Token &MacroNameTok, const MacroInfo *MI, - bool IsDefinition) { + bool IsDefinition, bool InIfCondition) { if (!InMainFile) return; auto Loc = MacroNameTok.getLocation(); @@ -26,9 +27,49 @@ void CollectMainFileMacros::add(const Token &MacroNameTok, const MacroInfo *MI, auto Range = halfOpenToRange( SM, CharSourceRange::getCharRange(Loc, MacroNameTok.getEndLoc())); if (auto SID = getSymbolID(Name, MI, SM)) - Out.MacroRefs[SID].push_back({Range, IsDefinition}); + Out.MacroRefs[SID].push_back({Range, IsDefinition, InIfCondition}); else - Out.UnknownMacros.push_back({Range, IsDefinition}); + Out.UnknownMacros.push_back({Range, IsDefinition, InIfCondition}); +} + +void CollectMainFileMacros::FileChanged(SourceLocation Loc, FileChangeReason, + SrcMgr::CharacteristicKind, FileID) { + InMainFile = isInsideMainFile(Loc, SM); +} +void CollectMainFileMacros::MacroExpands(const Token &MacroName, + const MacroDefinition &MD, + SourceRange Range, + const MacroArgs *Args) { + add(MacroName, MD.getMacroInfo()); +} +void CollectMainFileMacros::MacroUndefined(const clang::Token &MacroName, + const clang::MacroDefinition &MD, + const clang::MacroDirective *Undef) { + add(MacroName, MD.getMacroInfo()); +} +void CollectMainFileMacros::Ifdef(SourceLocation Loc, const Token &MacroName, + const MacroDefinition &MD) { + add(MacroName, MD.getMacroInfo(), /*IsDefinition=*/false, + /*InConditionalDirective=*/true); +} +void CollectMainFileMacros::Ifndef(SourceLocation Loc, const Token &MacroName, + const MacroDefinition &MD) { + add(MacroName, MD.getMacroInfo(), /*IsDefinition=*/false, + /*InConditionalDirective=*/true); +} +void CollectMainFileMacros::Defined(const Token &MacroName, + const MacroDefinition &MD, + SourceRange Range) { + add(MacroName, MD.getMacroInfo(), /*IsDefinition=*/false, + /*InConditionalDirective=*/true); +} +void CollectMainFileMacros::SourceRangeSkipped(SourceRange R, + SourceLocation EndifLoc) { + if (!InMainFile) + return; + Position Begin = sourceLocToPosition(SM, R.getBegin()); + Position End = sourceLocToPosition(SM, R.getEnd()); + Out.SkippedRanges.push_back(Range{Begin, End}); } class CollectPragmaMarks : public PPCallbacks { @@ -58,5 +99,24 @@ collectPragmaMarksCallback(const SourceManager &SM, return std::make_unique(SM, Out); } +void CollectMainFileMacros::MacroDefined(const Token &MacroName, + const MacroDirective *MD) { + + if (!InMainFile) + return; + const auto *MI = MD->getMacroInfo(); + add(MacroName, MD->getMacroInfo(), true); + if (MI) + for (const auto &Tok : MI->tokens()) { + auto *II = Tok.getIdentifierInfo(); + // Could this token be a reference to a macro? (Not param to this macro). 
+ if (!II || !II->hadMacroDefinition() ||
+ llvm::is_contained(MI->params(), II))
+ continue;
+ if (const MacroInfo *MI = PP.getMacroInfo(II))
+ add(Tok, MI);
+ }
+}
+
 } // namespace clangd
 } // namespace clang
diff --git a/clang-tools-extra/clangd/CollectMacros.h b/clang-tools-extra/clangd/CollectMacros.h
index 9d7b478f1c3c7..d5789a2a88912 100644
--- a/clang-tools-extra/clangd/CollectMacros.h
+++ b/clang-tools-extra/clangd/CollectMacros.h
@@ -13,6 +13,7 @@
 #include "SourceCode.h"
 #include "index/SymbolID.h"
 #include "clang/Lex/PPCallbacks.h"
+#include "clang/Lex/Preprocessor.h"
 #include "llvm/ADT/DenseMap.h"
 #include 
@@ -24,6 +25,8 @@ struct MacroOccurrence {
 // SourceManager from preamble is not available when we build the AST.
 Range Rng;
 bool IsDefinition;
+ // True if the occurrence is used in a conditional directive, e.g. #ifdef MACRO
+ bool InConditionalDirective;
 };
 
 struct MainFileMacros {
@@ -43,56 +46,37 @@ struct MainFileMacros {
 /// - collect macros after the preamble of the main file (in ParsedAST.cpp)
 class CollectMainFileMacros : public PPCallbacks {
 public:
- explicit CollectMainFileMacros(const SourceManager &SM, MainFileMacros &Out)
- : SM(SM), Out(Out) {}
+ explicit CollectMainFileMacros(const Preprocessor &PP, MainFileMacros &Out)
+ : SM(PP.getSourceManager()), PP(PP), Out(Out) {}
 
 void FileChanged(SourceLocation Loc, FileChangeReason,
- SrcMgr::CharacteristicKind, FileID) override {
- InMainFile = isInsideMainFile(Loc, SM);
- }
+ SrcMgr::CharacteristicKind, FileID) override;
 
- void MacroDefined(const Token &MacroName, const MacroDirective *MD) override {
- add(MacroName, MD->getMacroInfo(), /*IsDefinition=*/true);
- }
+ void MacroDefined(const Token &MacroName, const MacroDirective *MD) override;
 
 void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
- SourceRange Range, const MacroArgs *Args) override {
- add(MacroName, MD.getMacroInfo());
- }
+ SourceRange Range, const MacroArgs *Args) override;
 
 void MacroUndefined(const clang::Token &MacroName,
 const clang::MacroDefinition &MD,
- const clang::MacroDirective *Undef) override {
- add(MacroName, MD.getMacroInfo());
- }
+ const clang::MacroDirective *Undef) override;
 
+ // FIXME: handle C++23 #elifdef, #elifndef
 void Ifdef(SourceLocation Loc, const Token &MacroName,
- const MacroDefinition &MD) override {
- add(MacroName, MD.getMacroInfo());
- }
-
+ const MacroDefinition &MD) override;
 void Ifndef(SourceLocation Loc, const Token &MacroName,
- const MacroDefinition &MD) override {
- add(MacroName, MD.getMacroInfo());
- }
+ const MacroDefinition &MD) override;
 
 void Defined(const Token &MacroName, const MacroDefinition &MD,
- SourceRange Range) override {
- add(MacroName, MD.getMacroInfo());
- }
-
- void SourceRangeSkipped(SourceRange R, SourceLocation EndifLoc) override {
- if (!InMainFile)
- return;
- Position Begin = sourceLocToPosition(SM, R.getBegin());
- Position End = sourceLocToPosition(SM, R.getEnd());
- Out.SkippedRanges.push_back(Range{Begin, End});
- }
+ SourceRange Range) override;
+
+ void SourceRangeSkipped(SourceRange R, SourceLocation EndifLoc) override;
 
 private:
 void add(const Token &MacroNameTok, const MacroInfo *MI,
- bool IsDefinition = false);
+ bool IsDefinition = false, bool InConditionalDirective = false);
 const SourceManager &SM;
+ const Preprocessor &PP;
 bool InMainFile = true;
 MainFileMacros &Out;
 };
diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index 1671eec133b6e..1501a5c5f3c3b 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++
b/clang-tools-extra/clangd/ParsedAST.cpp @@ -610,11 +610,12 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, Macros = Patch->mainFileMacros(); Marks = Patch->marks(); } - Clang->getPreprocessor().addPPCallbacks( - std::make_unique(Clang->getSourceManager(), - Macros)); + auto& PP = Clang->getPreprocessor(); + PP.addPPCallbacks( + std::make_unique( + PP, Macros)); - Clang->getPreprocessor().addPPCallbacks( + PP.addPPCallbacks( collectPragmaMarksCallback(Clang->getSourceManager(), Marks)); // Copy over the includes from the preamble, then combine with the @@ -626,10 +627,10 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, CanonIncludes.addSystemHeadersMapping(Clang->getLangOpts()); std::unique_ptr IWYUHandler = collectIWYUHeaderMaps(&CanonIncludes); - Clang->getPreprocessor().addCommentHandler(IWYUHandler.get()); + PP.addCommentHandler(IWYUHandler.get()); // Collect tokens of the main file. - syntax::TokenCollector CollectTokens(Clang->getPreprocessor()); + syntax::TokenCollector CollectTokens(PP); // To remain consistent with preamble builds, these callbacks must be called // exactly here, after preprocessor is initialized and BeginSourceFile() was @@ -660,7 +661,7 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs, // XXX: This is messy: clang-tidy checks flush some diagnostics at EOF. // However Action->EndSourceFile() would destroy the ASTContext! // So just inform the preprocessor of EOF, while keeping everything alive. - Clang->getPreprocessor().EndSourceFile(); + PP.EndSourceFile(); // UnitDiagsConsumer is local, we can not store it in CompilerInstance that // has a longer lifetime. Clang->getDiagnostics().setClient(new IgnoreDiagnostics); diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp index 3b0af0ab50a62..061c67d65f7d8 100644 --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -133,6 +133,7 @@ class CppFilePreambleCallbacks : public PreambleCallbacks { CanonIncludes.addSystemHeadersMapping(CI.getLangOpts()); LangOpts = &CI.getLangOpts(); SourceMgr = &CI.getSourceManager(); + PP = &CI.getPreprocessor(); Includes.collect(CI); if (Config::current().Diagnostics.UnusedIncludes == Config::IncludesPolicy::Strict || @@ -144,11 +145,11 @@ class CppFilePreambleCallbacks : public PreambleCallbacks { } std::unique_ptr createPPCallbacks() override { - assert(SourceMgr && LangOpts && - "SourceMgr and LangOpts must be set at this point"); + assert(SourceMgr && LangOpts && PP && + "SourceMgr, LangOpts and PP must be set at this point"); return std::make_unique( - std::make_unique(*SourceMgr, Macros), + std::make_unique(*PP, Macros), collectPragmaMarksCallback(*SourceMgr, Marks)); } @@ -215,6 +216,7 @@ class CppFilePreambleCallbacks : public PreambleCallbacks { std::unique_ptr IWYUHandler = nullptr; const clang::LangOptions *LangOpts = nullptr; const SourceManager *SourceMgr = nullptr; + const Preprocessor *PP = nullptr; PreambleBuildStats *Stats; bool ParseForwardingFunctions; std::function BeforeExecuteCallback; @@ -382,7 +384,7 @@ scanPreamble(llvm::StringRef Contents, const tooling::CompileCommand &Cmd) { PP.addPPCallbacks( std::make_unique(PP, SP.TextualDirectives)); PP.addPPCallbacks(collectPragmaMarksCallback(SM, SP.Marks)); - PP.addPPCallbacks(std::make_unique(SM, SP.Macros)); + PP.addPPCallbacks(std::make_unique(PP, SP.Macros)); if (llvm::Error Err = Action.Execute()) return std::move(Err); Action.EndSourceFile(); diff --git 
a/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp b/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp index 196ed5cea4693..163a7f1a31707 100644 --- a/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp +++ b/clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp @@ -8,12 +8,14 @@ #include "AST.h" #include "Annotations.h" #include "CollectMacros.h" +#include "Matchers.h" #include "SourceCode.h" #include "TestTU.h" #include "clang/Basic/SourceLocation.h" #include "llvm/Support/ScopedPrinter.h" #include "gmock/gmock.h" #include "gtest/gtest.h" +#include namespace clang { namespace clangd { @@ -21,19 +23,24 @@ namespace { using testing::UnorderedElementsAreArray; +MATCHER_P(rangeIs, R, "") { return arg.Rng == R; } +MATCHER(isDef, "") { return arg.IsDefinition; } +MATCHER(inConditionalDirective, "") { return arg.InConditionalDirective; } + TEST(CollectMainFileMacros, SelectedMacros) { // References of the same symbol must have the ranges with the same // name(integer). If there are N different symbols then they must be named // from 1 to N. Macros for which SymbolID cannot be computed must be named - // "Unknown". + // "Unknown". The payload of the annotation describes the extra bit + // information of the MacroOccurrence (e.g. $1(def) => IsDefinition). const char *Tests[] = { R"cpp(// Macros: Cursor on definition. - #define $1[[FOO]](x,y) (x + y) + #define $1(def)[[FOO]](x,y) (x + y) int main() { int x = $1[[FOO]]($1[[FOO]](3, 4), $1[[FOO]](5, 6)); } )cpp", R"cpp( - #define $1[[M]](X) X; - #define $2[[abc]] 123 + #define $1(def)[[M]](X) X; + #define $2(def)[[abc]] 123 int s = $1[[M]]($2[[abc]]); )cpp", // FIXME: Locating macro in duplicate definitions doesn't work. Enable @@ -48,31 +55,50 @@ TEST(CollectMainFileMacros, SelectedMacros) { // #undef $2[[abc]] // )cpp", R"cpp( - #ifdef $Unknown[[UNDEFINED]] + #ifdef $Unknown(condit)[[UNDEFINED]] + #endif + + #ifndef $Unknown(condit)[[UNDEFINED]] + #endif + + #if defined($Unknown(condit)[[UNDEFINED]]) #endif )cpp", R"cpp( - #ifndef $Unknown[[abc]] - #define $1[[abc]] - #ifdef $1[[abc]] + #ifndef $Unknown(condit)[[abc]] + #define $1(def)[[abc]] + #ifdef $1(condit)[[abc]] #endif #endif )cpp", R"cpp( // Macros from token concatenations not included. - #define $1[[CONCAT]](X) X##A() - #define $2[[PREPEND]](X) MACRO##X() - #define $3[[MACROA]]() 123 + #define $1(def)[[CONCAT]](X) X##A() + #define $2(def)[[PREPEND]](X) MACRO##X() + #define $3(def)[[MACROA]]() 123 int B = $1[[CONCAT]](MACRO); int D = $2[[PREPEND]](A); )cpp", R"cpp( - // FIXME: Macro names in a definition are not detected. - #define $1[[MACRO_ARGS2]](X, Y) X Y - #define $2[[FOO]] BAR - #define $3[[BAR]] 1 + #define $1(def)[[MACRO_ARGS2]](X, Y) X Y + #define $3(def)[[BAR]] 1 + #define $2(def)[[FOO]] $3[[BAR]] int A = $2[[FOO]]; )cpp"}; + auto ExpectedResults = [](const Annotations &T, StringRef Name) { + std::vector> ExpectedLocations; + for (const auto &[R, Bits] : T.rangesWithPayload(Name)) { + if (Bits == "def") + ExpectedLocations.push_back(testing::AllOf(rangeIs(R), isDef())); + else if (Bits == "condit") + ExpectedLocations.push_back( + testing::AllOf(rangeIs(R), inConditionalDirective())); + else + ExpectedLocations.push_back(testing::AllOf(rangeIs(R))); + } + return ExpectedLocations; + }; + for (const char *Test : Tests) { Annotations T(Test); auto AST = TestTU::withCode(T.code()).build(); @@ -80,13 +106,16 @@ TEST(CollectMainFileMacros, SelectedMacros) { auto &SM = AST.getSourceManager(); auto &PP = AST.getPreprocessor(); - // Known macros. 
- for (int I = 1;; I++) { - const auto ExpectedRefs = T.ranges(llvm::to_string(I)); - if (ExpectedRefs.empty()) - break; + for (const auto &[Name, Ranges] : T.all_ranges()) { + if (Name == "Unknown") { + EXPECT_THAT(ActualMacroRefs.UnknownMacros, + UnorderedElementsAreArray(ExpectedResults(T, "Unknown"))) + << "Unknown macros doesn't match in " << Test; + continue; + } - auto Loc = sourceLocationInMainFile(SM, ExpectedRefs.begin()->start); + auto Loc = sourceLocationInMainFile( + SM, offsetToPosition(T.code(), Ranges.front().Begin)); ASSERT_TRUE(bool(Loc)); const auto *Id = syntax::spelledIdentifierTouching(*Loc, AST.getTokens()); ASSERT_TRUE(Id); @@ -94,19 +123,11 @@ TEST(CollectMainFileMacros, SelectedMacros) { assert(Macro); auto SID = getSymbolID(Macro->Name, Macro->Info, SM); - std::vector Ranges; - for (const auto &Ref : ActualMacroRefs.MacroRefs[SID]) - Ranges.push_back(Ref.Rng); - EXPECT_THAT(ExpectedRefs, UnorderedElementsAreArray(Ranges)) - << "Annotation=" << I << ", MacroName=" << Macro->Name + EXPECT_THAT(ActualMacroRefs.MacroRefs[SID], + UnorderedElementsAreArray(ExpectedResults(T, Name))) + << "Annotation=" << Name << ", MacroName=" << Macro->Name << ", Test = " << Test; } - // Unknown macros. - std::vector Ranges; - for (const auto &Ref : AST.getMacros().UnknownMacros) - Ranges.push_back(Ref.Rng); - EXPECT_THAT(Ranges, UnorderedElementsAreArray(T.ranges("Unknown"))) - << "Unknown macros doesn't match in " << Test; } } } // namespace diff --git a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp index 259efcf54a6b2..975378118b7ad 100644 --- a/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp +++ b/clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp @@ -399,7 +399,7 @@ TEST(SemanticHighlighting, GetsCorrectTokens) { #define $Macro_decl[[MACRO_CONCAT]](X, V, T) T foo##X = V #define $Macro_decl[[DEF_VAR]](X, V) int X = V #define $Macro_decl[[DEF_VAR_T]](T, X, V) T X = V - #define $Macro_decl[[DEF_VAR_REV]](V, X) DEF_VAR(X, V) + #define $Macro_decl[[DEF_VAR_REV]](V, X) $Macro[[DEF_VAR]](X, V) #define $Macro_decl[[CPY]](X) X #define $Macro_decl[[DEF_VAR_TYPE]](X, Y) X Y #define $Macro_decl[[SOME_NAME]] variable @@ -431,7 +431,7 @@ TEST(SemanticHighlighting, GetsCorrectTokens) { )cpp", R"cpp( #define $Macro_decl[[fail]](expr) expr - #define $Macro_decl[[assert]](COND) if (!(COND)) { fail("assertion failed" #COND); } + #define $Macro_decl[[assert]](COND) if (!(COND)) { $Macro[[fail]]("assertion failed" #COND); } // Preamble ends. int $Variable_def[[x]]; int $Variable_def[[y]]; From 4dc72d47ce88218ff3c6a7ae724beb6ab3ba2ade Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Tue, 21 Mar 2023 14:41:20 -0700 Subject: [PATCH 402/691] [mlir][Tensor] Add a FoldTensorSubsetOps pass and patterns These patterns follow FoldMemRefAliasOps which is further refactored for reuse. In the process, fix FoldMemRefAliasOps handling of strides for vector.transfer ops which was previously incorrect. These opt-in patterns generalize the existing canonicalizations on vector.transfer ops. In the future the blanket canonicalizations will be retired. They are kept for now to minimize porting disruptions. 
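A minimal usage sketch (illustrative only; `ctx` and `op` stand for the
caller's MLIRContext and root operation, which are assumptions, not part
of this patch): the foldings can be applied either via the new
-fold-tensor-subset-ops pass or by pulling the opt-in patterns into an
existing pipeline:

  // Opt in to folding tensor.extract_slice into vector.transfer_read and
  // vector.transfer_write into tensor.insert_slice (unit strides only).
  RewritePatternSet patterns(ctx);
  tensor::populateFoldTensorSubsetOpPatterns(patterns);
  (void)applyPatternsAndFoldGreedily(op, std::move(patterns));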
Differential Revision: https://reviews.llvm.org/D146624 --- .../Dialect/Affine/ViewLikeInterfaceUtils.h | 27 ++ .../mlir/Dialect/Tensor/IR/TensorOps.td | 4 + .../mlir/Dialect/Tensor/Transforms/Passes.h | 14 +- .../mlir/Dialect/Tensor/Transforms/Passes.td | 18 +- .../Dialect/Tensor/Transforms/Transforms.h | 25 +- mlir/include/mlir/IR/AffineMap.h | 18 +- .../Affine/Utils/ViewLikeInterfaceUtils.cpp | 32 +++ .../MemRef/Transforms/FoldMemRefAliasOps.cpp | 168 +++++------ mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 20 ++ .../Dialect/Tensor/Transforms/Bufferize.cpp | 2 +- .../Dialect/Tensor/Transforms/CMakeLists.txt | 2 + .../Tensor/Transforms/FoldTensorSubsetOps.cpp | 173 ++++++++++++ ...eConsecutiveInsertExtractSlicePatterns.cpp | 2 + mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt | 1 + mlir/lib/Dialect/Tensor/Utils/Utils.cpp | 1 + mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 21 +- mlir/lib/IR/AffineMap.cpp | 20 ++ .../Dialect/MemRef/fold-memref-alias-ops.mlir | 55 ++-- .../Tensor/fold-tensor-subset-ops.mlir | 262 ++++++++++++++++++ .../llvm-project-overlay/mlir/BUILD.bazel | 2 + 20 files changed, 711 insertions(+), 156 deletions(-) create mode 100644 mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp create mode 100644 mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir diff --git a/mlir/include/mlir/Dialect/Affine/ViewLikeInterfaceUtils.h b/mlir/include/mlir/Dialect/Affine/ViewLikeInterfaceUtils.h index 3fac9409bf381..42156ac5de24d 100644 --- a/mlir/include/mlir/Dialect/Affine/ViewLikeInterfaceUtils.h +++ b/mlir/include/mlir/Dialect/Affine/ViewLikeInterfaceUtils.h @@ -13,6 +13,7 @@ #include "mlir/Interfaces/ViewLikeInterface.h" namespace mlir { +class RewriterBase; /// Fills the `combinedOffsets`, `combinedSizes` and `combinedStrides` to use /// when combining a producer slice **into** a consumer slice. @@ -21,6 +22,7 @@ namespace mlir { /// - Combined offsets = producer_offsets * consumer_strides + consumer_offsets /// - Combined sizes = consumer_sizes /// - Combined strides = producer_strides * consumer_strides +// TODO: unify this API with resolveSourceIndicesOffsetsAndStrides or deprecate. LogicalResult mergeOffsetsSizesAndStrides(OpBuilder &builder, Location loc, ArrayRef producerOffsets, @@ -36,6 +38,7 @@ mergeOffsetsSizesAndStrides(OpBuilder &builder, Location loc, /// Fills the `combinedOffsets`, `combinedSizes` and `combinedStrides` to use /// when combining a `producer` slice op **into** a `consumer` slice op. +// TODO: unify this API with resolveSourceIndicesOffsetsAndStrides or deprecate. LogicalResult mergeOffsetsSizesAndStrides(OpBuilder &builder, Location loc, OffsetSizeAndStrideOpInterface producer, @@ -45,6 +48,30 @@ mergeOffsetsSizesAndStrides(OpBuilder &builder, Location loc, SmallVector &combinedSizes, SmallVector &combinedStrides); +/// Given the 'indicesVals' of a load/store operation operating on an op with +/// offsets and strides, return the combined indices. +/// +/// For example, using `memref.load` and `memref.subview` as an illustration: +/// +/// ``` +/// %0 = ... 
: memref<12x42xf32> +/// %1 = memref.subview %0[%arg0, %arg1][...][%stride1, %stride2] : +/// memref<12x42xf32> to memref<4x4xf32, offset=?, strides=[?, ?]> +/// %2 = load %1[%i1, %i2] : memref<4x4xf32, offset=?, strides=[?, ?]> +/// ``` +/// +/// could be folded into: +/// +/// ``` +/// %2 = load %0[%arg0 + %i1 * %stride1][%arg1 + %i2 * %stride2] : +/// memref<12x42xf32> +/// ``` +void resolveSourceIndicesOffsetsAndStrides( + RewriterBase &rewriter, Location loc, ArrayRef mixedOffsets, + ArrayRef mixedStrides, + const llvm::SmallBitVector &rankReducedDims, ValueRange indicesVals, + SmallVectorImpl &sourceIndices); + } // namespace mlir #endif // MLIR_DIALECT_AFFINE_VIEWLIKEINTERFACEUTILS_H diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td index 66d6dcc7b27ed..721615fdd2607 100644 --- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td +++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td @@ -858,6 +858,10 @@ def Tensor_InsertSliceOp : Tensor_OpWithOffsetSizesAndStrides<"insert_slice", [ return {rank, rank, rank}; } + /// Return the dimensions of the dest that are omitted to insert a source + /// when the result is rank-extended. + llvm::SmallBitVector getDroppedDims(); + /// Return the number of leading operands before the `offsets`, `sizes` and /// and `strides` operands. static unsigned getOffsetSizeAndStrideStartOperandIndex() { return 2; } diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h index df695dbec19a7..48f9066934a25 100644 --- a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h @@ -12,23 +12,27 @@ #include "mlir/Pass/Pass.h" namespace mlir { +namespace tensor { -#define GEN_PASS_DECL -#include "mlir/Dialect/Tensor/Transforms/Passes.h.inc" +//===----------------------------------------------------------------------===// +// Passes +//===----------------------------------------------------------------------===// -/// Creates an instance of `tensor` dialect bufferization pass. +/// Creates an instance of the `tensor` subset folding pass. +std::unique_ptr createFoldTensorSubsetOpsPass(); + +/// Creates an instance of the `tensor` dialect bufferization pass. std::unique_ptr createTensorBufferizePass(); //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// -namespace tensor { /// Generate the code for registering passes. #define GEN_PASS_REGISTRATION #include "mlir/Dialect/Tensor/Transforms/Passes.h.inc" -} // namespace tensor +} // namespace tensor } // namespace mlir #endif // MLIR_DIALECT_TENSOR_TRANSFORMS_PASSES_H_ diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td index 2bf774d404bf5..b4673599a5def 100644 --- a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td @@ -11,9 +11,25 @@ include "mlir/Pass/PassBase.td" +def FoldTensorSubsetOps : Pass<"fold-tensor-subset-ops"> { + let summary = "Fold tensor subset ops into producer/consumer ops"; + let description = [{ + The pass folds tensor subset ops into producer/consumer ops. 
+ + At the moment, the following foldings occur when possible: + - tensor.extract_slice into vector.transfer_read + - vector.transfer_write into tensor.insert_slice + + }]; + let constructor = "mlir::tensor::createFoldTensorSubsetOpsPass()"; + let dependentDialects = [ + "AffineDialect", "tensor::TensorDialect", "vector::VectorDialect" + ]; +} + def TensorBufferize : Pass<"tensor-bufferize", "func::FuncOp"> { let summary = "Bufferize the `tensor` dialect"; - let constructor = "mlir::createTensorBufferizePass()"; + let constructor = "mlir::tensor::createTensorBufferizePass()"; } #endif // MLIR_DIALECT_TENSOR_TRANSFORMS_PASSES diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h index 4cdf360c51d72..c0c46e9981dfa 100644 --- a/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h @@ -18,11 +18,9 @@ struct TilingResult; namespace tensor { -/// Populates `patterns` with patterns to wrap a tensor.pad op with an scf.if op -/// to separate the cases where we don't need padding (all pad sizes are -/// actually zeros) and where we indeed need padding. -void populateSplitPaddingPatterns(RewritePatternSet &patterns, - PatternBenefit baseBenefit = 1); +//===----------------------------------------------------------------------===// +// Patterns +//===----------------------------------------------------------------------===// /// Pattern to swap an `tensor.extract_slice` with its producer when the /// producer implements the `TilingInterface`. The pattern itself does not @@ -32,6 +30,23 @@ void populateSplitPaddingPatterns(RewritePatternSet &patterns, FailureOr replaceExtractSliceWithTiledProducer( OpBuilder &builder, tensor::ExtractSliceOp sliceOp, OpResult producerOp); +//===----------------------------------------------------------------------===// +// Populate functions. +//===----------------------------------------------------------------------===// + +/// Collects a set of patterns to rewrite ops within the tensor dialect. +void populateExpandOpsPatterns(RewritePatternSet &patterns); + +/// Appends patterns for folding tensor aliasing ops into consumer load/store +/// ops into `patterns`. +void populateFoldTensorSubsetOpPatterns(RewritePatternSet &patterns); + +/// Populates `patterns` with patterns to wrap a tensor.pad op with an scf.if op +/// to separate the cases where we don't need padding (all pad sizes are +/// actually zeros) and where we indeed need padding. +void populateSplitPaddingPatterns(RewritePatternSet &patterns, + PatternBenefit baseBenefit = 1); + /// Collects patterns to merge consecutive tensor.insert_slice/extract_slice /// into one. These patterns are in in this separate entry point because the /// bufferization is sensitive over IR structure, particularly those diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h index cc7c794f1f933..75a268c483955 100644 --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -249,11 +249,11 @@ class AffineMap { /// Returns a new AffineMap with the same number of dims and symbols and one /// less result at `pos`, dropped. - AffineMap dropResult(int64_t pos) { return dropResults({pos}); } + AffineMap dropResult(int64_t pos) const { return dropResults({pos}); } // Returns a new AffineMap with the same number of dims and symbols, but all - // positions in `positions` dropped from results. 
- AffineMap dropResults(ArrayRef positions) { + // results in `positions` dropped. + AffineMap dropResults(ArrayRef positions) const { SmallVector reverse_sorted_positions = llvm::to_vector(positions); llvm::sort(reverse_sorted_positions, std::greater()); @@ -263,9 +263,13 @@ class AffineMap { return AffineMap::get(getNumDims(), getNumSymbols(), exprs, getContext()); } + // Returns a new AffineMap with the same number of dims and symbols, but all + // results in `positions` dropped. + AffineMap dropResults(const llvm::SmallBitVector &positions) const; + /// Returns a new AffineMap with the same number of dims and symbols and an /// extra result inserted at `pos`. - AffineMap insertResult(AffineExpr expr, unsigned pos) { + AffineMap insertResult(AffineExpr expr, unsigned pos) const { auto exprs = llvm::to_vector<4>(getResults()); exprs.insert(exprs.begin() + pos, expr); return AffineMap::get(getNumDims(), getNumSymbols(), exprs, getContext()); @@ -583,6 +587,12 @@ llvm::SmallBitVector getUnusedDimsBitVector(ArrayRef maps); // by any of the maps in the input array `maps`. llvm::SmallBitVector getUnusedSymbolsBitVector(ArrayRef maps); +/// Expand `map` to operate on `rank` dims while projecting out the dims in +/// `projectedDimensions`. This amounts to composing `map` with +/// `id(rank).dropResults(projectedDimensions)`. +AffineMap expandDimsToRank(AffineMap map, int64_t rank, + const llvm::SmallBitVector &projectedDimensions); + inline raw_ostream &operator<<(raw_ostream &os, AffineMap map) { map.print(os); return os; diff --git a/mlir/lib/Dialect/Affine/Utils/ViewLikeInterfaceUtils.cpp b/mlir/lib/Dialect/Affine/Utils/ViewLikeInterfaceUtils.cpp index c506239744c48..f53edcefe3c79 100644 --- a/mlir/lib/Dialect/Affine/Utils/ViewLikeInterfaceUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/ViewLikeInterfaceUtils.cpp @@ -8,6 +8,8 @@ #include "mlir/Dialect/Affine/ViewLikeInterfaceUtils.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/IR/PatternMatch.h" using namespace mlir; @@ -74,3 +76,33 @@ LogicalResult mlir::mergeOffsetsSizesAndStrides( droppedProducerDims, consumerOffsets, consumerSizes, consumerStrides, combinedOffsets, combinedSizes, combinedStrides); } + +void mlir::resolveSourceIndicesOffsetsAndStrides( + RewriterBase &rewriter, Location loc, ArrayRef mixedOffsets, + ArrayRef mixedStrides, + const llvm::SmallBitVector &rankReducedDims, ValueRange indicesVals, + SmallVectorImpl &sourceIndices) { + OpFoldResult zero = rewriter.getIndexAttr(0); + + // For each dimension that is rank-reduced, add a zero to the indices. + int64_t indicesDim = 0; + SmallVector indices; + for (auto dim : llvm::seq(0, mixedOffsets.size())) { + OpFoldResult ofr = + (rankReducedDims.test(dim)) ? 
zero : indicesVals[indicesDim++]; + indices.push_back(ofr); + } + + sourceIndices.resize(indices.size()); + sourceIndices.clear(); + for (auto [offset, index, stride] : + llvm::zip_equal(mixedOffsets, indices, mixedStrides)) { + AffineExpr off, idx, str; + bindSymbols(rewriter.getContext(), off, idx, str); + OpFoldResult ofr = makeComposedFoldedAffineApply( + rewriter, loc, AffineMap::get(0, 3, off + idx * str), + {offset, index, stride}); + sourceIndices.push_back( + getValueOrCreateConstantIndexOp(rewriter, loc, ofr)); + } +} diff --git a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp index c1c3478b06efc..c850348c85480 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/FoldMemRefAliasOps.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/ViewLikeInterfaceUtils.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/GPU/IR/GPUDialect.h" @@ -19,7 +20,9 @@ #include "mlir/Dialect/MemRef/Transforms/Passes.h" #include "mlir/Dialect/Utils/IndexingUtils.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/IR/AffineMap.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/TypeSwitch.h" @@ -150,70 +153,6 @@ resolveSourceIndicesCollapseShape(Location loc, PatternRewriter &rewriter, return success(); } -/// Given the 'indices' of an load/store operation where the memref is a result -/// of a subview op, returns the indices w.r.t to the source memref of the -/// subview op. For example -/// -/// %0 = ... : memref<12x42xf32> -/// %1 = subview %0[%arg0, %arg1][][%stride1, %stride2] : memref<12x42xf32> to -/// memref<4x4xf32, offset=?, strides=[?, ?]> -/// %2 = load %1[%i1, %i2] : memref<4x4xf32, offset=?, strides=[?, ?]> -/// -/// could be folded into -/// -/// %2 = load %0[%arg0 + %i1 * %stride1][%arg1 + %i2 * %stride2] : -/// memref<12x42xf32> -static LogicalResult -resolveSourceIndicesSubView(Location loc, PatternRewriter &rewriter, - memref::SubViewOp subViewOp, ValueRange indices, - SmallVectorImpl &sourceIndices) { - SmallVector mixedOffsets = subViewOp.getMixedOffsets(); - SmallVector mixedSizes = subViewOp.getMixedSizes(); - SmallVector mixedStrides = subViewOp.getMixedStrides(); - - SmallVector useIndices; - // Check if this is rank-reducing case. Then for every unit-dim size add a - // zero to the indices. - int64_t resultDim = 0; - llvm::SmallBitVector unusedDims = subViewOp.getDroppedDims(); - for (auto dim : llvm::seq(0, subViewOp.getSourceType().getRank())) { - if (unusedDims.test(dim)) - useIndices.push_back(rewriter.create(loc, 0)); - else - useIndices.push_back(indices[resultDim++]); - } - if (useIndices.size() != mixedOffsets.size()) - return failure(); - sourceIndices.resize(useIndices.size()); - for (auto index : llvm::seq(0, mixedOffsets.size())) { - SmallVector dynamicOperands; - AffineExpr expr = rewriter.getAffineDimExpr(0); - int64_t numSymbols = 0; - dynamicOperands.push_back(useIndices[index]); - - // Multiply the stride; - if (auto attr = mixedStrides[index].dyn_cast()) { - expr = expr * attr.cast().getInt(); - } else { - dynamicOperands.push_back(mixedStrides[index].get()); - expr = expr * rewriter.getAffineSymbolExpr(numSymbols++); - } - - // Add the offset. 
- if (auto attr = mixedOffsets[index].dyn_cast()) { - expr = expr + attr.cast().getInt(); - } else { - dynamicOperands.push_back(mixedOffsets[index].get()); - expr = expr + rewriter.getAffineSymbolExpr(numSymbols++); - } - Location loc = subViewOp.getLoc(); - OpFoldResult ofr = makeComposedFoldedAffineApply( - rewriter, loc, AffineMap::get(1, numSymbols, expr), dynamicOperands); - sourceIndices[index] = getValueOrCreateConstantIndexOp(rewriter, loc, ofr); - } - return success(); -} - /// Helpers to access the memref operand for each op. template static Value getMemRefOperand(LoadOrStoreOpTy op) { @@ -236,25 +175,6 @@ static Value getMemRefOperand(gpu::SubgroupMmaStoreMatrixOp op) { return op.getDstMemref(); } -/// Given the permutation map of the original -/// `vector.transfer_read`/`vector.transfer_write` operations compute the -/// permutation map to use after the subview is folded with it. -static AffineMapAttr getPermutationMapAttr(MLIRContext *context, - memref::SubViewOp subViewOp, - AffineMap currPermutationMap) { - llvm::SmallBitVector unusedDims = subViewOp.getDroppedDims(); - SmallVector exprs; - int64_t sourceRank = subViewOp.getSourceType().getRank(); - for (auto dim : llvm::seq(0, sourceRank)) { - if (unusedDims.test(dim)) - continue; - exprs.push_back(getAffineDimExpr(dim, context)); - } - auto resultDimToSourceDimMap = AffineMap::get(sourceRank, 0, exprs, context); - return AffineMapAttr::get( - currPermutationMap.compose(resultDimToSourceDimMap)); -} - //===----------------------------------------------------------------------===// // Patterns //===----------------------------------------------------------------------===// @@ -390,6 +310,42 @@ calculateExpandedAccessIndices(AffineMap affineMap, return expandedIndices; } +template +static LogicalResult +preconditionsFoldSubViewOpImpl(RewriterBase &rewriter, XferOp xferOp, + memref::SubViewOp subviewOp) { + static_assert( + !llvm::is_one_of::value, + "must be a vector transfer op"); + if (xferOp.hasOutOfBoundsDim()) + return rewriter.notifyMatchFailure(xferOp, "out of bounds transfer dim"); + if (xferOp.getMask()) + return rewriter.notifyMatchFailure(xferOp, "masked transfer"); + if (!subviewOp.hasUnitStride()) { + return rewriter.notifyMatchFailure( + xferOp, "non-1 stride subview, need to track strides in folded memref"); + } + return success(); +} + +static LogicalResult preconditionsFoldSubViewOp(RewriterBase &rewriter, + Operation *op, + memref::SubViewOp subviewOp) { + return success(); +} + +static LogicalResult preconditionsFoldSubViewOp(RewriterBase &rewriter, + vector::TransferReadOp readOp, + memref::SubViewOp subviewOp) { + return preconditionsFoldSubViewOpImpl(rewriter, readOp, subviewOp); +} + +static LogicalResult preconditionsFoldSubViewOp(RewriterBase &rewriter, + vector::TransferWriteOp writeOp, + memref::SubViewOp subviewOp) { + return preconditionsFoldSubViewOpImpl(rewriter, writeOp, subviewOp); +} + template LogicalResult LoadOpOfSubViewOpFolder::matchAndRewrite( OpTy loadOp, PatternRewriter &rewriter) const { @@ -397,7 +353,12 @@ LogicalResult LoadOpOfSubViewOpFolder::matchAndRewrite( getMemRefOperand(loadOp).template getDefiningOp(); if (!subViewOp) - return failure(); + return rewriter.notifyMatchFailure(loadOp, "not a subview producer"); + + LogicalResult preconditionResult = + preconditionsFoldSubViewOp(rewriter, loadOp, subViewOp); + if (failed(preconditionResult)) + return preconditionResult; SmallVector indices(loadOp.getIndices().begin(), loadOp.getIndices().end()); @@ -410,9 +371,10 @@ 
LogicalResult LoadOpOfSubViewOpFolder::matchAndRewrite( indices.assign(expandedIndices.begin(), expandedIndices.end()); } SmallVector sourceIndices; - if (failed(resolveSourceIndicesSubView(loadOp.getLoc(), rewriter, subViewOp, - indices, sourceIndices))) - return failure(); + resolveSourceIndicesOffsetsAndStrides( + rewriter, loadOp.getLoc(), subViewOp.getMixedOffsets(), + subViewOp.getMixedStrides(), subViewOp.getDroppedDims(), indices, + sourceIndices); llvm::TypeSwitch(loadOp) .Case([&](AffineLoadOp op) { @@ -423,14 +385,13 @@ LogicalResult LoadOpOfSubViewOpFolder::matchAndRewrite( rewriter.replaceOpWithNewOp( loadOp, subViewOp.getSource(), sourceIndices, op.getNontemporal()); }) - .Case([&](vector::TransferReadOp transferReadOp) { + .Case([&](vector::TransferReadOp op) { rewriter.replaceOpWithNewOp( - transferReadOp, transferReadOp.getVectorType(), - subViewOp.getSource(), sourceIndices, - getPermutationMapAttr(rewriter.getContext(), subViewOp, - transferReadOp.getPermutationMap()), - transferReadOp.getPadding(), - /*mask=*/Value(), transferReadOp.getInBoundsAttr()); + op, op.getVectorType(), subViewOp.getSource(), sourceIndices, + AffineMapAttr::get(expandDimsToRank( + op.getPermutationMap(), subViewOp.getSourceType().getRank(), + subViewOp.getDroppedDims())), + op.getPadding(), /*mask=*/Value(), op.getInBoundsAttr()); }) .Case([&](gpu::SubgroupMmaLoadMatrixOp op) { rewriter.replaceOpWithNewOp( @@ -512,7 +473,12 @@ LogicalResult StoreOpOfSubViewOpFolder::matchAndRewrite( getMemRefOperand(storeOp).template getDefiningOp(); if (!subViewOp) - return failure(); + return rewriter.notifyMatchFailure(storeOp, "not a subview producer"); + + LogicalResult preconditionResult = + preconditionsFoldSubViewOp(rewriter, storeOp, subViewOp); + if (failed(preconditionResult)) + return preconditionResult; SmallVector indices(storeOp.getIndices().begin(), storeOp.getIndices().end()); @@ -525,9 +491,10 @@ LogicalResult StoreOpOfSubViewOpFolder::matchAndRewrite( indices.assign(expandedIndices.begin(), expandedIndices.end()); } SmallVector sourceIndices; - if (failed(resolveSourceIndicesSubView(storeOp.getLoc(), rewriter, subViewOp, - indices, sourceIndices))) - return failure(); + resolveSourceIndicesOffsetsAndStrides( + rewriter, storeOp.getLoc(), subViewOp.getMixedOffsets(), + subViewOp.getMixedStrides(), subViewOp.getDroppedDims(), indices, + sourceIndices); llvm::TypeSwitch(storeOp) .Case([&](AffineStoreOp op) { @@ -542,8 +509,9 @@ LogicalResult StoreOpOfSubViewOpFolder::matchAndRewrite( .Case([&](vector::TransferWriteOp op) { rewriter.replaceOpWithNewOp( op, op.getValue(), subViewOp.getSource(), sourceIndices, - getPermutationMapAttr(rewriter.getContext(), subViewOp, - op.getPermutationMap()), + AffineMapAttr::get(expandDimsToRank( + op.getPermutationMap(), subViewOp.getSourceType().getRank(), + subViewOp.getDroppedDims())), op.getInBoundsAttr()); }) .Case([&](gpu::SubgroupMmaStoreMatrixOp op) { diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 9d26e51e04fd5..93db7da27abdd 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -2396,6 +2396,26 @@ struct InsertSliceOpSourceCastInserter final }; } // namespace +llvm::SmallBitVector InsertSliceOp::getDroppedDims() { + ArrayRef resultShape = getType().getShape(); + SmallVector mixedSizes = getMixedSizes(); + llvm::SmallBitVector droppedDims(mixedSizes.size()); + unsigned shapePos = 0; + for (const auto &size : enumerate(mixedSizes)) { + std::optional 
sizeVal = getConstantIntValue(size.value()); + // If the size is not 1, or if the current matched dimension of the result + // is the same static shape as the size value (which is 1), then the + // dimension is preserved. + if (!sizeVal || *sizeVal != 1 || + (shapePos < resultShape.size() && resultShape[shapePos] == 1)) { + shapePos++; + continue; + } + droppedDims.set(size.index()); + } + return droppedDims; +} + void InsertSliceOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { results.add, diff --git a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp index 426b1363c6a0e..d27c4576a8b7a 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp @@ -53,6 +53,6 @@ struct TensorBufferizePass }; } // namespace -std::unique_ptr mlir::createTensorBufferizePass() { +std::unique_ptr mlir::tensor::createTensorBufferizePass() { return std::make_unique(); } diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt index 5ed3d97b2719f..9f6780730dc71 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt @@ -4,6 +4,7 @@ add_mlir_dialect_library(MLIRTensorTransforms EmptyOpPatterns.cpp ExtractSliceFromReshapeUtils.cpp FoldIntoPackAndUnpackPatterns.cpp + FoldTensorSubsetOps.cpp MergeConsecutiveInsertExtractSlicePatterns.cpp ReshapePatterns.cpp SplitPaddingPatterns.cpp @@ -29,4 +30,5 @@ add_mlir_dialect_library(MLIRTensorTransforms MLIRTensorDialect MLIRTilingInterface MLIRTransforms + MLIRVectorDialect ) diff --git a/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp b/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp new file mode 100644 index 0000000000000..80ecb868dff6a --- /dev/null +++ b/mlir/lib/Dialect/Tensor/Transforms/FoldTensorSubsetOps.cpp @@ -0,0 +1,173 @@ +//===- FoldTensorSubsetOps.cpp - Fold tensor subset ops -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Fold tensor subset ops with producer / consumers. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/ViewLikeInterfaceUtils.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Tensor/Transforms/Passes.h" +#include "mlir/Dialect/Tensor/Transforms/Transforms.h" +#include "mlir/Dialect/Utils/IndexingUtils.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/IR/AffineMap.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" +#include "llvm/ADT/TypeSwitch.h" + +namespace mlir { +namespace tensor { +#define GEN_PASS_DEF_FOLDTENSORSUBSETOPS +#include "mlir/Dialect/Tensor/Transforms/Passes.h.inc" +} // namespace tensor +} // namespace mlir + +using namespace mlir; + +static Value getTensorOperand(vector::TransferReadOp op) { + return op.getSource(); +} + +static Value getTensorOperand(tensor::InsertSliceOp op) { + return op.getSource(); +} + +//===----------------------------------------------------------------------===// +// Patterns +//===----------------------------------------------------------------------===// + +namespace { +/// Merge extract_slice operation with load/transferRead operation. +class TransferReadOfExtractSliceOpFolder final + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::TransferReadOp readOp, + PatternRewriter &rewriter) const override; +}; + +/// Merge insert_slice operation with store/transferWriteOp operation. +class InsertSliceOfTransferWriteOpFolder final + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tensor::InsertSliceOp insertSliceOp, + PatternRewriter &rewriter) const override; +}; +} // namespace + +template +static LogicalResult preconditionsFoldExtractOrInsertWithTransferOp( + RewriterBase &rewriter, XferOp xferOp, + ExtractOrInsertOp extractOrInsertSliceOp) { + if (xferOp.hasOutOfBoundsDim()) + return rewriter.notifyMatchFailure(xferOp, "out of bounds transfer dim"); + if (xferOp.getMask()) + return rewriter.notifyMatchFailure(xferOp, "masked transfer"); + if (!extractOrInsertSliceOp.hasUnitStride()) { + return rewriter.notifyMatchFailure( + xferOp, "non-1 stride insert/extract, requires keeping track of " + "strides, this may result in needing to insert " + "vector.insert_strided_slice/extract_strided_slice ops"); + } + return success(); +} + +LogicalResult TransferReadOfExtractSliceOpFolder::matchAndRewrite( + vector::TransferReadOp readOp, PatternRewriter &rewriter) const { + auto extractSliceOp = + getTensorOperand(readOp).getDefiningOp(); + if (!extractSliceOp) + return rewriter.notifyMatchFailure(readOp, "not an extract_slice"); + + LogicalResult preconditionResult = + preconditionsFoldExtractOrInsertWithTransferOp(rewriter, readOp, + extractSliceOp); + if (failed(preconditionResult)) + return preconditionResult; + + SmallVector indices(readOp.getIndices().begin(), + readOp.getIndices().end()); + SmallVector sourceIndices; + resolveSourceIndicesOffsetsAndStrides( + rewriter, readOp.getLoc(), extractSliceOp.getMixedOffsets(), + extractSliceOp.getMixedStrides(), extractSliceOp.getDroppedDims(), + indices, sourceIndices); + + rewriter.replaceOpWithNewOp( + readOp, readOp.getVectorType(), extractSliceOp.getSource(), sourceIndices, + AffineMapAttr::get(expandDimsToRank( + readOp.getPermutationMap(), extractSliceOp.getSourceType().getRank(), + extractSliceOp.getDroppedDims())), 
+ readOp.getPadding(), + /*mask=*/Value(), readOp.getInBoundsAttr()); + + return success(); +} + +LogicalResult InsertSliceOfTransferWriteOpFolder::matchAndRewrite( + tensor::InsertSliceOp insertSliceOp, PatternRewriter &rewriter) const { + auto writeOp = getTensorOperand(insertSliceOp) + .template getDefiningOp(); + if (!writeOp) + return rewriter.notifyMatchFailure(insertSliceOp, "not a transfer_write"); + + LogicalResult preconditionResult = + preconditionsFoldExtractOrInsertWithTransferOp(rewriter, writeOp, + insertSliceOp); + if (failed(preconditionResult)) + return preconditionResult; + + SmallVector indices(writeOp.getIndices().begin(), + writeOp.getIndices().end()); + SmallVector sourceIndices; + resolveSourceIndicesOffsetsAndStrides( + rewriter, writeOp.getLoc(), insertSliceOp.getMixedOffsets(), + insertSliceOp.getMixedStrides(), insertSliceOp.getDroppedDims(), indices, + sourceIndices); + + rewriter.replaceOpWithNewOp( + insertSliceOp, writeOp.getValue(), insertSliceOp.getDest(), sourceIndices, + AffineMapAttr::get(expandDimsToRank(writeOp.getPermutationMap(), + insertSliceOp.getDestType().getRank(), + insertSliceOp.getDroppedDims())), + writeOp.getInBoundsAttr()); + + return success(); +} + +void tensor::populateFoldTensorSubsetOpPatterns(RewritePatternSet &patterns) { + patterns.add(patterns.getContext()); +} +//===----------------------------------------------------------------------===// +// Pass registration +//===----------------------------------------------------------------------===// + +namespace { + +struct FoldTensorSubsetOpsPass final + : public tensor::impl::FoldTensorSubsetOpsBase { + void runOnOperation() override; +}; + +} // namespace + +void FoldTensorSubsetOpsPass::runOnOperation() { + RewritePatternSet patterns(&getContext()); + tensor::populateFoldTensorSubsetOpPatterns(patterns); + (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); +} + +std::unique_ptr tensor::createFoldTensorSubsetOpsPass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/Tensor/Transforms/MergeConsecutiveInsertExtractSlicePatterns.cpp b/mlir/lib/Dialect/Tensor/Transforms/MergeConsecutiveInsertExtractSlicePatterns.cpp index 4169882046556..895d1b1f02f07 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/MergeConsecutiveInsertExtractSlicePatterns.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/MergeConsecutiveInsertExtractSlicePatterns.cpp @@ -18,6 +18,7 @@ using namespace mlir::tensor; namespace { /// Merges consecutive tensor.extract_slice ops into one. +// TODO: move to FoldTensorSubsetOps and unify APIs with FoldMemRefAliasOps. struct MergeConsecutiveExtractSlice : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; @@ -41,6 +42,7 @@ struct MergeConsecutiveExtractSlice : public OpRewritePattern { }; /// Merges consecutive tensor.insert_slice ops into one. +// TODO: move to FoldTensorSubsetOps and unify APIs with FoldMemRefAliasOps. 
template struct MergeConsecutiveInsertSlice : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; diff --git a/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt index efc78420b9e17..b7848b1a44229 100644 --- a/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt @@ -7,6 +7,7 @@ add_mlir_dialect_library(MLIRTensorUtils LINK_LIBS PUBLIC MLIRAffineDialect MLIRArithDialect + MLIRArithUtils MLIRIR MLIRTensorDialect ) diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp index a5847250fa915..4c09c540bde2e 100644 --- a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp @@ -14,6 +14,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Utils/IndexingUtils.h" using namespace mlir; diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 21daff60c7e62..ce7d1844ac7f1 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -3733,6 +3733,8 @@ namespace { /// %1 = vector.transfer_read %t[%p0, %p1], %cst {in_bounds = [true, true]} /// : tensor, vector<4x5xf32> /// ``` +// TODO: this is brittle and should be deprecated in favor of a more general +// pattern that applies on-demand. struct FoldExtractSliceIntoTransferRead : public OpRewritePattern { public: @@ -3883,9 +3885,13 @@ struct TransferReadAfterWriteToBroadcast void TransferReadOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results - .add( - context); + // clang-format off + results.add < + // TODO: this is brittle and should be deprecated in favor of a + // more general pattern that applies on-demand. + FoldExtractSliceIntoTransferRead, + TransferReadAfterWriteToBroadcast>(context); + // clang-format on } //===----------------------------------------------------------------------===// @@ -4235,6 +4241,8 @@ class FoldWaw final : public OpRewritePattern { /// %1 = vector.transfer_write %v, %t2[%a, %b] {in_bounds = [true, true]} /// : vector<4x5xf32>, tensor /// ``` +// TODO: this is brittle and should be deprecated in favor of a more general +// pattern that applies on-demand. 
struct FoldInsertSliceIntoTransferWrite : public OpRewritePattern { public: @@ -4417,8 +4425,13 @@ struct SwapExtractSliceOfTransferWrite void TransferWriteOp::getCanonicalizationPatterns(RewritePatternSet &results, MLIRContext *context) { - results.add(context); + // clang-format on } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp index 39c8ab96aa662..9ac181f46b578 100644 --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -8,6 +8,7 @@ #include "mlir/IR/AffineMap.h" #include "AffineMapDetail.h" +#include "mlir/IR/AffineExpr.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/Support/LogicalResult.h" @@ -15,8 +16,10 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -467,6 +470,15 @@ AffineMap::replace(const DenseMap &map) const { return AffineMap::inferFromExprList(newResults).front(); } +AffineMap AffineMap::dropResults(const llvm::SmallBitVector &positions) const { + auto exprs = llvm::to_vector<4>(getResults()); + // TODO: this is a pretty terrible API .. is there anything better? + for (auto pos = positions.find_last(); pos != -1; + pos = positions.find_prev(pos)) + exprs.erase(exprs.begin() + pos); + return AffineMap::get(getNumDims(), getNumSymbols(), exprs, getContext()); +} + AffineMap AffineMap::compose(AffineMap map) const { assert(getNumDims() == map.getNumResults() && "Number of results mismatch"); // Prepare `map` by concatenating the symbols and rewriting its exprs. @@ -808,6 +820,14 @@ llvm::SmallBitVector mlir::getUnusedSymbolsBitVector(ArrayRef maps) { return numSymbolsBitVector; } +AffineMap +mlir::expandDimsToRank(AffineMap map, int64_t rank, + const llvm::SmallBitVector &projectedDimensions) { + auto id = AffineMap::getMultiDimIdentityMap(rank, map.getContext()); + AffineMap proj = id.dropResults(projectedDimensions); + return map.compose(proj); +} + //===----------------------------------------------------------------------===// // MutableAffineMap. 
//===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir index bcbad20a2fd7a..a29f86eb4a263 100644 --- a/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir +++ b/mlir/test/Dialect/MemRef/fold-memref-alias-ops.mlir @@ -6,7 +6,7 @@ func.func @fold_static_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg1 return %1 : f32 } // CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0, s1] -> (s0 + s1 * 2)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1 * 3)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1 * 3)> // CHECK: func @fold_static_stride_subview_with_load // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -25,7 +25,7 @@ func.func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg %1 = memref.load %0[%arg3, %arg4] : memref<4x4xf32, strided<[?, ?], offset: ?>> return %1 : f32 } -// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * s2)> // CHECK: func @fold_dynamic_stride_subview_with_load // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -34,8 +34,8 @@ func.func @fold_dynamic_stride_subview_with_load(%arg0 : memref<12x32xf32>, %arg // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] +// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG3]], %[[ARG5]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[ARG6]]] // CHECK: memref.load %[[ARG0]][%[[I1]], %[[I2]]] // ----- @@ -66,7 +66,7 @@ func.func @fold_dynamic_stride_subview_with_store(%arg0 : memref<12x32xf32>, %ar memref.store %arg7, %0[%arg3, %arg4] : memref<4x4xf32, strided<[?, ?], offset: ?>> return } -// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * s2)> // CHECK: func @fold_dynamic_stride_subview_with_store // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -75,8 +75,8 @@ func.func @fold_dynamic_stride_subview_with_store(%arg0 : memref<12x32xf32>, %ar // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] +// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG3]], %[[ARG5]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[ARG6]]] // CHECK: memref.store %{{.+}}, %[[ARG0]][%[[I1]], %[[I2]]] // ----- @@ -85,7 +85,7 @@ func.func @fold_subview_with_transfer_read_0d( %arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index) -> vector { %f1 = arith.constant 1.0 : f32 - %0 = memref.subview %arg0[%arg1, %arg2][1, 1][2, %arg3] : memref<12x32xf32> to memref> + %0 = memref.subview %arg0[%arg1, %arg2][1, 1][1, 1] : memref<12x32xf32> to memref> %1 = vector.transfer_read %0[], %f1 : memref>, vector return %1 : vector } @@ 
-100,22 +100,14 @@ func.func @fold_subview_with_transfer_read_0d( func.func @fold_subview_with_transfer_read(%arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %arg4 : index, %arg5 : index, %arg6 : index) -> vector<4xf32> { %f1 = arith.constant 1.0 : f32 + %0 = memref.subview %arg0[%arg1, %arg2][4, 4][%arg5, %arg6] : memref<12x32xf32> to memref<4x4xf32, strided<[?, ?], offset: ?>> %1 = vector.transfer_read %0[%arg3, %arg4], %f1 {in_bounds = [true]} : memref<4x4xf32, strided<[?, ?], offset: ?>>, vector<4xf32> return %1 : vector<4xf32> } -// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> // CHECK: func @fold_subview_with_transfer_read -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] -// CHECK: vector.transfer_read %[[ARG0]][%[[I1]], %[[I2]]] +// Can't fold this atm since we don't emit the proper vector.extract_strided_slice. +// CHECK: memref.subview // ----- @@ -123,7 +115,7 @@ func.func @fold_static_stride_subview_with_transfer_write_0d( %arg0 : memref<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index, %v : vector) { %f1 = arith.constant 1.0 : f32 - %0 = memref.subview %arg0[%arg1, %arg2][1, 1][2, %arg3] : memref<12x32xf32> to memref> + %0 = memref.subview %arg0[%arg1, %arg2][1, 1][1, 1] : memref<12x32xf32> to memref> vector.transfer_write %v, %0[] {in_bounds = []} : vector, memref> return } @@ -143,18 +135,9 @@ func.func @fold_static_stride_subview_with_transfer_write(%arg0 : memref<12x32xf vector.transfer_write %arg7, %0[%arg3, %arg4] {in_bounds = [true]} : vector<4xf32>, memref<4x4xf32, strided<[?, ?], offset: ?>> return } -// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> // CHECK: func @fold_static_stride_subview_with_transfer_write -// CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref<12x32xf32> -// CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG2:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG3:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: index -// CHECK-SAME: %[[ARG6:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I1:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG1]], %[[ARG3]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG6]], %[[ARG2]], %[[ARG4]]] -// CHECK: vector.transfer_write %{{.+}}, %[[ARG0]][%[[I1]], %[[I2]]] +// Can't fold this atm since we don't emit the proper vector.extract_strided_slice. 
+// CHECK: memref.subview // ----- @@ -168,7 +151,7 @@ func.func @fold_rank_reducing_subview_with_load %1 = memref.load %0[%arg13, %arg14, %arg15, %arg16] : memref<4x1x4x1xf32, strided<[?, ?, ?, ?], offset: ?>> return %1 : f32 } -// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s1 + s2 * s0)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1, s2] -> (s0 + s1 * s2)> // CHECK: func @fold_rank_reducing_subview_with_load // CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]+]]: memref // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: index @@ -187,10 +170,10 @@ func.func @fold_rank_reducing_subview_with_load // CHECK-SAME: %[[ARG14:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG15:[a-zA-Z0-9_]+]]: index // CHECK-SAME: %[[ARG16:[a-zA-Z0-9_]+]]: index -// CHECK-DAG: %[[I0:.+]] = affine.apply #[[MAP]]()[%[[ARG7]], %[[ARG1]], %[[ARG13]]] -// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG9]], %[[ARG3]], %[[ARG14]]] -// CHECK-DAG: %[[I3:.+]] = affine.apply #[[MAP]]()[%[[ARG10]], %[[ARG4]], %[[ARG15]]] -// CHECK-DAG: %[[I4:.+]] = affine.apply #[[MAP]]()[%[[ARG11]], %[[ARG5]], %[[ARG16]]] +// CHECK-DAG: %[[I0:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG13]], %[[ARG7]]] +// CHECK-DAG: %[[I2:.+]] = affine.apply #[[MAP]]()[%[[ARG3]], %[[ARG14]], %[[ARG9]]] +// CHECK-DAG: %[[I3:.+]] = affine.apply #[[MAP]]()[%[[ARG4]], %[[ARG15]], %[[ARG10]]] +// CHECK-DAG: %[[I4:.+]] = affine.apply #[[MAP]]()[%[[ARG5]], %[[ARG16]], %[[ARG11]]] // CHECK: memref.load %[[ARG0]][%[[I0]], %[[ARG2]], %[[I2]], %[[I3]], %[[I4]], %[[ARG6]]] // ----- diff --git a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir new file mode 100644 index 0000000000000..93a0d77bc698f --- /dev/null +++ b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir @@ -0,0 +1,262 @@ +// RUN: mlir-opt -fold-tensor-subset-ops -split-input-file %s | FileCheck %s + +func.func @fold_vector_transfer_read_with_rank_reduced_extract_slice( + %arg0 : tensor, + %arg1: index, %arg2 : index, %arg3 : index, %arg4: index, %arg5 : index, + %arg6 : index) -> vector<4xf32> { + %cst = arith.constant 0.0 : f32 + %0 = tensor.extract_slice %arg0[0, %arg1, %arg2] [1, %arg3, %arg4] [1, 1, 1] + : tensor to + tensor + %1 = vector.transfer_read %0[%arg5, %arg6], %cst {in_bounds = [true]} + : tensor, vector<4xf32> + return %1 : vector<4xf32> } +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> +// CHECK: func @fold_vector_transfer_read_with_rank_reduced_extract_slice +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[$MAP1]]()[%[[ARG1]], %[[ARG5]]] +// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[$MAP1]]()[%[[ARG2]], %[[ARG6]]] +// CHECK: vector.transfer_read %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]], %{{.*}} : tensor, + %i1: index, %i2: index, %i3: index, %i4: index) -> vector<4xf32> { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %f0 = arith.constant 0.000000e+00 : f32 + + // Can't fold this atm since we don't emit the proper vector.extract_strided_slice.
+// CHECK: tensor.extract_slice + %0 = tensor.extract_slice %src[0, %i1, %i2, %i3] [1, 4, 1, 4] [2, 3, 4, 5] : tensor<1x8x8x8xf32> to tensor<1x4x4xf32> + %1 = vector.transfer_read %0[%c1, %i4, %c2], %f0 {in_bounds = [true]} : tensor<1x4x4xf32>, vector<4xf32> + return %1 : vector<4xf32> +} + +// ----- + +// CHECK-DAG: #[[$ADD_4:.+]] = affine_map<()[s0] -> (s0 + 4)> + +// CHECK-LABEL: func @transfer_read_of_extract_slice( +// CHECK-SAME: %[[t:.*]]: tensor, %[[s1:.*]]: index, %[[s2:.*]]: index +// CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index +// CHECK: %[[add:.*]] = affine.apply #[[$ADD_4]]()[%[[s1]]] +// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = [true, true]} : tensor, vector<5x6xf32> +// CHECK: return %[[r]] +func.func @transfer_read_of_extract_slice(%t : tensor, %s1 : index, %s2 : index) -> vector<5x6xf32> { + %c3 = arith.constant 3 : index + %c4 = arith.constant 4 : index + %cst = arith.constant 0.0 : f32 + %0 = tensor.extract_slice %t[5, %s1] [10, %s2] [1, 1] : tensor to tensor<10x?xf32> + %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor<10x?xf32>, vector<5x6xf32> + return %1 : vector<5x6xf32> +} +// ----- + +func.func @fold_extract_slice_with_transfer_read_0d( + %arg0 : tensor<12x32xf32>, %arg1 : index, %arg2 : index, %arg3 : index) + -> vector { + %f1 = arith.constant 1.0 : f32 + %0 = tensor.extract_slice %arg0[%arg1, %arg2][1, 1][1, 1] : tensor<12x32xf32> to tensor + %1 = vector.transfer_read %0[], %f1 : tensor, vector + return %1 : vector +} +// CHECK: func @fold_extract_slice_with_transfer_read_0d +// CHECK-SAME: %[[T:[a-zA-Z0-9_]+]]: tensor<12x32xf32> +// CHECK-SAME: %[[SZ0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[SZ1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[ST1:[a-zA-Z0-9_]+]]: index +// CHECK: vector.transfer_read %[[T]][%[[SZ0]], %[[SZ1]]] + +// ----- + +// CHECK-DAG: #[[$ADD_4:.+]] = affine_map<()[s0] -> (s0 + 4)> + +// CHECK-LABEL: func @transfer_read_of_extract_slice( +// CHECK-SAME: %[[t:.*]]: tensor, %[[s1:.*]]: index, %[[s2:.*]]: index +// CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index +// CHECK: %[[add:.*]] = affine.apply #[[$ADD_4]]()[%[[s1]]] +// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[add]]], %{{.*}} {in_bounds = [true]} : tensor, vector<6xf32> +// CHECK: return %[[r]] +func.func @transfer_read_of_extract_slice(%t : tensor, %s1 : index, %s2 : index) -> vector<6xf32> { + %c3 = arith.constant 3 : index + %c4 = arith.constant 4 : index + %cst = arith.constant 0.0 : f32 + %0 = tensor.extract_slice %t[5, %s1] [10, %s2] [1, 1] : tensor to tensor<10x?xf32> + %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true]} : tensor<10x?xf32>, vector<6xf32> + return %1 : vector<6xf32> +} + +// ----- + +// CHECK-DAG: #[[$ADD_3:.+]] = affine_map<()[s0] -> (s0 + 3)> + +// CHECK-LABEL: func @transfer_read_of_extract_slice_rank_reducing( +// CHECK-SAME: %[[t:.*]]: tensor, %[[s1:.*]]: index, %[[s2:.*]]: index +// CHECK-DAG: %[[c5:.*]] = arith.constant 5 : index +// CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index +// CHECK: %[[add:.*]] = affine.apply #[[$ADD_3]]()[%[[s1]]] +// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c5]], %[[add]], %[[c10]]], %{{.*}} {in_bounds = [true, true]} : tensor, vector<5x6xf32> +// CHECK: return %[[r]] +func.func @transfer_read_of_extract_slice_rank_reducing(%t : tensor, %s1 : index, %s2 : index) -> vector<5x6xf32> { + %c3 = arith.constant 3 : index + %c4 = arith.constant 4 : index + %cst = arith.constant 0.0 : f32 + %0 = tensor.extract_slice 
%t[5, %s1, 6] [1, %s2, 12] [1, 1, 1] : tensor to tensor + %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor, vector<5x6xf32> + return %1 : vector<5x6xf32> +} + +// ----- + +// CHECK-DAG: #[[$ADD_4:.+]] = affine_map<()[s0] -> (s0 + 4)> +// CHECK-DAG: #[[$d0d2:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)> + +// CHECK-LABEL: func @transfer_read_of_extract_slice_swappy_rank_reducing( +// CHECK-SAME: %[[t:.*]]: tensor, %[[s1:.*]]: index, %[[s2:.*]]: index +func.func @transfer_read_of_extract_slice_swappy_rank_reducing(%t : tensor, %s1 : index, %s2 : index) -> vector<5x6xf32> { + %c3 = arith.constant 3 : index + %c4 = arith.constant 4 : index + %cst = arith.constant 0.0 : f32 + +// CHECK-NOT: extract_slice +// CHECK: %[[c8:.*]] = arith.constant 8 : index +// CHECK: %[[add:.*]] = affine.apply #[[$ADD_4]]()[%[[s2]]] +// CHECK: %[[r:.*]] = vector.transfer_read %[[t]][%[[c8]], %[[s1]], %[[add]]] +// CHECK-SAME: permutation_map = #[[$d0d2]] +// CHECK-SAME: tensor, vector<5x6xf32> + %0 = tensor.extract_slice %t[5, %s1, %s2] [%s2, 1, 12] [1, 1, 1] : tensor to tensor + %1 = vector.transfer_read %0[%c3, %c4], %cst {in_bounds = [true, true]} : tensor, vector<5x6xf32> + + return %1 : vector<5x6xf32> +} + +// ----- + +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> + +// CHECK: func @fold_vector_transfer_write_with_rank_reduced_insert_slice +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32> +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index +func.func @fold_vector_transfer_write_with_rank_reduced_insert_slice( + %arg0 : tensor, + %arg1 : vector<4xf32>, %arg2: index, %arg3 : index, %arg4 : index, + %arg5: index, %arg6 : index, %arg7 : index, + %st : tensor) -> tensor { + %cst = arith.constant 0.0 : f32 + +// CHECK-NOT: insert_slice +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[IDX0:.+]] = affine.apply #[[MAP1]]()[%[[ARG2]], %[[ARG6]]] +// CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]] +// CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[C0]], %[[IDX0]], %[[IDX1]]] {in_bounds = [true]} : vector<4xf32>, tensor, tensor + %1 = tensor.insert_slice %0 into %arg0[0, %arg2, %arg3] [1, %arg4, %arg5] [1, 1, 1] + : tensor into tensor + return %1 : tensor +} + +// ----- + +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0, s1] -> (s0 + s1)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2) -> (d1)> + +// CHECK: func @fold_vector_transfer_write_with_inner_rank_reduced_insert_slice +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: vector<4xf32> +// CHECK-SAME: %[[ARG2:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG3:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG4:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG5:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG6:[a-zA-Z0-9]+]]: index +// CHECK-SAME: %[[ARG7:[a-zA-Z0-9]+]]: index +func.func @fold_vector_transfer_write_with_inner_rank_reduced_insert_slice( + %arg0 : tensor, + %arg1 : vector<4xf32>, %arg2: index, %arg3 : index, %arg4 : index, + %arg5: index, %arg6 : index, %arg7 : index, + %st : tensor) -> tensor { + %cst = arith.constant 0.0 : f32 + + // CHECK-NOT: insert_slice + // CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index + // CHECK-DAG: %[[IDX0:.+]] = affine.apply 
#[[MAP1]]()[%[[ARG2]], %[[ARG6]]] + // CHECK-DAG: %[[IDX1:.+]] = affine.apply #[[MAP1]]()[%[[ARG3]], %[[ARG7]]] + // CHECK-DAG: vector.transfer_write %[[ARG1]], %[[ARG0]][%[[IDX0]], %[[IDX1]], %[[C0]]] + // CHECK-SAME: {in_bounds = [true], permutation_map = #[[MAP2]]} : vector<4xf32>, tensor, tensor + %1 = tensor.insert_slice %0 into %arg0[%arg2, %arg3, 0] [%arg4, %arg5, 1] [1, 1, 1] + : tensor into tensor + return %1 : tensor +} + +// ----- + +// CHECK-LABEL: func @insert_slice_of_transfer_write( +// CHECK-SAME: %[[t1:.*]]: tensor, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index +func.func @insert_slice_of_transfer_write(%t1 : tensor, %v : vector<5x6xf32>, %s : index, %t2 : tensor<5x6xf32>) -> tensor { + %c0 = arith.constant 0 : index + + // CHECK-NOT: insert_slice +// CHECK: %[[c3:.*]] = arith.constant 3 : index +// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c3]], %[[s]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor +// CHECK: return %[[r]] + %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32> + %1 = tensor.insert_slice %0 into %t1[3, %s] [5, 6] [1, 1] : tensor<5x6xf32> into tensor + return %1 : tensor +} + +// ----- + +// CHECK-DAG: #[[$d0d2:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)> + +// CHECK-LABEL: func @insert_slice_of_transfer_write_swappy_rank_extending( +// CHECK-SAME: %[[t1:.*]]: tensor, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index +func.func @insert_slice_of_transfer_write_swappy_rank_extending( + %t1 : tensor, %v : vector<5x6xf32>, + %s : index, %t2 : tensor<5x6xf32>) -> tensor { + %c0 = arith.constant 0 : index + +// CHECK-NOT: insert_slice +// CHECK-DAG: %[[c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[c4:.*]] = arith.constant 4 : index +// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]] +// CHECK-SAME: {in_bounds = [true, true], permutation_map = #[[$d0d2]]} : vector<5x6xf32>, tensor +// CHECK: return %[[r]] + %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32> + %1 = tensor.insert_slice %0 into %t1[4, 3, %s] [5, 1, 6] [1, 1, 1] : tensor<5x6xf32> into tensor + return %1 : tensor +} + +// ----- + +// CHECK-LABEL: func @insert_slice_of_transfer_write_rank_extending( +// CHECK-SAME: %[[t1:.*]]: tensor, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index +// CHECK-DAG: %[[c3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[c4:.*]] = arith.constant 4 : index +// CHECK: %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c4]], %[[c3]], %[[s]]] {in_bounds = [true, true]} : vector<5x6xf32>, tensor +// CHECK: return %[[r]] +func.func @insert_slice_of_transfer_write_rank_extending(%t1 : tensor, %v : vector<5x6xf32>, %s : index, %t2 : tensor<5x6xf32>) -> tensor { + %c0 = arith.constant 0 : index + %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<5x6xf32>, tensor<5x6xf32> + %1 = tensor.insert_slice %0 into %t1[4, 3, %s] [1, 5, 6] [1, 1, 1] : tensor<5x6xf32> into tensor + return %1 : tensor +} diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 4071d92641839..8538c3db59dcd 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5607,6 +5607,7 @@ cc_library( deps = [ ":AffineDialect", ":ArithDialect", + ":ArithUtils", ":DialectUtils", ":TensorDialect", "//llvm:Support", @@ -5663,6 +5664,7 @@ cc_library( ":TensorPassIncGen", ":TilingInterface", 
":Transforms", + ":VectorDialect", "//llvm:Support", ], ) From da44224d3aa806be8b1c4a29c791387e151b4f23 Mon Sep 17 00:00:00 2001 From: Alex Bradbury Date: Thu, 23 Mar 2023 11:17:40 +0000 Subject: [PATCH 403/691] [RISCV][test] Fix broken unit test after d25751779ba The patch had missed the RISCVISAInfoTest.cpp change. --- llvm/unittests/Support/RISCVISAInfoTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/unittests/Support/RISCVISAInfoTest.cpp b/llvm/unittests/Support/RISCVISAInfoTest.cpp index 2f623a94ce2a8..0b749eb0c6815 100644 --- a/llvm/unittests/Support/RISCVISAInfoTest.cpp +++ b/llvm/unittests/Support/RISCVISAInfoTest.cpp @@ -141,7 +141,7 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) { RISCVISAInfo &InfoRV32E = **MaybeRV32E; RISCVISAInfo::OrderedExtensionMap ExtsRV32E = InfoRV32E.getExtensions(); EXPECT_EQ(ExtsRV32E.size(), 1UL); - EXPECT_TRUE(ExtsRV32E.at("e") == (RISCVExtensionInfo{1, 9})); + EXPECT_TRUE(ExtsRV32E.at("e") == (RISCVExtensionInfo{2, 0})); EXPECT_EQ(InfoRV32E.getXLen(), 32U); EXPECT_EQ(InfoRV32E.getFLen(), 0U); From 80db8b03a9ad39594277462e302b0b33e5d8c8dd Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 23 Mar 2023 11:20:19 +0000 Subject: [PATCH 404/691] [gn build] Port 48f97e575137 --- .../utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn | 1 + .../gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn index 2f9db59141183..cfd54004a9ff3 100644 --- a/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Analysis/FlowSensitive/BUILD.gn @@ -10,6 +10,7 @@ static_library("FlowSensitive") { "DataflowAnalysisContext.cpp", "DataflowEnvironment.cpp", "DebugSupport.cpp", + "Logger.cpp", "Transfer.cpp", "TypeErasedDataflowAnalysis.cpp", "Value.cpp", diff --git a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn index a3a3966fed26b..22eb6721272ba 100644 --- a/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Analysis/FlowSensitive/BUILD.gn @@ -23,6 +23,7 @@ unittest("ClangAnalysisFlowSensitiveTests") { "DataflowAnalysisContextTest.cpp", "DataflowEnvironmentTest.cpp", "DebugSupportTest.cpp", + "LoggerTest.cpp", "MapLatticeTest.cpp", "MatchSwitchTest.cpp", "MultiVarConstantPropagationTest.cpp", From 20725d34b29ff2881ecc1d2a5b8a796e2996c313 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 23 Mar 2023 11:20:20 +0000 Subject: [PATCH 405/691] [gn build] Port c2de8ff92753 --- .../gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn | 2 ++ .../secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn | 1 + 2 files changed, 3 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn index de4074c3bfb64..edd0f2a3539d6 100644 --- a/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/ExecutionEngine/JITLink/BUILD.gn @@ -25,6 +25,7 @@ static_library("JITLink") { "EHFrameSupport.cpp", "ELF.cpp", "ELFLinkGraphBuilder.cpp", + "ELF_aarch32.cpp", "ELF_aarch64.cpp", "ELF_i386.cpp", "ELF_loongarch.cpp", @@ -37,6 +38,7 @@ static_library("JITLink") { 
"MachOLinkGraphBuilder.cpp", "MachO_arm64.cpp", "MachO_x86_64.cpp", + "aarch32.cpp", "aarch64.cpp", "i386.cpp", "loongarch.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn index d0d5225c9d6c9..d0f99ce939cfe 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/ExecutionEngine/JITLink/BUILD.gn @@ -14,6 +14,7 @@ unittest("JITLinkTests") { "//llvm/lib/Testing/Support", ] sources = [ + "AArch32Tests.cpp", "EHFrameSupportTests.cpp", "LinkGraphTests.cpp", ] From f44c7dec67fee8a41450c8a46d9e944f88f82eb6 Mon Sep 17 00:00:00 2001 From: luxufan Date: Thu, 23 Mar 2023 19:31:29 +0800 Subject: [PATCH 406/691] [Local] Use most generic range if K does not dominate J or K doesn't have a !noundef Since D141386 has changed the return value of !range from IUB to poison, metadata !range shouldn't be preserved even if K dominates J. If this patch was accepted, I plan to adjust metadata !nonnull as well. BTW, I found that metadata !noundef is not handled in combineMetadata, is this intentional? Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D142687 --- llvm/lib/Transforms/Utils/Local.cpp | 9 +--- llvm/test/Transforms/GVN/range.ll | 50 +++++++++++++++---- .../Transforms/JumpThreading/thread-loads.ll | 5 +- llvm/test/Transforms/NewGVN/range.ll | 23 +++++---- 4 files changed, 55 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 5c1fd6e9ae4a4..31deb08d45989 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -2675,14 +2675,7 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J, intersectAccessGroups(K, J)); break; case LLVMContext::MD_range: - - // If K does move, use most generic range. Otherwise keep the range of - // K. - if (DoesKMove) - // FIXME: If K does move, we should drop the range info and nonnull. - // Currently this function is used with DoesKMove in passes - // doing hoisting/sinking and the current behavior of using the - // most generic range is correct in those cases. 
+ if (DoesKMove || !K->hasMetadata(LLVMContext::MD_noundef)) K->setMetadata(Kind, MDNode::getMostGenericRange(JMD, KMD)); break; case LLVMContext::MD_fpmath: diff --git a/llvm/test/Transforms/GVN/range.ll b/llvm/test/Transforms/GVN/range.ll index ae0801ee59da1..48605aef0fe7d 100644 --- a/llvm/test/Transforms/GVN/range.ll +++ b/llvm/test/Transforms/GVN/range.ll @@ -17,7 +17,7 @@ define i32 @test1(ptr %p) { define i32 @test2(ptr %p) { ; CHECK-LABEL: define i32 @test2 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -30,7 +30,7 @@ define i32 @test2(ptr %p) { define i32 @test3(ptr %p) { ; CHECK-LABEL: define i32 @test3 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG1:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -43,7 +43,7 @@ define i32 @test3(ptr %p) { define i32 @test4(ptr %p) { ; CHECK-LABEL: define i32 @test4 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG2:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -56,7 +56,7 @@ define i32 @test4(ptr %p) { define i32 @test5(ptr %p) { ; CHECK-LABEL: define i32 @test5 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG1:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG3:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -69,7 +69,7 @@ define i32 @test5(ptr %p) { define i32 @test6(ptr %p) { ; CHECK-LABEL: define i32 @test6 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG2:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG4:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -82,7 +82,7 @@ define i32 @test6(ptr %p) { define i32 @test7(ptr %p) { ; CHECK-LABEL: define i32 @test7 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG3:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG5:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -95,7 +95,7 @@ define i32 @test7(ptr %p) { define i32 @test8(ptr %p) { ; CHECK-LABEL: define i32 @test8 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG4:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -105,6 +105,31 @@ define i32 @test8(ptr %p) { ret i32 %c } +define i32 @load_noundef_load(ptr %p) { +; CHECK-LABEL: define i32 @load_noundef_load +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]], !noundef !6 +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; + %a = load i32, ptr %p, !range !0, !noundef !11 + %b = load i32, ptr %p, !range !1 + %c = add i32 %a, %b + ret i32 %c +} + +define i32 @load_load_noundef(ptr %p) { +; CHECK-LABEL: define i32 @load_load_noundef +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: [[A:%.*]] = 
load i32, ptr [[P]], align 4, !range [[RNG1]] +; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] +; CHECK-NEXT: ret i32 [[C]] +; + %a = load i32, ptr %p, !range !0 + %b = load i32, ptr %p, !range !1, !noundef !11 + %c = add i32 %a, %b + ret i32 %c +} !0 = !{i32 0, i32 2} !1 = !{i32 3, i32 5} @@ -117,10 +142,13 @@ define i32 @test8(ptr %p) { !8 = !{i32 5, i32 1} !9 = !{i32 1, i32 5} !10 = !{i32 5, i32 1} +!11 = !{} ;. ; CHECK: [[RNG0]] = !{i32 0, i32 2} -; CHECK: [[RNG1]] = !{i32 -5, i32 -2} -; CHECK: [[RNG2]] = !{i32 10, i32 1} -; CHECK: [[RNG3]] = !{i32 1, i32 2, i32 3, i32 4} -; CHECK: [[RNG4]] = !{i32 1, i32 5} +; CHECK: [[RNG1]] = !{i32 0, i32 2, i32 3, i32 5} +; CHECK: [[RNG2]] = !{i32 0, i32 5} +; CHECK: [[RNG3]] = !{i32 -5, i32 -2, i32 1, i32 5} +; CHECK: [[RNG4]] = !{i32 10, i32 1} +; CHECK: [[RNG5]] = !{i32 3, i32 4, i32 5, i32 2} +; CHECK: [[META6:![0-9]+]] = !{} ;. diff --git a/llvm/test/Transforms/JumpThreading/thread-loads.ll b/llvm/test/Transforms/JumpThreading/thread-loads.ll index a730be9492c80..85952e8e6db4f 100644 --- a/llvm/test/Transforms/JumpThreading/thread-loads.ll +++ b/llvm/test/Transforms/JumpThreading/thread-loads.ll @@ -322,12 +322,12 @@ bb3: define void @test8(ptr, ptr, ptr) { ; CHECK-LABEL: @test8( ; CHECK-NEXT: ret2: -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0:%.*]], align 4, !range [[RNG4:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[TMP0:%.*]], align 4, !range [[RNG4:![0-9]+]], !noundef !5 ; CHECK-NEXT: store i32 [[A]], ptr [[TMP1:%.*]], align 4 ; CHECK-NEXT: [[XXX:%.*]] = tail call i32 (...) @f1() #[[ATTR0]] ; CHECK-NEXT: ret void ; - %a = load i32, ptr %0, !tbaa !0, !range !4, !alias.scope !9, !noalias !10 + %a = load i32, ptr %0, !tbaa !0, !range !4, !alias.scope !9, !noalias !10, !noundef !11 %b = load i32, ptr %0, !range !5 store i32 %a, ptr %1 %c = icmp eq i32 %b, 8 @@ -693,3 +693,4 @@ right_x: !8 = !{!8, !6} !9 = !{!7} !10 = !{!8} +!11 = !{} diff --git a/llvm/test/Transforms/NewGVN/range.ll b/llvm/test/Transforms/NewGVN/range.ll index 8803737c7bd4a..c853693897ab3 100644 --- a/llvm/test/Transforms/NewGVN/range.ll +++ b/llvm/test/Transforms/NewGVN/range.ll @@ -17,7 +17,7 @@ define i32 @test1(ptr %p) { define i32 @test2(ptr %p) { ; CHECK-LABEL: define i32 @test2 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -30,7 +30,7 @@ define i32 @test2(ptr %p) { define i32 @test3(ptr %p) { ; CHECK-LABEL: define i32 @test3 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG1:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -43,7 +43,7 @@ define i32 @test3(ptr %p) { define i32 @test4(ptr %p) { ; CHECK-LABEL: define i32 @test4 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG0]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG2:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -56,7 +56,7 @@ define i32 @test4(ptr %p) { define i32 @test5(ptr %p) { ; CHECK-LABEL: define i32 @test5 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG1:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG3:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add 
i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -69,7 +69,7 @@ define i32 @test5(ptr %p) { define i32 @test6(ptr %p) { ; CHECK-LABEL: define i32 @test6 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG2:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG4:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -82,7 +82,7 @@ define i32 @test6(ptr %p) { define i32 @test7(ptr %p) { ; CHECK-LABEL: define i32 @test7 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG3:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG5:![0-9]+]] ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -95,7 +95,7 @@ define i32 @test7(ptr %p) { define i32 @test8(ptr %p) { ; CHECK-LABEL: define i32 @test8 ; CHECK-SAME: (ptr [[P:%.*]]) { -; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4, !range [[RNG4:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = load i32, ptr [[P]], align 4 ; CHECK-NEXT: [[C:%.*]] = add i32 [[A]], [[A]] ; CHECK-NEXT: ret i32 [[C]] ; @@ -119,8 +119,9 @@ define i32 @test8(ptr %p) { !10 = !{i32 5, i32 1} ;. ; CHECK: [[RNG0]] = !{i32 0, i32 2} -; CHECK: [[RNG1]] = !{i32 -5, i32 -2} -; CHECK: [[RNG2]] = !{i32 10, i32 1} -; CHECK: [[RNG3]] = !{i32 1, i32 2, i32 3, i32 4} -; CHECK: [[RNG4]] = !{i32 1, i32 5} +; CHECK: [[RNG1]] = !{i32 0, i32 2, i32 3, i32 5} +; CHECK: [[RNG2]] = !{i32 0, i32 5} +; CHECK: [[RNG3]] = !{i32 -5, i32 -2, i32 1, i32 5} +; CHECK: [[RNG4]] = !{i32 10, i32 1} +; CHECK: [[RNG5]] = !{i32 3, i32 4, i32 5, i32 2} ;. From 67d828fb2c0168e1fe0f1caeba8fc7dc47b0c3ff Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Thu, 23 Mar 2023 13:18:14 +0200 Subject: [PATCH 407/691] [Object][NFC] Factor out computeHeadersSize. In preparation for COFF archives support. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D143537 --- llvm/lib/Object/ArchiveWriter.cpp | 64 +++++++++++++++++++------------ 1 file changed, 40 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index 0d3aad658fe43..e2c97417d0789 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -356,7 +356,7 @@ static void printNBits(raw_ostream &Out, object::Archive::Kind Kind, static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, uint64_t NumSyms, uint64_t OffsetSize, - StringRef StringTable, + uint64_t StringTableSize, uint32_t *Padding = nullptr) { assert((OffsetSize == 4 || OffsetSize == 8) && "Unsupported OffsetSize"); uint64_t Size = OffsetSize; // Number of entries @@ -366,7 +366,7 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, Size += NumSyms * OffsetSize; // Table if (isBSDLike(Kind)) Size += OffsetSize; // byte count - Size += StringTable.size(); + Size += StringTableSize; // ld64 expects the members to be 8-byte aligned for 64-bit content and at // least 4-byte aligned for 32-bit content. Opt for the larger encoding // uniformly. @@ -398,9 +398,24 @@ static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, } } +static uint64_t computeHeadersSize(object::Archive::Kind Kind, uint64_t NumSyms, + uint64_t SymNamesSize) { + uint32_t OffsetSize = is64BitKind(Kind) ? 
8 : 4; + uint64_t SymtabSize = + computeSymbolTableSize(Kind, NumSyms, OffsetSize, SymNamesSize); + auto computeSymbolTableHeaderSize = [=] { + SmallString<0> TmpBuf; + raw_svector_ostream Tmp(TmpBuf); + writeSymbolTableHeader(Tmp, Kind, true, SymtabSize); + return TmpBuf.size(); + }; + + return strlen("!\n") + computeSymbolTableHeaderSize() + SymtabSize; +} + static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, bool Deterministic, ArrayRef Members, - StringRef StringTable, + StringRef StringTable, uint64_t MembersOffset, uint64_t PrevMemberOffset = 0) { // We don't write a symbol table on an archive with no members -- except on // Darwin, where the linker will abort unless the archive has a symbol table. @@ -413,17 +428,16 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, uint64_t OffsetSize = is64BitKind(Kind) ? 8 : 4; uint32_t Pad; - uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, StringTable, &Pad); + uint64_t Size = computeSymbolTableSize(Kind, NumSyms, OffsetSize, + StringTable.size(), &Pad); writeSymbolTableHeader(Out, Kind, Deterministic, Size, PrevMemberOffset); - uint64_t Pos = isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) - : Out.tell() + Size; - if (isBSDLike(Kind)) printNBits(Out, Kind, NumSyms * 2 * OffsetSize); else printNBits(Out, Kind, NumSyms); + uint64_t Pos = MembersOffset; for (const MemberData &M : Members) { for (unsigned StringOffset : M.Symbols) { if (isBSDLike(Kind)) @@ -679,9 +693,8 @@ static Error writeArchiveToStream(raw_ostream &Out, Data.insert(Data.begin(), computeStringTable(StringTableBuf)); // We would like to detect if we need to switch to a 64-bit symbol table. - uint64_t LastMemberEndOffset = - isAIXBigArchive(Kind) ? sizeof(object::BigArchive::FixLenHdr) : 8; - uint64_t LastMemberHeaderOffset = LastMemberEndOffset; + uint64_t LastMemberEndOffset = 0; + uint64_t LastMemberHeaderOffset = 0; uint64_t NumSyms = 0; for (const auto &M : Data) { // Record the start of the member's offset @@ -691,19 +704,13 @@ static Error writeArchiveToStream(raw_ostream &Out, NumSyms += M.Symbols.size(); } + std::optional HeadersSize; + // The symbol table is put at the end of the big archive file. The symbol // table is at the start of the archive file for other archive formats. - if (WriteSymtab && !isAIXBigArchive(Kind)) { + if (WriteSymtab && !is64BitKind(Kind)) { // We assume 32-bit offsets to see if 32-bit symbols are possible or not. - uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, 4, SymNamesBuf); - auto computeSymbolTableHeaderSize = - [=] { - SmallString<0> TmpBuf; - raw_svector_ostream Tmp(TmpBuf); - writeSymbolTableHeader(Tmp, Kind, Deterministic, SymtabSize); - return TmpBuf.size(); - }; - LastMemberHeaderOffset += computeSymbolTableHeaderSize() + SymtabSize; + HeadersSize = computeHeadersSize(Kind, NumSyms, SymNamesBuf.size()); // The SYM64 format is used when an archive's member offsets are larger than // 32-bits can hold. The need for this shift in format is detected by @@ -720,11 +727,12 @@ static Error writeArchiveToStream(raw_ostream &Out, // If LastMemberHeaderOffset isn't going to fit in a 32-bit varible we need // to switch to 64-bit. Note that the file can be larger than 4GB as long as // the last member starts before the 4GB offset. 
- if (LastMemberHeaderOffset >= Sym64Threshold) { + if (*HeadersSize + LastMemberHeaderOffset >= Sym64Threshold) { if (Kind == object::Archive::K_DARWIN) Kind = object::Archive::K_DARWIN64; else Kind = object::Archive::K_GNU64; + HeadersSize.reset(); } } @@ -736,11 +744,19 @@ static Error writeArchiveToStream(raw_ostream &Out, Out << "!\n"; if (!isAIXBigArchive(Kind)) { - if (WriteSymtab) - writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf); + if (WriteSymtab) { + if (!HeadersSize) + HeadersSize = computeHeadersSize(Kind, NumSyms, SymNamesBuf.size()); + writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf, + *HeadersSize); + } for (const MemberData &M : Data) Out << M.Header << M.Data << M.Padding; } else { + HeadersSize = sizeof(object::BigArchive::FixLenHdr); + LastMemberEndOffset += *HeadersSize; + LastMemberHeaderOffset += *HeadersSize; + // For the big archive (AIX) format, compute a table of member names and // offsets, used in the member table. uint64_t MemberTableNameStrTblSize = 0; @@ -813,7 +829,7 @@ static Error writeArchiveToStream(raw_ostream &Out, if (WriteSymtab && NumSyms > 0) writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf, - LastMemberEndOffset); + *HeadersSize, LastMemberEndOffset); } } Out.flush(); From 257dc54be2d292acec2d6b609ebb0a4e424c9e30 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Thu, 23 Mar 2023 13:19:53 +0200 Subject: [PATCH 408/691] [Object][NFC] Don't insert string table into object members vector. Having the string table in the members vector does not fit the later patches in this series. The symbol map needs to refer to the objects' offsets, but the string table should not be referenced. Also, for ARM64EC, the new table is inserted after the string table. Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D143538 --- llvm/lib/Object/ArchiveWriter.cpp | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index e2c97417d0789..cd0429a271227 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -398,7 +398,8 @@ static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, } } -static uint64_t computeHeadersSize(object::Archive::Kind Kind, uint64_t NumSyms, +static uint64_t computeHeadersSize(object::Archive::Kind Kind, + uint64_t StringMemberSize, uint64_t NumSyms, uint64_t SymNamesSize) { uint32_t OffsetSize = is64BitKind(Kind) ? 8 : 4; uint64_t SymtabSize = @@ -410,7 +411,7 @@ static uint64_t computeHeadersSize(object::Archive::Kind Kind, uint64_t NumSyms, return TmpBuf.size(); }; - return strlen("!\n") + computeSymbolTableHeaderSize() + SymtabSize; + return strlen("!\n") + computeSymbolTableHeaderSize() + SymtabSize + StringMemberSize; } static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, @@ -689,8 +690,14 @@ static Error writeArchiveToStream(raw_ostream &Out, return E; std::vector &Data = *DataOrErr; - if (!StringTableBuf.empty() && !isAIXBigArchive(Kind)) - Data.insert(Data.begin(), computeStringTable(StringTableBuf)); + uint64_t StringTableSize = 0; + MemberData StringTableMember; + if (!StringTableBuf.empty() && !isAIXBigArchive(Kind)) { + StringTableMember = computeStringTable(StringTableBuf); + StringTableSize = StringTableMember.Header.size() + + StringTableMember.Data.size() + + StringTableMember.Padding.size(); + } // We would like to detect if we need to switch to a 64-bit symbol table.
   uint64_t LastMemberEndOffset = 0;
@@ -710,7 +717,8 @@ static Error writeArchiveToStream(raw_ostream &Out,
   // table is at the start of the archive file for other archive formats.
   if (WriteSymtab && !is64BitKind(Kind)) {
     // We assume 32-bit offsets to see if 32-bit symbols are possible or not.
-    HeadersSize = computeHeadersSize(Kind, NumSyms, SymNamesBuf.size());
+    HeadersSize =
+        computeHeadersSize(Kind, StringTableSize, NumSyms, SymNamesBuf.size());
 
     // The SYM64 format is used when an archive's member offsets are larger than
     // 32-bits can hold. The need for this shift in format is detected by
@@ -746,10 +754,16 @@ static Error writeArchiveToStream(raw_ostream &Out,
   if (!isAIXBigArchive(Kind)) {
     if (WriteSymtab) {
       if (!HeadersSize)
-        HeadersSize = computeHeadersSize(Kind, NumSyms, SymNamesBuf.size());
+        HeadersSize = computeHeadersSize(Kind, StringTableSize, NumSyms,
+                                         SymNamesBuf.size());
       writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf,
                        *HeadersSize);
     }
+
+    if (StringTableSize)
+      Out << StringTableMember.Header << StringTableMember.Data
+          << StringTableMember.Padding;
+
     for (const MemberData &M : Data)
       Out << M.Header << M.Data << M.Padding;
   } else {

From 4fcbf3842007569880fa916831efefda6b1bd032 Mon Sep 17 00:00:00 2001
From: Jacek Caban
Date: Thu, 23 Mar 2023 13:20:15 +0200
Subject: [PATCH 409/691] [llvm-lib] Use COFF archive format in llvm-lib
 (other archive tools don't use this format).

We currently just use the GNU format for llvm-lib. This mostly works, but
ARM64EC needs an additional section that does not really fit the GNU format.
This patch implements writing in COFF format (as in, it's what the archive
reader considers as K_COFF). This mostly requires emitting a symbol map.
Note that, just like in the case of MSVC, symbols are de-duplicated in both
the usual symbol table and the new symbol map.
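
Roughly, the new symbol map member has this layout: a 32-bit member count,
one 32-bit offset per member, a 32-bit symbol count, one 16-bit member index
per symbol, and the NUL-terminated symbol names, padded to an even size. As a
simplified, self-contained C++ sketch of the size computation this patch adds
(sketchSymbolMapSize is an illustrative name, not code from the patch):

  #include <cstdint>
  #include <map>
  #include <string>

  // Sketch of the COFF archive symbol map size, mirroring the layout above.
  static uint64_t
  sketchSymbolMapSize(uint64_t NumObj,
                      const std::map<std::string, uint16_t> &Map) {
    uint64_t Size = sizeof(uint32_t) * 2;  // member count + symbol count
    Size += NumObj * sizeof(uint32_t);     // member offset table
    for (const auto &S : Map)
      Size += sizeof(uint16_t) + S.first.length() + 1; // index + name + NUL
    return Size + (Size % 2);              // pad to a 2-byte boundary
  }

Because the map is keyed by symbol name, the emitted names naturally come out
sorted and de-duplicated, which is what the updated duplicate.test checks.
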
Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D143540 --- llvm/lib/Object/ArchiveWriter.cpp | 135 ++++++++++++++++---- llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp | 6 +- llvm/test/tools/llvm-lib/duplicate.test | 9 ++ 3 files changed, 126 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Object/ArchiveWriter.cpp b/llvm/lib/Object/ArchiveWriter.cpp index cd0429a271227..2d0f92e43a344 100644 --- a/llvm/lib/Object/ArchiveWriter.cpp +++ b/llvm/lib/Object/ArchiveWriter.cpp @@ -17,6 +17,7 @@ #include "llvm/BinaryFormat/Magic.h" #include "llvm/IR/LLVMContext.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Object/IRObjectFile.h" #include "llvm/Object/MachO.h" @@ -43,6 +44,10 @@ using namespace llvm; +struct SymMap { + std::map Map; +}; + NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef) : Buf(MemoryBuffer::getMemBuffer(BufRef, false)), MemberName(BufRef.getBufferIdentifier()) {} @@ -169,18 +174,21 @@ static bool isAIXBigArchive(object::Archive::Kind Kind) { return Kind == object::Archive::K_AIXBIG; } +static bool isCOFFArchive(object::Archive::Kind Kind) { + return Kind == object::Archive::K_COFF; +} + static bool isBSDLike(object::Archive::Kind Kind) { switch (Kind) { case object::Archive::K_GNU: case object::Archive::K_GNU64: case object::Archive::K_AIXBIG: + case object::Archive::K_COFF: return false; case object::Archive::K_BSD: case object::Archive::K_DARWIN: case object::Archive::K_DARWIN64: return true; - case object::Archive::K_COFF: - break; } llvm_unreachable("not supported for writting"); } @@ -191,6 +199,10 @@ static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val) { isBSDLike(Kind) ? support::little : support::big); } +template static void printLE(raw_ostream &Out, T Val) { + support::endian::write(Out, Val, support::little); +} + static void printRestOfMemberHeader( raw_ostream &Out, const sys::TimePoint &ModTime, unsigned UID, unsigned GID, unsigned Perms, uint64_t Size) { @@ -295,7 +307,11 @@ printMemberHeader(raw_ostream &Out, uint64_t Pos, raw_ostream &StringTable, auto Insertion = MemberNames.insert({M.MemberName, uint64_t(0)}); if (Insertion.second) { Insertion.first->second = StringTable.tell(); - StringTable << M.MemberName << "/\n"; + StringTable << M.MemberName; + if (isCOFFArchive(Kind)) + StringTable << '\0'; + else + StringTable << "/\n"; } NamePos = Insertion.first->second; } @@ -376,6 +392,22 @@ static uint64_t computeSymbolTableSize(object::Archive::Kind Kind, uint32_t Pad = isAIXBigArchive(Kind) ? 0 : offsetToAlignment(Size, Align(isBSDLike(Kind) ? 8 : 2)); + + Size += Pad; + if (Padding) + *Padding = Pad; + return Size; +} + +static uint64_t computeSymbolMapSize(uint64_t NumObj, SymMap &SymMap, + uint32_t *Padding = nullptr) { + uint64_t Size = sizeof(uint32_t) * 2; // Number of symbols and objects entries + Size += NumObj * sizeof(uint32_t); // Offset table + + for (auto S : SymMap.Map) + Size += sizeof(uint16_t) + S.first.length() + 1; + + uint32_t Pad = offsetToAlignment(Size, Align(2)); Size += Pad; if (Padding) *Padding = Pad; @@ -399,8 +431,9 @@ static void writeSymbolTableHeader(raw_ostream &Out, object::Archive::Kind Kind, } static uint64_t computeHeadersSize(object::Archive::Kind Kind, + uint64_t NumMembers, uint64_t StringMemberSize, uint64_t NumSyms, - uint64_t SymNamesSize) { + uint64_t SymNamesSize, SymMap *SymMap) { uint32_t OffsetSize = is64BitKind(Kind) ? 
8 : 4; uint64_t SymtabSize = computeSymbolTableSize(Kind, NumSyms, OffsetSize, SymNamesSize); @@ -410,8 +443,13 @@ static uint64_t computeHeadersSize(object::Archive::Kind Kind, writeSymbolTableHeader(Tmp, Kind, true, SymtabSize); return TmpBuf.size(); }; + uint32_t HeaderSize = computeSymbolTableHeaderSize(); + uint64_t Size = strlen("!\n") + HeaderSize + SymtabSize; + + if (SymMap) + Size += HeaderSize + computeSymbolMapSize(NumMembers, *SymMap); - return strlen("!\n") + computeSymbolTableHeaderSize() + SymtabSize + StringMemberSize; + return Size + StringMemberSize; } static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, @@ -420,7 +458,7 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, uint64_t PrevMemberOffset = 0) { // We don't write a symbol table on an archive with no members -- except on // Darwin, where the linker will abort unless the archive has a symbol table. - if (StringTable.empty() && !isDarwin(Kind)) + if (StringTable.empty() && !isDarwin(Kind) && !isCOFFArchive(Kind)) return; unsigned NumSyms = 0; @@ -457,8 +495,35 @@ static void writeSymbolTable(raw_ostream &Out, object::Archive::Kind Kind, Out.write(uint8_t(0)); } +static void writeSymbolMap(raw_ostream &Out, object::Archive::Kind Kind, + bool Deterministic, ArrayRef Members, + SymMap &SymMap, uint64_t MembersOffset) { + uint32_t Pad; + uint64_t Size = computeSymbolMapSize(Members.size(), SymMap, &Pad); + writeSymbolTableHeader(Out, Kind, Deterministic, Size, 0); + + uint32_t Pos = MembersOffset; + + printLE(Out, Members.size()); + for (const MemberData &M : Members) { + printLE(Out, Pos); // member offset + Pos += M.Header.size() + M.Data.size() + M.Padding.size(); + } + + printLE(Out, SymMap.Map.size()); + + for (auto S : SymMap.Map) + printLE(Out, S.second); + for (auto S : SymMap.Map) + Out << S.first << '\0'; + + while (Pad--) + Out.write(uint8_t(0)); +} + static Expected> -getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { +getSymbols(MemoryBufferRef Buf, uint16_t Index, raw_ostream &SymNames, + SymMap *SymMap, bool &HasObject) { std::vector Ret; // In the scenario when LLVMContext is populated SymbolicFile will contain a @@ -487,10 +552,22 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { for (const object::BasicSymbolRef &S : Obj->symbols()) { if (!isArchiveSymbol(S)) continue; - Ret.push_back(SymNames.tell()); - if (Error E = S.printName(SymNames)) - return std::move(E); - SymNames << '\0'; + if (SymMap) { + std::string Name; + raw_string_ostream NameStream(Name); + if (Error E = S.printName(NameStream)) + return std::move(E); + if (SymMap->Map.find(Name) != SymMap->Map.end()) + continue; // ignore duplicated symbol + SymMap->Map[Name] = Index; + Ret.push_back(SymNames.tell()); + SymNames << Name << '\0'; + } else { + Ret.push_back(SymNames.tell()); + if (Error E = S.printName(SymNames)) + return std::move(E); + SymNames << '\0'; + } } return Ret; } @@ -498,7 +575,8 @@ getSymbols(MemoryBufferRef Buf, raw_ostream &SymNames, bool &HasObject) { static Expected> computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, object::Archive::Kind Kind, bool Thin, bool Deterministic, - bool NeedSymbols, ArrayRef NewMembers) { + bool NeedSymbols, SymMap *SymMap, + ArrayRef NewMembers) { static char PaddingData[8] = {'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'}; uint64_t Pos = @@ -564,7 +642,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, // The big archive format needs to know the offset 
of the previous member // header. - unsigned PrevOffset = 0; + unsigned PrevOffset = 0, Index = 0; for (const NewArchiveMember &M : NewMembers) { std::string Header; raw_string_ostream Out(Header); @@ -572,6 +650,8 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, MemoryBufferRef Buf = M.Buf->getMemBufferRef(); StringRef Data = Thin ? "" : Buf.getBuffer(); + Index++; + // ld64 expects the members to be 8-byte aligned for 64-bit content and at // least 4-byte aligned for 32-bit content. Opt for the larger encoding // uniformly. This matches the behaviour with cctools and ensures that ld64 @@ -612,7 +692,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, std::vector Symbols; if (NeedSymbols) { Expected> SymbolsOrErr = - getSymbols(Buf, SymNames, HasObject); + getSymbols(Buf, Index, SymNames, SymMap, HasObject); if (!SymbolsOrErr) return createFileError(M.MemberName, SymbolsOrErr.takeError()); Symbols = std::move(*SymbolsOrErr); @@ -624,7 +704,7 @@ computeMemberData(raw_ostream &StringTable, raw_ostream &SymNames, // If there are no symbols, emit an empty symbol table, to satisfy Solaris // tools, older versions of which expect a symbol table in a non-empty // archive, regardless of whether there are any symbols in it. - if (HasObject && SymNames.tell() == 0) + if (HasObject && SymNames.tell() == 0 && !isCOFFArchive(Kind)) SymNames << '\0' << '\0' << '\0'; return Ret; } @@ -682,10 +762,16 @@ static Error writeArchiveToStream(raw_ostream &Out, raw_svector_ostream SymNames(SymNamesBuf); SmallString<0> StringTableBuf; raw_svector_ostream StringTable(StringTableBuf); + SymMap SymMap; - Expected> DataOrErr = - computeMemberData(StringTable, SymNames, Kind, Thin, Deterministic, - WriteSymtab, NewMembers); + // COFF symbol map uses 16-bit indexes, so we can't use it if there are too + // many members. + if (isCOFFArchive(Kind) && NewMembers.size() > 0xfffe) + Kind = object::Archive::K_GNU; + + Expected> DataOrErr = computeMemberData( + StringTable, SymNames, Kind, Thin, Deterministic, WriteSymtab, + isCOFFArchive(Kind) ? &SymMap : nullptr, NewMembers); if (Error E = DataOrErr.takeError()) return E; std::vector &Data = *DataOrErr; @@ -717,8 +803,9 @@ static Error writeArchiveToStream(raw_ostream &Out, // table is at the start of the archive file for other archive formats. if (WriteSymtab && !is64BitKind(Kind)) { // We assume 32-bit offsets to see if 32-bit symbols are possible or not. - HeadersSize = - computeHeadersSize(Kind, StringTableSize, NumSyms, SymNamesBuf.size()); + HeadersSize = computeHeadersSize(Kind, Data.size(), StringTableSize, + NumSyms, SymNamesBuf.size(), + isCOFFArchive(Kind) ? &SymMap : nullptr); // The SYM64 format is used when an archive's member offsets are larger than // 32-bits can hold. The need for this shift in format is detected by @@ -754,10 +841,14 @@ static Error writeArchiveToStream(raw_ostream &Out, if (!isAIXBigArchive(Kind)) { if (WriteSymtab) { if (!HeadersSize) - HeadersSize = computeHeadersSize(Kind, StringTableSize, NumSyms, - SymNamesBuf.size()); + HeadersSize = computeHeadersSize( + Kind, Data.size(), StringTableSize, NumSyms, SymNamesBuf.size(), + isCOFFArchive(Kind) ? 
&SymMap : nullptr);
       writeSymbolTable(Out, Kind, Deterministic, Data, SymNamesBuf,
                        *HeadersSize);
+
+      if (isCOFFArchive(Kind))
+        writeSymbolMap(Out, Kind, Deterministic, Data, SymMap, *HeadersSize);
     }
 
     if (StringTableSize)
diff --git a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
index 3a609eefcb10e..9ca63bead9bc7 100644
--- a/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
+++ b/llvm/lib/ToolDrivers/llvm-lib/LibDriver.cpp
@@ -455,10 +455,12 @@ int llvm::libDriverMain(ArrayRef<const char *> ArgsArr) {
   // For compatibility with MSVC, reverse member vector after de-duplication.
   std::reverse(Members.begin(), Members.end());
 
+  bool Thin = Args.hasArg(OPT_llvmlibthin);
   if (Error E = writeArchive(OutputPath, Members,
-                             /*WriteSymtab=*/true, object::Archive::K_GNU,
-                             /*Deterministic*/ true, Args.hasArg(OPT_llvmlibthin))) {
+                             /*WriteSymtab=*/true,
+                             Thin ? object::Archive::K_GNU : object::Archive::K_COFF,
+                             /*Deterministic*/ true, Thin)) {
     handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
       llvm::errs() << OutputPath << ": " << EI.message() << "\n";
     });
diff --git a/llvm/test/tools/llvm-lib/duplicate.test b/llvm/test/tools/llvm-lib/duplicate.test
index 098858d4fbcd1..87dae66cb80be 100644
--- a/llvm/test/tools/llvm-lib/duplicate.test
+++ b/llvm/test/tools/llvm-lib/duplicate.test
@@ -14,3 +14,12 @@ CHECK: bar.o
 CHECK-NEXT: abc.o
 CHECK-NEXT: foo.o
 CHECK-NOT: foo.o
+
+# Check that symbol map contains sorted, de-duplicated symbols.
+RUN: cd %t && llvm-lib -out:foo.lib foo.o foo.o abc.o bar.o foo.o foo.o
+RUN: llvm-nm --print-armap %t/foo.lib | FileCheck %s --check-prefix=DUP
+# DUP: Archive map
+# DUP-NEXT: a in abc.o
+# DUP-NEXT: b in bar.o
+# DUP-NEXT: c in abc.o
+# DUP-EMPTY

From a5988034a44d039f95db3067e4ad0dfeeca155c3 Mon Sep 17 00:00:00 2001
From: Jacek Caban
Date: Thu, 23 Mar 2023 13:20:37 +0200
Subject: [PATCH 410/691] [lld] Fill .text section gaps with INT3 only on x86
 targets.

It doesn't make sense on ARM, and using the default 0 fill is compatible with
MSVC. (It's more noticeable on ARM64EC targets, where additional padding
mixed with alignment is used for entry thunk association, so there are more
gaps.)

Reviewed By: mstorsjo

Differential Revision: https://reviews.llvm.org/D145962
---
 lld/COFF/Writer.cpp                        |  3 +-
 lld/test/COFF/arm-thumb-thunks-multipass.s |  2 +-
 lld/test/COFF/arm64-import2.test           |  2 +-
 lld/test/COFF/gaps-fill.test               | 78 ++++++++++++++++++++++
 4 files changed, 82 insertions(+), 3 deletions(-)
 create mode 100644 lld/test/COFF/gaps-fill.test

diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 0909b14d81901..603703e65290b 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -1953,7 +1953,8 @@ void Writer::writeSections() {
     // Fill gaps between functions in .text with INT3 instructions
    // instead of leaving as NUL bytes (which can be interpreted as
    // ADD instructions).
- if (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) + if ((sec->header.Characteristics & IMAGE_SCN_CNT_CODE) && + (ctx.config.machine == AMD64 || ctx.config.machine == I386)) memset(secBuf, 0xCC, sec->getRawSize()); parallelForEach(sec->chunks, [&](Chunk *c) { c->writeTo(secBuf + c->getRVA() - sec->getRVA()); diff --git a/lld/test/COFF/arm-thumb-thunks-multipass.s b/lld/test/COFF/arm-thumb-thunks-multipass.s index 71ce53d99b31f..c10b22963187b 100644 --- a/lld/test/COFF/arm-thumb-thunks-multipass.s +++ b/lld/test/COFF/arm-thumb-thunks-multipass.s @@ -67,4 +67,4 @@ far_func\i: // FUNC01-THUNKS: 40500a: f2c0 0c10 movt r12, #16 // FUNC01-THUNKS: 40500e: 44e7 add pc, r12 // The instruction below is padding from the .balign -// FUNC01-THUNKS: 405010: cccc ldm r4!, {r2, r3, r6, r7} +// FUNC01-THUNKS: 405010: 0000 movs r0, r0 diff --git a/lld/test/COFF/arm64-import2.test b/lld/test/COFF/arm64-import2.test index 9b95f1a29b834..342671211db87 100644 --- a/lld/test/COFF/arm64-import2.test +++ b/lld/test/COFF/arm64-import2.test @@ -18,7 +18,7 @@ # AFTER: 140001000: 94000004 bl 0x140001010 # AFTER: 140001004: 94000006 bl 0x14000101c # AFTER: 140001008: d65f03c0 ret -# AFTER: 14000100c: ccccccff +# AFTER: 14000100c: 000000ff # AFTER: 140001010: b0000010 adrp x16, 0x140002000 # AFTER: 140001014: f9403210 ldr x16, [x16, #96] # AFTER: 140001018: d61f0200 br x16 diff --git a/lld/test/COFF/gaps-fill.test b/lld/test/COFF/gaps-fill.test new file mode 100644 index 0000000000000..17cd9cbc86ab3 --- /dev/null +++ b/lld/test/COFF/gaps-fill.test @@ -0,0 +1,78 @@ +# REQUIRES: aarch64 +# RUN: split-file %s %t.dir + +# RUN: llvm-mc -filetype=obj -triple=aarch64-windows %t.dir/arm64-dllmain.s -o %t.dir/arm64-dllmain.obj +# RUN: llvm-mc -filetype=obj -triple=aarch64-windows %t.dir/arm64-p4sym.s -o %t.dir/arm64-p4sym.obj +# RUN: lld-link -dll -machine:arm64 %t.dir/arm64-dllmain.obj %t.dir/arm64-p4sym.obj -out:%t.dll + +# RUN: llvm-objdump -dz %t.dll | FileCheck -check-prefix=CHECK-ARM64 %s +# CHECK-ARM64: 180001000: 52800020 mov w0, #0x1 +# CHECK-ARM64: 180001004: d65f03c0 ret +# CHECK-ARM64: 180001008: 00000000 +# CHECK-ARM64: 18000100c: 00000000 +# CHECK-ARM64: 180001010: 52800040 mov w0, #0x2 +# CHECK-ARM64: 180001014: d65f03c0 ret + +#--- arm64-dllmain.s + .def _DllMainCRTStartup; + .scl 2; + .type 32; + .endef + .globl _DllMainCRTStartup + .p2align 2 +_DllMainCRTStartup: + mov w0, #1 + ret + +#--- arm64-p4sym.s + .def p4sym; + .scl 2; + .type 32; + .endef + .globl p4sym + .p2align 4 +p4sym: + mov w0, #2 + ret + +# RUN: llvm-mc -filetype=obj -triple=x86_64-windows %t.dir/x86_64-dllmain.s -o %t.dir/x86_64-dllmain.obj +# RUN: llvm-mc -filetype=obj -triple=x86_64-windows %t.dir/x86_64-p4sym.s -o %t.dir/x86_64-p4sym.obj +# RUN: lld-link -dll -machine:amd64 %t.dir/x86_64-dllmain.obj %t.dir/x86_64-p4sym.obj -out:%t.dll + +# RUN: llvm-objdump -dz %t.dll | FileCheck -check-prefix=CHECK-X64 %s +# CHECK-X64: 180001000: b8 01 00 00 00 movl $0x1, %eax +# CHECK-X64: 180001005: c3 retq +# CHECK-X64: 180001006: cc int3 +# CHECK-X64: 180001007: cc int3 +# CHECK-X64: 180001008: cc int3 +# CHECK-X64: 180001009: cc int3 +# CHECK-X64: 18000100a: cc int3 +# CHECK-X64: 18000100b: cc int3 +# CHECK-X64: 18000100c: cc int3 +# CHECK-X64: 18000100d: cc int3 +# CHECK-X64: 18000100e: cc int3 +# CHECK-X64: 18000100f: cc int3 +# CHECK-X64: 180001010: b8 02 00 00 00 movl $0x2, %eax +# CHECK-X64: 180001015: c3 retq + +#--- x86_64-dllmain.s + .def _DllMainCRTStartup; + .scl 2; + .type 32; + .endef + .globl _DllMainCRTStartup + .p2align 4, 0x90 
+_DllMainCRTStartup: + movl $1, %eax + retq + +#--- x86_64-p4sym.s + .def p4sym; + .scl 2; + .type 32; + .endef + .globl p4sym + .p2align 4, 0x90 +p4sym: + movl $2, %eax + retq From 0cbfd68af79fa4262e2f5f8939f94fccd439cb0b Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Thu, 23 Mar 2023 12:43:34 +0100 Subject: [PATCH 411/691] =?UTF-8?q?[mlir]=20Fix=20call=20of=20overloaded?= =?UTF-8?q?=20=E2=80=98dropResults()?= =?UTF-8?q?=E2=80=99=20is=20ambiguous?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NFC --- mlir/include/mlir/IR/AffineMap.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mlir/include/mlir/IR/AffineMap.h b/mlir/include/mlir/IR/AffineMap.h index 75a268c483955..e21dc9c950c5a 100644 --- a/mlir/include/mlir/IR/AffineMap.h +++ b/mlir/include/mlir/IR/AffineMap.h @@ -249,7 +249,9 @@ class AffineMap { /// Returns a new AffineMap with the same number of dims and symbols and one /// less result at `pos`, dropped. - AffineMap dropResult(int64_t pos) const { return dropResults({pos}); } + AffineMap dropResult(int64_t pos) const { + return dropResults(ArrayRef({pos})); + } // Returns a new AffineMap with the same number of dims and symbols, but all // results in `positions` dropped. From c2c9de4ae1251a6a9ee7eed6403bbb41a386bbcb Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Thu, 23 Mar 2023 12:56:33 +0100 Subject: [PATCH 412/691] [gn] port a282ea4898efe --- llvm/utils/gn/secondary/clang/test/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/test/BUILD.gn b/llvm/utils/gn/secondary/clang/test/BUILD.gn index 480e1cd5a89c4..c88db82c10192 100644 --- a/llvm/utils/gn/secondary/clang/test/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/test/BUILD.gn @@ -176,6 +176,7 @@ group("test") { "//llvm/tools/llvm-nm:symlinks", "//llvm/tools/llvm-objcopy:symlinks", "//llvm/tools/llvm-objdump:symlinks", + "//llvm/tools/llvm-pdbutil", "//llvm/tools/llvm-profdata", "//llvm/tools/llvm-rc:symlinks", "//llvm/tools/llvm-readobj:symlinks", From 18d56880a89ad7d58f8543d148facebd079cef19 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Thu, 23 Mar 2023 08:05:15 -0400 Subject: [PATCH 413/691] Revert "libclang: Pass Clang install directory to driver via argv[0]." This reverts commit 201fdef40dd6ec193d18d39638454a3c972f1fec. There was an issue found in post-commit by: https://lab.llvm.org/buildbot/#/builders/91/builds/15272 --- clang/docs/ReleaseNotes.rst | 8 -------- clang/include/clang-c/Index.h | 9 ++------- clang/test/Index/record-completion-invocation.c | 2 +- clang/test/Index/record-parsing-invocation.c | 4 ++-- clang/tools/libclang/CIndex.cpp | 11 +---------- 5 files changed, 6 insertions(+), 28 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 94e0f10a31743..005bf99a62457 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -368,14 +368,6 @@ libclang has an evaluable bit width. Fixes undefined behavior when called on a bit-field whose width depends on a template paramter. -- ``clang_parseTranslationUnit`` and ``clang_parseTranslationUnit2`` have been - changed to automatically locate the Clang installation directory relative to - the location of the libclang binary and use it for system headers installed - alongside the Clang installation. It is no longer necessary to manually - locate such system headers or use the ``clang_parseTranslationUnit2FullArgv`` - function for this purpose if libclang has been installed in the default - location. 
- Static Analyzer --------------- - Fix incorrect alignment attribute on the this parameter of certain diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 8275f2941a41c..c7d32e6a152ae 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -899,13 +899,8 @@ CINDEX_LINKAGE enum CXErrorCode clang_parseTranslationUnit2( /** * Same as clang_parseTranslationUnit2 but requires a full command line - * for \c command_line_args including argv[0]. - * - * This is useful if the driver uses paths relative to the binary and either - * you are targeting libclang versions older than Clang 17, or libclang is - * installed to a non-standard location. Clang 17 and newer will automatically - * use the correct argv[0] if libclang is installed in the lib directory - * parallel to the bin directory where the clang binary is installed. + * for \c command_line_args including argv[0]. This is useful if the standard + * library paths are relative to the binary. */ CINDEX_LINKAGE enum CXErrorCode clang_parseTranslationUnit2FullArgv( CXIndex CIdx, const char *source_filename, diff --git a/clang/test/Index/record-completion-invocation.c b/clang/test/Index/record-completion-invocation.c index 75eb9083908ae..4b667134fa2d4 100644 --- a/clang/test/Index/record-completion-invocation.c +++ b/clang/test/Index/record-completion-invocation.c @@ -9,4 +9,4 @@ // RUN: env LIBCLANG_DISABLE_CRASH_RECOVERY=1 CINDEXTEST_INVOCATION_EMISSION_PATH=%t not --crash c-index-test -code-completion-at=%s:10:1 "-remap-file=%s,%S/Inputs/record-parsing-invocation-remap.c" %s // RUN: cat %t/libclang-* | FileCheck %s -// CHECK: {"toolchain":"{{.*}}","libclang.operation":"complete","libclang.opts":1,"args":["{{.*}}bin{{.*}}clang","-fno-spell-checking","{{.*}}record-completion-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"],"invocation-args":["-code-completion-at={{.*}}record-completion-invocation.c:10:1"],"unsaved_file_hashes":[{"name":"{{.*}}record-completion-invocation.c","md5":"aee23773de90e665992b48209351d70e"}]} +// CHECK: {"toolchain":"{{.*}}","libclang.operation":"complete","libclang.opts":1,"args":["clang","-fno-spell-checking","{{.*}}record-completion-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"],"invocation-args":["-code-completion-at={{.*}}record-completion-invocation.c:10:1"],"unsaved_file_hashes":[{"name":"{{.*}}record-completion-invocation.c","md5":"aee23773de90e665992b48209351d70e"}]} diff --git a/clang/test/Index/record-parsing-invocation.c b/clang/test/Index/record-parsing-invocation.c index f370f014fb1cc..e0c4cdb05fb00 100644 --- a/clang/test/Index/record-parsing-invocation.c +++ b/clang/test/Index/record-parsing-invocation.c @@ -25,5 +25,5 @@ # pragma clang __debug parser_crash #endif -// CHECK: {"toolchain":"{{.*}}","libclang.operation":"parse","libclang.opts":1,"args":["{{.*}}bin{{.*}}clang","-fno-spell-checking","{{.*}}record-parsing-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"]} -// CHECK-UNSAVED: {"toolchain":"{{.*}}","libclang.operation":"parse","libclang.opts":1,"args":["{{.*}}bin{{.*}}clang","-fno-spell-checking","{{.*}}record-parsing-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"],"unsaved_file_hashes":[{"name":"{{.*}}record-parsing-invocation.c","md5":"aee23773de90e665992b48209351d70e"}]} +// CHECK: 
{"toolchain":"{{.*}}","libclang.operation":"parse","libclang.opts":1,"args":["clang","-fno-spell-checking","{{.*}}record-parsing-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"]} +// CHECK-UNSAVED: {"toolchain":"{{.*}}","libclang.operation":"parse","libclang.opts":1,"args":["clang","-fno-spell-checking","{{.*}}record-parsing-invocation.c","-Xclang","-detailed-preprocessing-record","-fallow-editor-placeholders"],"unsaved_file_hashes":[{"name":"{{.*}}record-parsing-invocation.c","md5":"aee23773de90e665992b48209351d70e"}]} diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 2aa12667d37e9..30416e46ce173 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -4013,17 +4013,8 @@ enum CXErrorCode clang_parseTranslationUnit2( struct CXUnsavedFile *unsaved_files, unsigned num_unsaved_files, unsigned options, CXTranslationUnit *out_TU) { noteBottomOfStack(); - - if (!CIdx) - return CXError_InvalidArguments; - - SmallString<64> ClangPath( - static_cast(CIdx)->getClangToolchainPath()); - llvm::sys::path::append(ClangPath, "bin"); - llvm::sys::path::append(ClangPath, "clang"); - SmallVector Args; - Args.push_back(ClangPath.c_str()); + Args.push_back("clang"); Args.append(command_line_args, command_line_args + num_command_line_args); return clang_parseTranslationUnit2FullArgv( CIdx, source_filename, Args.data(), Args.size(), unsaved_files, From 43fcfdb1d6a63129ffbb7d77174ccb56863d0b30 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Mon, 20 Mar 2023 09:07:18 +0100 Subject: [PATCH 414/691] [IncludeCleaner][clangd] Mark umbrella headers as users of private Private headers inside umbrella files shouldn't be marked as unused. Differential Revision: https://reviews.llvm.org/D146406 --- clang-tools-extra/clangd/IncludeCleaner.cpp | 16 +++++++- .../clangd/unittests/IncludeCleanerTests.cpp | 21 ++++++++++ .../include-cleaner/lib/Analysis.cpp | 22 +++++++++-- .../unittests/AnalysisTest.cpp | 38 ++++++++++++++----- clang/include/clang/Testing/TestAST.h | 3 ++ clang/lib/Testing/TestAST.cpp | 5 ++- 6 files changed, 89 insertions(+), 16 deletions(-) diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp index 98135529f259b..ee470bd8b963f 100644 --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -93,8 +93,6 @@ bool isFilteredByConfig(const Config &Cfg, llvm::StringRef HeaderPath) { static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, const Config &Cfg, const include_cleaner::PragmaIncludes *PI) { - if (PI && PI->shouldKeep(Inc.HashLine + 1)) - return false; // FIXME(kirillbobyrev): We currently do not support the umbrella headers. // System headers are likely to be standard library headers. // Until we have good support for umbrella headers, don't warn about them. @@ -108,6 +106,20 @@ static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, auto FE = AST.getSourceManager().getFileManager().getFileRef( AST.getIncludeStructure().getRealPath(HID)); assert(FE); + if (PI) { + if (PI->shouldKeep(Inc.HashLine + 1)) + return false; + // Check if main file is the public interface for a private header. If so we + // shouldn't diagnose it as unused. + if(auto PHeader = PI->getPublic(*FE); !PHeader.empty()) { + PHeader = PHeader.trim("<>\""); + // Since most private -> public mappings happen in a verbatim way, we + // check textually here. 
This might go wrong in presence of symlinks or + // header mappings. But that's not different than rest of the places. + if(AST.tuPath().endswith(PHeader)) + return false; + } + } // Headers without include guards have side effects and are not // self-contained, skip them. if (!AST.getPreprocessor().getHeaderSearchInfo().isFileMultipleIncludeGuarded( diff --git a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp index 409e3cee791c3..69b4e07439c38 100644 --- a/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp +++ b/clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp @@ -30,6 +30,7 @@ #include "gtest/gtest.h" #include #include +#include #include namespace clang { @@ -328,6 +329,26 @@ TEST(IncludeCleaner, NoDiagsForObjC) { ParsedAST AST = TU.build(); EXPECT_THAT(AST.getDiagnostics(), llvm::ValueIs(IsEmpty())); } + +TEST(IncludeCleaner, UmbrellaUsesPrivate) { + TestTU TU; + TU.Code = R"cpp( + #include "private.h" + )cpp"; + TU.AdditionalFiles["private.h"] = guard(R"cpp( + // IWYU pragma: private, include "public.h" + void foo() {} + )cpp"); + TU.Filename = "public.h"; + Config Cfg; + Cfg.Diagnostics.UnusedIncludes = Config::IncludesPolicy::Strict; + WithContextValue Ctx(Config::Key, std::move(Cfg)); + ParsedAST AST = TU.build(); + EXPECT_THAT(AST.getDiagnostics(), llvm::ValueIs(IsEmpty())); + IncludeCleanerFindings Findings = computeIncludeCleanerFindings(AST); + EXPECT_THAT(Findings.UnusedIncludes, IsEmpty()); +} + } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/include-cleaner/lib/Analysis.cpp b/clang-tools-extra/include-cleaner/lib/Analysis.cpp index 6237bdb46babf..fb0879b7aab63 100644 --- a/clang-tools-extra/include-cleaner/lib/Analysis.cpp +++ b/clang-tools-extra/include-cleaner/lib/Analysis.cpp @@ -90,9 +90,25 @@ AnalysisResults analyze(llvm::ArrayRef ASTRoots, }); AnalysisResults Results; - for (const Include &I : Inc.all()) - if (!Used.contains(&I) && PI && !PI->shouldKeep(I.Line)) - Results.Unused.push_back(&I); + for (const Include &I : Inc.all()) { + if (Used.contains(&I)) + continue; + if (PI) { + if (PI->shouldKeep(I.Line)) + continue; + // Check if main file is the public interface for a private header. If so + // we shouldn't diagnose it as unused. + if (auto PHeader = PI->getPublic(I.Resolved); !PHeader.empty()) { + PHeader = PHeader.trim("<>\""); + // Since most private -> public mappings happen in a verbatim way, we + // check textually here. This might go wrong in presence of symlinks or + // header mappings. But that's not different than rest of the places. 
+ if (MainFile->tryGetRealPathName().endswith(PHeader)) + continue; + } + } + Results.Unused.push_back(&I); + } for (llvm::StringRef S : Missing.keys()) Results.Missing.push_back(S.str()); llvm::sort(Results.Missing); diff --git a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp index c34c6c0a29a81..a2084d4f37903 100644 --- a/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/AnalysisTest.cpp @@ -24,6 +24,7 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" #include +#include namespace clang::include_cleaner { namespace { @@ -212,17 +213,34 @@ int x = a + c; return std::make_unique(PP, PI); }; - TestAST AST(Inputs); - auto Decls = AST.context().getTranslationUnitDecl()->decls(); - auto Results = - analyze(std::vector{Decls.begin(), Decls.end()}, - PP.MacroReferences, PP.Includes, &PI, AST.sourceManager(), - AST.preprocessor().getHeaderSearchInfo()); + { + TestAST AST(Inputs); + auto Decls = AST.context().getTranslationUnitDecl()->decls(); + auto Results = + analyze(std::vector{Decls.begin(), Decls.end()}, + PP.MacroReferences, PP.Includes, &PI, AST.sourceManager(), + AST.preprocessor().getHeaderSearchInfo()); + + const Include *B = PP.Includes.atLine(3); + ASSERT_EQ(B->Spelled, "b.h"); + EXPECT_THAT(Results.Missing, ElementsAre("\"c.h\"")); + EXPECT_THAT(Results.Unused, ElementsAre(B)); + } - const Include *B = PP.Includes.atLine(3); - ASSERT_EQ(B->Spelled, "b.h"); - EXPECT_THAT(Results.Missing, ElementsAre("\"c.h\"")); - EXPECT_THAT(Results.Unused, ElementsAre(B)); + // Check that umbrella header uses private include. + { + Inputs.Code = R"cpp(#include "private.h")cpp"; + Inputs.ExtraFiles["private.h"] = + guard("// IWYU pragma: private, include \"public.h\""); + Inputs.FileName = "public.h"; + PP.Includes = {}; + PI = {}; + TestAST AST(Inputs); + EXPECT_FALSE(PP.Includes.all().empty()); + auto Results = analyze({}, {}, PP.Includes, &PI, AST.sourceManager(), + AST.preprocessor().getHeaderSearchInfo()); + EXPECT_THAT(Results.Unused, testing::IsEmpty()); + } } TEST(FixIncludes, Basic) { diff --git a/clang/include/clang/Testing/TestAST.h b/clang/include/clang/Testing/TestAST.h index 7ba2ca882b91c..845e31f65438b 100644 --- a/clang/include/clang/Testing/TestAST.h +++ b/clang/include/clang/Testing/TestAST.h @@ -49,6 +49,9 @@ struct TestInputs { /// Keys are plain filenames ("foo.h"), values are file content. llvm::StringMap ExtraFiles = {}; + /// Filename to use for translation unit. A default will be used when empty. + std::string FileName; + /// By default, error diagnostics during parsing are reported as gtest errors. /// To suppress this, set ErrorOK or include "error-ok" in a comment in Code. /// In either case, all diagnostics appear in TestAST::diagnostics(). 
diff --git a/clang/lib/Testing/TestAST.cpp b/clang/lib/Testing/TestAST.cpp index 8c79fcd7d6363..3a50c2d9b5d05 100644 --- a/clang/lib/Testing/TestAST.cpp +++ b/clang/lib/Testing/TestAST.cpp @@ -16,6 +16,7 @@ #include "llvm/Support/VirtualFileSystem.h" #include "gtest/gtest.h" +#include namespace clang { namespace { @@ -91,7 +92,9 @@ TestAST::TestAST(const TestInputs &In) { Argv.push_back(S.c_str()); for (const auto &S : In.ExtraArgs) Argv.push_back(S.c_str()); - std::string Filename = getFilenameForTesting(In.Language).str(); + std::string Filename = In.FileName; + if (Filename.empty()) + Filename = getFilenameForTesting(In.Language).str(); Argv.push_back(Filename.c_str()); Clang->setInvocation(std::make_unique()); if (!CompilerInvocation::CreateFromArgs(Clang->getInvocation(), Argv, From 6aa7cc037f2f95c237c1d82c523f8857fa3a10c3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Mar 2023 12:18:45 +0000 Subject: [PATCH 415/691] [X86] LowerVectorAllZero - add 512-bit support with AVX512 vptestnmd+kortestw patterns Another step toward #53419 - this is also another step towards expanding MatchVectorAllZeroTest to match any pair of vectors and merge EmitAVX512Test into it. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 +++++- llvm/test/CodeGen/X86/ptest.ll | 15 +++--- llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll | 46 ++++++++----------- 3 files changed, 35 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e828fe4b9dd15..e006388b6e928 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24192,14 +24192,23 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC, DAG.getConstant(0, DL, IntVT)); } - // Split down to 128/256-bit vector. - unsigned TestSize = Subtarget.hasAVX() ? 256 : 128; + // Split down to 128/256/512-bit vector. + unsigned TestSize = + Subtarget.useAVX512Regs() ? 512 : (Subtarget.hasAVX() ? 256 : 128); while (VT.getSizeInBits() > TestSize) { auto Split = DAG.SplitVector(V, DL); VT = Split.first.getValueType(); V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second); } + bool UseKORTEST = Subtarget.useAVX512Regs(); + if (UseKORTEST && VT.is512BitVector()) { + V = DAG.getBitcast(MVT::v16i32, MaskBits(V)); + V = DAG.getSetCC(DL, MVT::v16i1, V, + getZeroVector(MVT::v16i32, Subtarget, DAG, DL), ISD::SETEQ); + return DAG.getNode(X86ISD::KORTEST, DL, MVT::i32, V, V); + } + bool UsePTEST = Subtarget.hasSSE41(); if (UsePTEST) { MVT TestVT = VT.is128BitVector() ? 
MVT::v2i64 : MVT::v4i64; diff --git a/llvm/test/CodeGen/X86/ptest.ll b/llvm/test/CodeGen/X86/ptest.ll index 066cbb6193317..c417c5d15b874 100644 --- a/llvm/test/CodeGen/X86/ptest.ll +++ b/llvm/test/CodeGen/X86/ptest.ll @@ -148,9 +148,8 @@ define i32 @veccond512(<16 x i32> %input) { ; ; AVX512-LABEL: veccond512: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: je .LBB2_2 ; AVX512-NEXT: # %bb.1: # %if-true-block ; AVX512-NEXT: xorl %eax, %eax @@ -268,10 +267,9 @@ define i32 @vectest512(<16 x i32> %input) { ; ; AVX512-LABEL: vectest512: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -380,9 +378,8 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) { ; AVX512-LABEL: vecsel512: ; AVX512: # %bb.0: ; AVX512-NEXT: movl %edi, %eax -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: cmovel %esi, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll index fcb0ab6090398..5d921c0aa2c62 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll @@ -105,9 +105,8 @@ define i1 @test_v8i64(<8 x i64> %a0) { ; ; AVX512-LABEL: test_v8i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -169,9 +168,8 @@ define i1 @test_v16i64(<16 x i64> %a0) { ; AVX512-LABEL: test_v16i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -298,9 +296,8 @@ define i1 @test_v16i32(<16 x i32> %a0) { ; ; AVX512-LABEL: test_v16i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -362,9 +359,8 @@ define i1 @test_v32i32(<32 x i32> %a0) { ; AVX512-LABEL: test_v32i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -510,9 +506,8 @@ define i1 @test_v32i16(<32 x i16> %a0) { ; ; AVX512-LABEL: test_v32i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, 
%ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -574,9 +569,8 @@ define i1 @test_v64i16(<64 x i16> %a0) { ; AVX512-LABEL: test_v64i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -741,9 +735,8 @@ define i1 @test_v64i8(<64 x i8> %a0) { ; ; AVX512-LABEL: test_v64i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -805,9 +798,8 @@ define i1 @test_v128i8(<128 x i8> %a0) { ; AVX512-LABEL: test_v128i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1014,10 +1006,8 @@ define i1 @mask_v128i8(<128 x i8> %a0) { ; AVX512-LABEL: mask_v128i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673] -; AVX512-NEXT: vptest %ymm1, %ymm0 +; AVX512-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq From c39dd7c1db97fa367cb6282067b74cd8e55ef09a Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Thu, 23 Mar 2023 12:17:57 +0000 Subject: [PATCH 416/691] [RISCV][MC] Add support for RV64E Implement MC support for the recently ratified RV64E base instruction set. 
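
The register-level consequence for MC is that the E variants expose only 16
GPRs, so x16-x31 must be rejected for RV64E just as for RV32E. Conceptually,
the check reduces to something like this (a simplified C++ sketch of the IsRVE
guards this patch threads through the assembler and disassembler, not the
actual helper):

  #include <cstdint>

  // Sketch: an encoded GPR number is valid if it is below 32, or below 16
  // when targeting an E variant (RV32E/RV64E).
  static bool isValidGPR(uint32_t RegNo, bool IsRVE) {
    return RegNo < (IsRVE ? 16u : 32u);
  }
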
Differential Revision: https://reviews.llvm.org/D143570 --- clang/test/Driver/riscv-arch.c | 10 ------ clang/test/Driver/riscv-features.c | 4 --- llvm/docs/RISCVUsage.rst | 8 ++--- llvm/docs/ReleaseNotes.rst | 1 + llvm/lib/Support/RISCVISAInfo.cpp | 23 ++++-------- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 16 ++++----- .../RISCV/Disassembler/RISCVDisassembler.cpp | 4 +-- .../RISCV/MCTargetDesc/RISCVBaseInfo.cpp | 14 +++++--- .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 1 + .../RISCV/MCTargetDesc/RISCVELFStreamer.cpp | 1 + .../MCTargetDesc/RISCVTargetStreamer.cpp | 8 ++--- llvm/lib/Target/RISCV/RISCVFeatures.td | 10 +++--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 +-- .../RISCV/mattr-invalid-combination.ll | 5 --- llvm/test/CodeGen/RISCV/rv32e.ll | 7 ---- llvm/test/CodeGen/RISCV/rve.ll | 8 +++++ llvm/test/MC/RISCV/attribute-arch.s | 6 +++- llvm/test/MC/RISCV/elf-flags.s | 3 ++ llvm/test/MC/RISCV/invalid-attribute.s | 2 +- .../test/MC/RISCV/mattr-invalid-combination.s | 4 --- llvm/test/MC/RISCV/rv32e-invalid.s | 6 +++- llvm/test/MC/RISCV/rv32e-valid.s | 5 +++ llvm/test/MC/RISCV/rv64e-valid.s | 36 +++++++++++++++++++ llvm/test/MC/RISCV/target-abi-invalid.s | 17 +++++++++ llvm/test/MC/RISCV/target-abi-valid.s | 4 +++ llvm/unittests/Support/RISCVISAInfoTest.cpp | 15 +++++--- 26 files changed, 139 insertions(+), 83 deletions(-) delete mode 100644 llvm/test/CodeGen/RISCV/mattr-invalid-combination.ll delete mode 100644 llvm/test/CodeGen/RISCV/rv32e.ll create mode 100644 llvm/test/CodeGen/RISCV/rve.ll delete mode 100644 llvm/test/MC/RISCV/mattr-invalid-combination.s create mode 100644 llvm/test/MC/RISCV/rv64e-valid.s diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c index 610f79d64ada2..cbc1464cbcd6f 100644 --- a/clang/test/Driver/riscv-arch.c +++ b/clang/test/Driver/riscv-arch.c @@ -198,11 +198,6 @@ // Testing specific messages and unsupported extensions. 
-// RUN: %clang --target=riscv64-unknown-elf -march=rv64e -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV64E %s -// RV64E: error: invalid arch name 'rv64e', -// RV64E: standard user-level extension 'e' requires 'rv32' - // RUN: %clang --target=riscv32-unknown-elf -march=rv32imC -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-LOWER %s // RV32-LOWER: error: invalid arch name 'rv32imC', @@ -223,11 +218,6 @@ // RV32-ORDER: error: invalid arch name 'rv32imcq', // RV32-ORDER: standard user-level extension not given in canonical order 'q' -// RUN: %clang --target=riscv32-unknown-elf -march=rv64e -### %s \ -// RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV64-EER %s -// RV64-EER: error: invalid arch name 'rv64e', -// RV64-EER: standard user-level extension 'e' requires 'rv32' - // RUN: %clang --target=riscv32-unknown-elf -march=rv32izve32f -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-ZVE32F-ER %s // RV32-ZVE32F-ER: error: invalid arch name 'rv32izve32f', diff --git a/clang/test/Driver/riscv-features.c b/clang/test/Driver/riscv-features.c index 98445b1920301..b189fdeacec8c 100644 --- a/clang/test/Driver/riscv-features.c +++ b/clang/test/Driver/riscv-features.c @@ -33,10 +33,6 @@ // DEFAULT-LINUX-SAME: "-target-feature" "+d" // DEFAULT-LINUX-SAME: "-target-feature" "+c" -// RUN: not %clang -cc1 -triple riscv64-unknown-elf -target-feature +e 2>&1 | FileCheck %s -check-prefix=RV64-WITH-E - -// RV64-WITH-E: error: invalid feature combination: standard user-level extension 'e' requires 'rv32' - // RUN: not %clang -c --target=riscv64-linux-gnu -gsplit-dwarf %s 2>&1 | FileCheck %s --check-prefix=ERR-SPLIT-DWARF // RUN: not %clang -c --target=riscv64 -gsplit-dwarf=single %s 2>&1 | FileCheck %s --check-prefix=ERR-SPLIT-DWARF // RUN: %clang -### -c --target=riscv64 -mno-relax -g -gsplit-dwarf %s 2>&1 | FileCheck %s --check-prefix=SPLIT-DWARF diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index 429b59a8d9404..ffd1028cedd80 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -15,9 +15,9 @@ supported variations of the RISC-V specification. It lives in the Base ISAs ========= -The specification defines four base instruction sets: RV32I, RV32E, RV64I, -and RV128I. Currently, LLVM fully supports RV32I, and RV64I. RV32E is -supported by the assembly-based tools only. RV128I is not supported. +The specification defines five base instruction sets: RV32I, RV32E, RV64I, +RV64E, and RV128I. Currently, LLVM fully supports RV32I, and RV64I. RV32E and +RV64E are supported by the assembly-based tools only. RV128I is not supported. To specify the target triple: @@ -27,7 +27,7 @@ To specify the target triple: Architecture Description ============ ============================================================== ``riscv32`` RISC-V with XLEN=32 (i.e. RV32I or RV32E) - ``riscv64`` RISC-V with XLEN=64 (i.e. RV64I) + ``riscv64`` RISC-V with XLEN=64 (i.e. RV64I or RV64E) ============ ============================================================== To select an E variant ISA (e.g. RV32E instead of RV32I), use the base diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index d87d20704f166..525f57a90dfb0 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -144,6 +144,7 @@ Changes to the RISC-V Backend * Adds support for the vendor-defined XTHeadCmo (cache management operations) extension. * Adds support for the vendor-defined XTHeadSync (multi-core synchronization instructions) extension. 
* Added support for the vendor-defined XTHeadFMemIdx (indexed memory operations for floating point) extension. +* Assembler support for RV64E was added. Changes to the WebAssembly Backend ---------------------------------- diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp index 93cf66ff1f739..35c249a7b3703 100644 --- a/llvm/lib/Support/RISCVISAInfo.cpp +++ b/llvm/lib/Support/RISCVISAInfo.cpp @@ -584,8 +584,9 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, bool HasRV64 = Arch.startswith("rv64"); // ISA string must begin with rv32 or rv64. if (!(Arch.startswith("rv32") || HasRV64) || (Arch.size() < 5)) { - return createStringError(errc::invalid_argument, - "string must begin with rv32{i,e,g} or rv64{i,g}"); + return createStringError( + errc::invalid_argument, + "string must begin with rv32{i,e,g} or rv64{i,e,g}"); } unsigned XLen = HasRV64 ? 64 : 32; @@ -601,14 +602,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, default: return createStringError(errc::invalid_argument, "first letter should be 'e', 'i' or 'g'"); - case 'e': { - // Extension 'e' is not allowed in rv64. - if (HasRV64) - return createStringError( - errc::invalid_argument, - "standard user-level extension 'e' requires 'rv32'"); - break; - } + case 'e': case 'i': break; case 'g': @@ -828,8 +822,6 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, } Error RISCVISAInfo::checkDependency() { - bool IsRv32 = XLen == 32; - bool HasE = Exts.count("e") != 0; bool HasD = Exts.count("d") != 0; bool HasF = Exts.count("f") != 0; bool HasZfinx = Exts.count("zfinx") != 0; @@ -839,11 +831,6 @@ Error RISCVISAInfo::checkDependency() { bool HasZve64d = Exts.count("zve64d") != 0; bool HasZvl = MinVLen != 0; - if (HasE && !IsRv32) - return createStringError( - errc::invalid_argument, - "standard user-level extension 'e' requires 'rv32'"); - if (HasF && HasZfinx) return createStringError(errc::invalid_argument, "'f' and 'zfinx' extensions are incompatible"); @@ -1115,6 +1102,8 @@ StringRef RISCVISAInfo::computeDefaultABI() const { } else if (XLen == 64) { if (hasExtension("d")) return "lp64d"; + if (hasExtension("e")) + return "lp64e"; return "lp64"; } llvm_unreachable("Invalid XLEN"); diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index d984f39321a6e..1627761052284 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -67,7 +67,7 @@ class RISCVAsmParser : public MCTargetAsmParser { SMLoc getLoc() const { return getParser().getTok().getLoc(); } bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); } - bool isRV32E() const { return getSTI().hasFeature(RISCV::FeatureRV32E); } + bool isRVE() const { return getSTI().hasFeature(RISCV::FeatureRVE); } RISCVTargetStreamer &getTargetStreamer() { assert(getParser().getStreamer().getTargetStreamer() && @@ -1352,9 +1352,9 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // Attempts to match Name as a register (either using the default name or // alternative ABI names), setting RegNo to the matching register. Upon -// failure, returns true and sets RegNo to 0. If IsRV32E then registers +// failure, returns true and sets RegNo to 0. If IsRVE then registers // x16-x31 will be rejected. 
-static bool matchRegisterNameHelper(bool IsRV32E, MCRegister &RegNo, +static bool matchRegisterNameHelper(bool IsRVE, MCRegister &RegNo, StringRef Name) { RegNo = MatchRegisterName(Name); // The 16-/32- and 64-bit FPRs have the same asm name. Check that the initial @@ -1366,7 +1366,7 @@ static bool matchRegisterNameHelper(bool IsRV32E, MCRegister &RegNo, static_assert(RISCV::F0_D < RISCV::F0_F, "FPR matching must be updated"); if (RegNo == RISCV::NoRegister) RegNo = MatchRegisterAltName(Name); - if (IsRV32E && RegNo >= RISCV::X16 && RegNo <= RISCV::X31) + if (IsRVE && RegNo >= RISCV::X16 && RegNo <= RISCV::X31) RegNo = RISCV::NoRegister; return RegNo == RISCV::NoRegister; } @@ -1387,7 +1387,7 @@ OperandMatchResultTy RISCVAsmParser::tryParseRegister(MCRegister &RegNo, RegNo = 0; StringRef Name = getLexer().getTok().getIdentifier(); - if (matchRegisterNameHelper(isRV32E(), (MCRegister &)RegNo, Name)) + if (matchRegisterNameHelper(isRVE(), (MCRegister &)RegNo, Name)) return MatchOperand_NoMatch; getParser().Lex(); // Eat identifier token. @@ -1420,7 +1420,7 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands, case AsmToken::Identifier: StringRef Name = getLexer().getTok().getIdentifier(); MCRegister RegNo; - matchRegisterNameHelper(isRV32E(), RegNo, Name); + matchRegisterNameHelper(isRVE(), RegNo, Name); if (RegNo == RISCV::NoRegister) { if (HadParens) @@ -1908,7 +1908,7 @@ OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) { return MatchOperand_ParseFail; } MCRegister RegNo; - matchRegisterNameHelper(isRV32E(), RegNo, Name); + matchRegisterNameHelper(isRVE(), RegNo, Name); if (RegNo == RISCV::NoRegister) return MatchOperand_NoMatch; @@ -1927,7 +1927,7 @@ OperandMatchResultTy RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) { StringRef Name = getLexer().getTok().getIdentifier(); MCRegister RegNo; - matchRegisterNameHelper(isRV32E(), RegNo, Name); + matchRegisterNameHelper(isRVE(), RegNo, Name); if (RegNo == RISCV::NoRegister) return MatchOperand_NoMatch; diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index 15352c1c0885d..2d01d6df3a198 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -61,9 +61,9 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVDisassembler() { static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { - bool IsRV32E = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureRV32E); + bool IsRVE = Decoder->getSubtargetInfo().hasFeature(RISCV::FeatureRVE); - if (RegNo >= 32 || (IsRV32E && RegNo >= 16)) + if (RegNo >= 32 || (IsRVE && RegNo >= 16)) return MCDisassembler::Fail; MCRegister Reg = RISCV::X0 + RegNo; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 98c8e883e5960..8f891a04def53 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -40,7 +40,7 @@ ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, StringRef ABIName) { auto TargetABI = getTargetABI(ABIName); bool IsRV64 = TT.isArch64Bit(); - bool IsRV32E = FeatureBits[RISCV::FeatureRV32E]; + bool IsRVE = FeatureBits[RISCV::FeatureRVE]; if (!ABIName.empty() && TargetABI == ABI_Unknown) { errs() @@ -54,11 +54,18 @@ ABI computeTargetABI(const Triple &TT, 
FeatureBitset FeatureBits, errs() << "64-bit ABIs are not supported for 32-bit targets (ignoring " "target-abi)\n"; TargetABI = ABI_Unknown; - } else if (IsRV32E && TargetABI != ABI_ILP32E && TargetABI != ABI_Unknown) { + } else if (!IsRV64 && IsRVE && TargetABI != ABI_ILP32E && + TargetABI != ABI_Unknown) { // TODO: move this checking to RISCVTargetLowering and RISCVAsmParser errs() << "Only the ilp32e ABI is supported for RV32E (ignoring target-abi)\n"; TargetABI = ABI_Unknown; + } else if (IsRV64 && IsRVE && TargetABI != ABI_LP64E && + TargetABI != ABI_Unknown) { + // TODO: move this checking to RISCVTargetLowering and RISCVAsmParser + errs() + << "Only the lp64e ABI is supported for RV64E (ignoring target-abi)\n"; + TargetABI = ABI_Unknown; } if (TargetABI != ABI_Unknown) @@ -80,6 +87,7 @@ ABI getTargetABI(StringRef ABIName) { .Case("lp64", ABI_LP64) .Case("lp64f", ABI_LP64F) .Case("lp64d", ABI_LP64D) + .Case("lp64e", ABI_LP64E) .Default(ABI_Unknown); return TargetABI; } @@ -101,8 +109,6 @@ void validate(const Triple &TT, const FeatureBitset &FeatureBits) { report_fatal_error("RV64 target requires an RV64 CPU"); if (!TT.isArch64Bit() && !FeatureBits[RISCV::Feature32Bit]) report_fatal_error("RV32 target requires an RV32 CPU"); - if (TT.isArch64Bit() && FeatureBits[RISCV::FeatureRV32E]) - report_fatal_error("RV32E can't be enabled for an RV64 target"); if (FeatureBits[RISCV::Feature32Bit] && FeatureBits[RISCV::Feature64Bit]) report_fatal_error("RV32 and RV64 can't be combined"); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 70fdc0e4ff120..175059fdf08e5 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -408,6 +408,7 @@ enum ABI { ABI_LP64, ABI_LP64F, ABI_LP64D, + ABI_LP64E, ABI_Unknown }; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index a05254b0ae579..356bb895c6ed4 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -103,6 +103,7 @@ void RISCVTargetELFStreamer::finish() { EFlags |= ELF::EF_RISCV_FLOAT_ABI_DOUBLE; break; case RISCVABI::ABI_ILP32E: + case RISCVABI::ABI_LP64E: EFlags |= ELF::EF_RISCV_RVE; break; case RISCVABI::ABI_Unknown: diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index 756cc14a87014..f7bcc197b1872 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -47,10 +47,10 @@ void RISCVTargetStreamer::setTargetABI(RISCVABI::ABI ABI) { } void RISCVTargetStreamer::emitTargetAttributes(const MCSubtargetInfo &STI) { - if (STI.hasFeature(RISCV::FeatureRV32E)) - emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_4); - else - emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16); + if (STI.hasFeature(RISCV::FeatureRVE)) + report_fatal_error("Codegen not yet implemented for RVE"); + + emitAttribute(RISCVAttrs::STACK_ALIGN, RISCVAttrs::ALIGN_16); auto ParseResult = RISCVFeatures::parseFeatureBits( STI.hasFeature(RISCV::Feature64Bit), STI.getFeatureBits()); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index b0bb2992f6b42..0cf73bc37be84 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -589,11 
+589,11 @@ def IsRV32 : Predicate<"!Subtarget->is64Bit()">, defvar RV32 = DefaultMode; def RV64 : HwMode<"+64bit", [IsRV64]>; -def FeatureRV32E - : SubtargetFeature<"e", "IsRV32E", "true", - "Implements RV32E (provides 16 rather than 32 GPRs)">; -def IsRV32E : Predicate<"Subtarget->isRV32E()">, - AssemblerPredicate<(all_of FeatureRV32E)>; +def FeatureRVE + : SubtargetFeature<"e", "IsRVE", "true", + "Implements RV{32,64}E (provides 16 rather than 32 GPRs)">; +def IsRVE : Predicate<"Subtarget->isRVE()">, + AssemblerPredicate<(all_of FeatureRVE)>; def FeatureRelax : SubtargetFeature<"relax", "EnableLinkerRelax", "true", diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 595e094662f9a..9310c8161cd46 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -76,8 +76,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - if (Subtarget.isRV32E()) - report_fatal_error("Codegen not yet implemented for RV32E"); + if (Subtarget.isRVE()) + report_fatal_error("Codegen not yet implemented for RVE"); RISCVABI::ABI ABI = Subtarget.getTargetABI(); assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); diff --git a/llvm/test/CodeGen/RISCV/mattr-invalid-combination.ll b/llvm/test/CodeGen/RISCV/mattr-invalid-combination.ll deleted file mode 100644 index e5bdb96fd0741..0000000000000 --- a/llvm/test/CodeGen/RISCV/mattr-invalid-combination.ll +++ /dev/null @@ -1,5 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: not --crash llc -mtriple=riscv64 -mattr=+e < %s 2>&1 \ -; RUN: | FileCheck -check-prefix=RV64E %s - -; RV64E: LLVM ERROR: RV32E can't be enabled for an RV64 target diff --git a/llvm/test/CodeGen/RISCV/rv32e.ll b/llvm/test/CodeGen/RISCV/rv32e.ll deleted file mode 100644 index 88379ab438725..0000000000000 --- a/llvm/test/CodeGen/RISCV/rv32e.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: not --crash llc -mtriple=riscv32 -mattr=+e < %s 2>&1 | FileCheck %s - -; CHECK: LLVM ERROR: Codegen not yet implemented for RV32E - -define void @nothing() nounwind { - ret void -} diff --git a/llvm/test/CodeGen/RISCV/rve.ll b/llvm/test/CodeGen/RISCV/rve.ll new file mode 100644 index 0000000000000..29b9bab61f7ff --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rve.ll @@ -0,0 +1,8 @@ +; RUN: not --crash llc -mtriple=riscv32 -mattr=+e < %s 2>&1 | FileCheck %s +; RUN: not --crash llc -mtriple=riscv64 -mattr=+e < %s 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: Codegen not yet implemented for RVE + +define void @nothing() nounwind { + ret void +} diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s index af0b3fe0cdc29..db1856e1c6677 100644 --- a/llvm/test/MC/RISCV/attribute-arch.s +++ b/llvm/test/MC/RISCV/attribute-arch.s @@ -1,7 +1,8 @@ ## Arch string without version. 
# RUN: llvm-mc %s -triple=riscv32 -filetype=asm | FileCheck %s -# RUN: llvm-mc %s -triple=riscv64 -filetype=asm | FileCheck %s +# RUN: llvm-mc %s -triple=riscv64 -filetype=asm \ +# RUN: | FileCheck --check-prefixes=CHECK,CHECK-RV64 %s .attribute arch, "rv32i" # CHECK: attribute 5, "rv32i2p0" @@ -15,6 +16,9 @@ .attribute arch, "rv32e" # CHECK: attribute 5, "rv32e2p0" +.attribute arch, "rv64e" +# CHECK-RV64: attribute 5, "rv64e2p0" + .attribute arch, "rv32i2_m2" # CHECK: attribute 5, "rv32i2p0_m2p0" diff --git a/llvm/test/MC/RISCV/elf-flags.s b/llvm/test/MC/RISCV/elf-flags.s index 543eadede1f70..546e129fb7194 100644 --- a/llvm/test/MC/RISCV/elf-flags.s +++ b/llvm/test/MC/RISCV/elf-flags.s @@ -5,6 +5,9 @@ # RUN: llvm-mc -triple=riscv32 -mattr=+e -filetype=obj < %s \ # RUN: | llvm-readobj --file-headers - \ # RUN: | FileCheck -check-prefix=CHECK-RVE %s +# RUN: llvm-mc -triple=riscv64 -mattr=+e -filetype=obj < %s \ +# RUN: | llvm-readobj --file-headers - \ +# RUN: | FileCheck -check-prefix=CHECK-RVE %s # RUN: llvm-mc -triple=riscv32 -mattr=+experimental-ztso -filetype=obj < %s | llvm-readobj --file-headers - | FileCheck -check-prefixes=CHECK-TSO %s # RUN: llvm-mc -triple=riscv64 -mattr=+experimental-ztso -filetype=obj < %s | llvm-readobj --file-headers - | FileCheck -check-prefixes=CHECK-TSO %s diff --git a/llvm/test/MC/RISCV/invalid-attribute.s b/llvm/test/MC/RISCV/invalid-attribute.s index 761a98902d5ef..3514452997266 100644 --- a/llvm/test/MC/RISCV/invalid-attribute.s +++ b/llvm/test/MC/RISCV/invalid-attribute.s @@ -7,7 +7,7 @@ # RUN: not llvm-mc %s -triple=riscv64 -filetype=asm 2>&1 | FileCheck %s .attribute arch, "foo" -# CHECK: [[@LINE-1]]:18: error: invalid arch name 'foo', string must begin with rv32{i,e,g} or rv64{i,g} +# CHECK: [[@LINE-1]]:18: error: invalid arch name 'foo', string must begin with rv32{i,e,g} or rv64{i,e,g} .attribute arch, "rv32i2p0_y2p0" # CHECK: [[@LINE-1]]:18: error: invalid arch name 'rv32i2p0_y2p0', invalid standard user-level extension 'y' diff --git a/llvm/test/MC/RISCV/mattr-invalid-combination.s b/llvm/test/MC/RISCV/mattr-invalid-combination.s deleted file mode 100644 index f75fd3723ed49..0000000000000 --- a/llvm/test/MC/RISCV/mattr-invalid-combination.s +++ /dev/null @@ -1,4 +0,0 @@ -# RUN: not --crash llvm-mc -triple riscv64 -mattr=+e < %s 2>&1 \ -# RUN: | FileCheck %s -check-prefix=RV64E - -# RV64E: LLVM ERROR: RV32E can't be enabled for an RV64 target diff --git a/llvm/test/MC/RISCV/rv32e-invalid.s b/llvm/test/MC/RISCV/rv32e-invalid.s index 760e7d49e5c46..9c19d3f40bcff 100644 --- a/llvm/test/MC/RISCV/rv32e-invalid.s +++ b/llvm/test/MC/RISCV/rv32e-invalid.s @@ -2,9 +2,13 @@ # RUN: llvm-mc -filetype=obj -triple=riscv32 < %s \ # RUN: | llvm-objdump --mattr=+e -M no-aliases -d -r - \ # RUN: | FileCheck -check-prefix=CHECK-DIS %s +# RUN: not llvm-mc -triple riscv64 -mattr=+e < %s 2>&1 | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 < %s \ +# RUN: | llvm-objdump --mattr=+e -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefix=CHECK-DIS %s # Perform a simple check that registers x16-x31 (and the equivalent ABI names) -# are rejected for RV32E, when both assembling and disassembling. +# are rejected for RV32E/RV64E, when both assembling and disassembling. 
# CHECK-DIS: 37 18 00 00 diff --git a/llvm/test/MC/RISCV/rv32e-valid.s b/llvm/test/MC/RISCV/rv32e-valid.s index b0f435da50544..c2b77736d92b5 100644 --- a/llvm/test/MC/RISCV/rv32e-valid.s +++ b/llvm/test/MC/RISCV/rv32e-valid.s @@ -3,6 +3,11 @@ # RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+e < %s \ # RUN: | llvm-objdump -M no-aliases -d -r - \ # RUN: | FileCheck -check-prefixes=CHECK-OBJ,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc %s -triple=riscv64 -riscv-no-aliases -mattr=+e -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+e < %s \ +# RUN: | llvm-objdump -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-OBJ,CHECK-ASM-AND-OBJ %s # This file provides a basic test for RV32E, checking that the expected # set of registers and instructions are accepted. diff --git a/llvm/test/MC/RISCV/rv64e-valid.s b/llvm/test/MC/RISCV/rv64e-valid.s new file mode 100644 index 0000000000000..4780fd6ece4ab --- /dev/null +++ b/llvm/test/MC/RISCV/rv64e-valid.s @@ -0,0 +1,36 @@ +# RUN: llvm-mc %s -triple=riscv64 -riscv-no-aliases -mattr=+e -show-encoding \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s +# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+e < %s \ +# RUN: | llvm-objdump -M no-aliases -d -r - \ +# RUN: | FileCheck -check-prefixes=CHECK-ASM-AND-OBJ %s + +# This file provides a basic test for RV64E, checking that the expected +# set of registers and instructions are accepted. It only tests instructions +# that are not valid in RV32E. + +# CHECK-ASM-AND-OBJ: ld a4, 25(a5) +ld x14, 25(x15) +# CHECK-ASM-AND-OBJ: sd a2, 36(a3) +sd a2, 36(a3) + +# CHECK-ASM-AND-OBJ: addiw a4, a5, 37 +addiw a4, a5, 37 +# CHECK-ASM-AND-OBJ: slliw t1, t1, 31 +slliw t1, t1, 31 +# CHECK-ASM-AND-OBJ: srliw a0, a4, 0 +srliw a0, a4, 0 +# CHECK-ASM-AND-OBJ: sraiw a1, sp, 15 +sraiw a1, sp, 15 +# CHECK-ASM-AND-OBJ: slliw t0, t1, 13 +slliw t0, t1, 13 + +# CHECK-ASM-AND-OBJ: addw ra, zero, zero +addw ra, zero, zero +# CHECK-ASM-AND-OBJ: subw t0, t2, t1 +subw t0, t2, t1 +# CHECK-ASM-AND-OBJ: sllw a5, a4, a3 +sllw a5, a4, a3 +# CHECK-ASM-AND-OBJ: srlw a0, s0, t0 +srlw a0, s0, t0 +# CHECK-ASM-AND-OBJ: sraw t0, a3, zero +sraw t0, a3, zero diff --git a/llvm/test/MC/RISCV/target-abi-invalid.s b/llvm/test/MC/RISCV/target-abi-invalid.s index 20e9f89153e05..d7dba182fd166 100644 --- a/llvm/test/MC/RISCV/target-abi-invalid.s +++ b/llvm/test/MC/RISCV/target-abi-invalid.s @@ -32,6 +32,8 @@ # RUN: | FileCheck -check-prefix=RV32EF-LP64F %s # RUN: llvm-mc -triple=riscv32 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32EFD-LP64D %s +# RUN: llvm-mc -triple=riscv32 -mattr=+e -target-abi lp64e %s 2>&1 \ +# RUN: | FileCheck -check-prefix=RV32E-LP64E %s # RV32I-LP64: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32IF-LP64F: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) @@ -39,6 +41,7 @@ # RV32E-LP64: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32EF-LP64F: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RV32EFD-LP64D: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) +# RV32E-LP64E: 64-bit ABIs are not supported for 32-bit targets (ignoring target-abi) # RUN: llvm-mc -triple=riscv32 -target-abi ilp32f < %s 2>&1 \ # RUN: | FileCheck -check-prefix=RV32I-ILP32F %s @@ -76,4 +79,18 @@ # RV32EFD-ILP32F: Only the ilp32e ABI is supported for RV32E (ignoring target-abi) # RV32EFD-ILP32D: Only the 
ilp32e ABI is supported for RV32E (ignoring target-abi) +# RUN: llvm-mc -triple=riscv64 -mattr=+e -target-abi lp64 < %s 2>&1 \ +# RUN: | FileCheck -check-prefix=RV64E-LP64 %s +# RUN: llvm-mc -triple=riscv64 -mattr=+e,+f -target-abi lp64f < %s 2>&1 \ +# RUN: | FileCheck -check-prefix=RV64EF-LP64F %s +# RUN: llvm-mc -triple=riscv64 -mattr=+e,+d -target-abi lp64f < %s 2>&1 \ +# RUN: | FileCheck -check-prefix=RV64EFD-LP64F %s +# RUN: llvm-mc -triple=riscv64 -mattr=+e,+d -target-abi lp64d < %s 2>&1 \ +# RUN: | FileCheck -check-prefix=RV64EFD-LP64D %s + +# RV64E-LP64: Only the lp64e ABI is supported for RV64E (ignoring target-abi) +# RV64EF-LP64F: Only the lp64e ABI is supported for RV64E (ignoring target-abi) +# RV64EFD-LP64F: Only the lp64e ABI is supported for RV64E (ignoring target-abi) +# RV64EFD-LP64D: Only the lp64e ABI is supported for RV64E (ignoring target-abi) + nop diff --git a/llvm/test/MC/RISCV/target-abi-valid.s b/llvm/test/MC/RISCV/target-abi-valid.s index dab4420d0248c..63c0d4bf2e468 100644 --- a/llvm/test/MC/RISCV/target-abi-valid.s +++ b/llvm/test/MC/RISCV/target-abi-valid.s @@ -47,6 +47,10 @@ # RUN: | llvm-readobj --file-headers - \ # RUN: | FileCheck -check-prefix=CHECK-RVE %s +# RUN: llvm-mc -triple=riscv64 -target-abi lp64e -filetype=obj < %s \ +# RUN: | llvm-readobj --file-headers - \ +# RUN: | FileCheck -check-prefix=CHECK-RVE %s + # CHECK-NONE: Flags [ (0x0) # CHECK-NONE-NEXT: ] diff --git a/llvm/unittests/Support/RISCVISAInfoTest.cpp b/llvm/unittests/Support/RISCVISAInfoTest.cpp index 0b749eb0c6815..05997d2d2d2c4 100644 --- a/llvm/unittests/Support/RISCVISAInfoTest.cpp +++ b/llvm/unittests/Support/RISCVISAInfoTest.cpp @@ -109,7 +109,7 @@ TEST(ParseArchString, RejectsUpperCase) { TEST(ParseArchString, RejectsInvalidBaseISA) { for (StringRef Input : {"rv32", "rv64", "rv65i"}) { EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "string must begin with rv32{i,e,g} or rv64{i,g}"); + "string must begin with rv32{i,e,g} or rv64{i,e,g}"); } for (StringRef Input : {"rv32j", "rv64k", "rv32_i"}) { EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), @@ -118,11 +118,9 @@ TEST(ParseArchString, RejectsInvalidBaseISA) { } TEST(ParseArchString, RejectsUnsupportedBaseISA) { - EXPECT_EQ(toString(RISCVISAInfo::parseArchString("rv64e", true).takeError()), - "standard user-level extension 'e' requires 'rv32'"); for (StringRef Input : {"rv128i", "rv128g"}) { EXPECT_EQ(toString(RISCVISAInfo::parseArchString(Input, true).takeError()), - "string must begin with rv32{i,e,g} or rv64{i,g}"); + "string must begin with rv32{i,e,g} or rv64{i,e,g}"); } } @@ -167,6 +165,15 @@ TEST(ParseArchString, AcceptsSupportedBaseISAsAndSetsXLenAndFLen) { EXPECT_EQ(InfoRV64I.getXLen(), 64U); EXPECT_EQ(InfoRV64I.getFLen(), 0U); + auto MaybeRV64E = RISCVISAInfo::parseArchString("rv64e", true); + ASSERT_THAT_EXPECTED(MaybeRV64E, Succeeded()); + RISCVISAInfo &InfoRV64E = **MaybeRV64E; + RISCVISAInfo::OrderedExtensionMap ExtsRV64E = InfoRV64E.getExtensions(); + EXPECT_EQ(ExtsRV64E.size(), 1UL); + EXPECT_TRUE(ExtsRV64E.at("e") == (RISCVExtensionInfo{2, 0})); + EXPECT_EQ(InfoRV64E.getXLen(), 64U); + EXPECT_EQ(InfoRV64E.getFLen(), 0U); + auto MaybeRV64G = RISCVISAInfo::parseArchString("rv64g", true); ASSERT_THAT_EXPECTED(MaybeRV64G, Succeeded()); RISCVISAInfo &InfoRV64G = **MaybeRV64G; From 3d65cd405d64afd86a59c1f58098dfe891841271 Mon Sep 17 00:00:00 2001 From: Yi Kong Date: Thu, 23 Mar 2023 20:29:17 +0800 Subject: [PATCH 417/691] [llvm-objdump] Fix help message for
--print-imm-hex Commit cc2457ca1bbd changed the default but forgot to update the help message. --- llvm/tools/llvm-objdump/ObjdumpOpts.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td index de7f883d24a80..c6627c75157b8 100644 --- a/llvm/tools/llvm-objdump/ObjdumpOpts.td +++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td @@ -145,10 +145,10 @@ def reloc : Flag<["--"], "reloc">, def : Flag<["-"], "r">, Alias, HelpText<"Alias for --reloc">; def print_imm_hex : Flag<["--"], "print-imm-hex">, - HelpText<"Use hex format for immediate values">; + HelpText<"Use hex format for immediate values (default)">; def no_print_imm_hex : Flag<["--"], "no-print-imm-hex">, - HelpText<"Do not use hex format for immediate values (default)">; + HelpText<"Do not use hex format for immediate values">; def : Flag<["--"], "print-imm-hex=false">, Alias; def private_headers : Flag<["--"], "private-headers">, From 7fef15edd4d35d5f2dcaa8cd32d5c8add028dc67 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Mar 2023 12:37:11 +0000 Subject: [PATCH 418/691] Revert rG6aa7cc037f2f95c237c1d82c523f8857fa3a10c3 - "[X86] LowerVectorAllZero - add 512-bit support with AVX512 vptestnmd+kortestw patterns" Reverted - I need to adjust the implementation so we can properly refactor it into a "LowerVectorAllEqual" function --- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 +----- llvm/test/CodeGen/X86/ptest.ll | 15 +++--- llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll | 46 +++++++++++-------- 3 files changed, 39 insertions(+), 35 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e006388b6e928..e828fe4b9dd15 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24192,23 +24192,14 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC, DAG.getConstant(0, DL, IntVT)); } - // Split down to 128/256/512-bit vector. - unsigned TestSize = - Subtarget.useAVX512Regs() ? 512 : (Subtarget.hasAVX() ? 256 : 128); + // Split down to 128/256-bit vector. + unsigned TestSize = Subtarget.hasAVX() ? 256 : 128; while (VT.getSizeInBits() > TestSize) { auto Split = DAG.SplitVector(V, DL); VT = Split.first.getValueType(); V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second); } - bool UseKORTEST = Subtarget.useAVX512Regs(); - if (UseKORTEST && VT.is512BitVector()) { - V = DAG.getBitcast(MVT::v16i32, MaskBits(V)); - V = DAG.getSetCC(DL, MVT::v16i1, V, - getZeroVector(MVT::v16i32, Subtarget, DAG, DL), ISD::SETEQ); - return DAG.getNode(X86ISD::KORTEST, DL, MVT::i32, V, V); - } - bool UsePTEST = Subtarget.hasSSE41(); if (UsePTEST) { MVT TestVT = VT.is128BitVector() ? 
MVT::v2i64 : MVT::v4i64; diff --git a/llvm/test/CodeGen/X86/ptest.ll b/llvm/test/CodeGen/X86/ptest.ll index c417c5d15b874..066cbb6193317 100644 --- a/llvm/test/CodeGen/X86/ptest.ll +++ b/llvm/test/CodeGen/X86/ptest.ll @@ -148,8 +148,9 @@ define i32 @veccond512(<16 x i32> %input) { ; ; AVX512-LABEL: veccond512: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: je .LBB2_2 ; AVX512-NEXT: # %bb.1: # %if-true-block ; AVX512-NEXT: xorl %eax, %eax @@ -267,9 +268,10 @@ define i32 @vectest512(<16 x i32> %input) { ; ; AVX512-LABEL: vectest512: ; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -378,8 +380,9 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) { ; AVX512-LABEL: vecsel512: ; AVX512: # %bb.0: ; AVX512-NEXT: movl %edi, %eax -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: cmovel %esi, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll index 5d921c0aa2c62..fcb0ab6090398 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll @@ -105,8 +105,9 @@ define i1 @test_v8i64(<8 x i64> %a0) { ; ; AVX512-LABEL: test_v8i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -168,8 +169,9 @@ define i1 @test_v16i64(<16 x i64> %a0) { ; AVX512-LABEL: test_v16i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -296,8 +298,9 @@ define i1 @test_v16i32(<16 x i32> %a0) { ; ; AVX512-LABEL: test_v16i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -359,8 +362,9 @@ define i1 @test_v32i32(<32 x i32> %a0) { ; AVX512-LABEL: test_v32i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -506,8 +510,9 @@ define i1 @test_v32i16(<32 x i16> %a0) { ; ; AVX512-LABEL: test_v32i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; 
AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -569,8 +574,9 @@ define i1 @test_v64i16(<64 x i16> %a0) { ; AVX512-LABEL: test_v64i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -735,8 +741,9 @@ define i1 @test_v64i8(<64 x i8> %a0) { ; ; AVX512-LABEL: test_v64i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -798,8 +805,9 @@ define i1 @test_v128i8(<128 x i8> %a0) { ; AVX512-LABEL: test_v128i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptest %ymm0, %ymm0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1006,8 +1014,10 @@ define i1 @mask_v128i8(<128 x i8> %a0) { ; AVX512-LABEL: mask_v128i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vptestnmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0 -; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673] +; AVX512-NEXT: vptest %ymm1, %ymm0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq From c6e9823724ef6bdfee262289ee34d162db436af0 Mon Sep 17 00:00:00 2001 From: Iain Sandoe Date: Fri, 3 Jun 2022 10:43:38 +0100 Subject: [PATCH 419/691] [C++20][Modules] Introduce an implementation module. We need to be able to distinguish individual TUs from the same module in cases where TU-local entities either need to be hidden (or, for some cases of ADL in template instantiation, need to be detected as exposures). This creates a module type for the implementation which implicitly imports its primary module interface per C++20: [module.unit/8] 'A module-declaration that contains neither an export-keyword nor a module-partition implicitly imports the primary module interface unit of the module as if by a module-import-declaration. Implementation modules are never serialized (-emit-module-interface for an implementation unit is diagnosed and rejected). 
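As an illustrative sketch of the two unit kinds involved (the file names and entities here are hypothetical, not part of the patch):

  // M.cppm -- primary module interface unit (ModuleInterfaceUnit).
  export module M;
  export int exported_fn();
  int module_linkage_fn(); // module linkage, not exported

  // M.cpp -- implementation unit (the new ModuleImplementationUnit kind).
  // 'module M;' implicitly imports the interface above, as if by
  // 'import M;', so both declarations are visible here.
  module M;
  int exported_fn() { return module_linkage_fn(); }
  int module_linkage_fn() { return 42; }

Since implementation units are never serialized, only M.cppm can be compiled with -emit-module-interface to produce a .pcm.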
Differential Revision: https://reviews.llvm.org/D126959 --- clang/include/clang/Basic/Module.h | 28 +++++++-- clang/include/clang/Lex/ModuleMap.h | 12 ++++ clang/include/clang/Sema/Sema.h | 4 ++ clang/lib/AST/Decl.cpp | 1 + clang/lib/CodeGen/CGDeclCXX.cpp | 6 +- clang/lib/CodeGen/CodeGenModule.cpp | 2 + clang/lib/Frontend/FrontendActions.cpp | 2 + clang/lib/Lex/ModuleMap.cpp | 42 ++++++++++--- clang/lib/Sema/SemaDecl.cpp | 20 ++++--- clang/lib/Sema/SemaModule.cpp | 59 +++++++++++-------- clang/lib/Serialization/ASTWriter.cpp | 2 +- .../CXX/module/basic/basic.def.odr/p4.cppm | 10 ++-- .../test/CXX/module/basic/basic.link/p2.cppm | 10 ++-- clang/test/CodeGenCXX/module-intializer.cpp | 8 +-- 14 files changed, 147 insertions(+), 59 deletions(-) diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h index 387ce4d6e9b17..c0c99eb8b6d62 100644 --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -103,16 +103,22 @@ class alignas(8) Module { /// The location of the module definition. SourceLocation DefinitionLoc; + // FIXME: Consider if reducing the size of this enum (having Partition and + // Named modules only) then representing interface/implementation separately + // is more efficient. enum ModuleKind { /// This is a module that was defined by a module map and built out /// of header files. ModuleMapModule, + /// This is a C++ 20 header unit. + ModuleHeaderUnit, + /// This is a C++20 module interface unit. ModuleInterfaceUnit, - /// This is a C++ 20 header unit. - ModuleHeaderUnit, + /// This is a C++20 module implementation unit. + ModuleImplementationUnit, /// This is a C++ 20 module partition interface. ModulePartitionInterface, @@ -169,9 +175,16 @@ class alignas(8) Module { /// Does this Module scope describe part of the purview of a standard named /// C++ module? bool isModulePurview() const { - return Kind == ModuleInterfaceUnit || Kind == ModulePartitionInterface || - Kind == ModulePartitionImplementation || - Kind == PrivateModuleFragment; + switch (Kind) { + case ModuleInterfaceUnit: + case ModuleImplementationUnit: + case ModulePartitionInterface: + case ModulePartitionImplementation: + case PrivateModuleFragment: + return true; + default: + return false; + } } /// Does this Module scope describe a fragment of the global module within @@ -561,6 +574,11 @@ class alignas(8) Module { Kind == ModulePartitionImplementation; } + /// Is this a module implementation. + bool isModuleImplementation() const { + return Kind == ModuleImplementationUnit; + } + /// Is this module a header unit. bool isHeaderUnit() const { return Kind == ModuleHeaderUnit; } // Is this a C++20 module interface or a partition. diff --git a/clang/include/clang/Lex/ModuleMap.h b/clang/include/clang/Lex/ModuleMap.h index a0ddd13c11bfd..f155c609b06cb 100644 --- a/clang/include/clang/Lex/ModuleMap.h +++ b/clang/include/clang/Lex/ModuleMap.h @@ -560,6 +560,11 @@ class ModuleMap { Module *createPrivateModuleFragmentForInterfaceUnit(Module *Parent, SourceLocation Loc); + /// Create a new C++ module with the specified kind, and reparent any pending + /// global module fragment(s) to it. + Module *createModuleUnitWithKind(SourceLocation Loc, StringRef Name, + Module::ModuleKind Kind); + /// Create a new module for a C++ module interface unit. /// The module must not already exist, and will be configured for the current /// compilation. @@ -569,6 +574,13 @@ class ModuleMap { /// \returns The newly-created module. 
Module *createModuleForInterfaceUnit(SourceLocation Loc, StringRef Name); + /// Create a new module for a C++ module implementation unit. + /// The interface module for this implementation (implicitly imported) must + /// exist and be loaded and present in the modules map. + /// + /// \returns The newly-created module. + Module *createModuleForImplementationUnit(SourceLocation Loc, StringRef Name); + /// Create a C++20 header unit. Module *createHeaderUnit(SourceLocation Loc, StringRef Name, Module::Header H); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 63ee0f0ed7fb6..277c02ee3f1bd 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2274,6 +2274,10 @@ class Sema final { }; /// The modules we're currently parsing. llvm::SmallVector ModuleScopes; + + /// For a module implementation unit, this is the implicitly imported + /// primary module interface unit. + clang::Module *ThePrimaryInterface = nullptr; + /// The explicit global module fragment of the current translation unit. /// The explicit Global Module Fragment, as specified in C++ /// [module.global.frag]. diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 56042e5fd252f..cd786049f914e 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1600,6 +1600,7 @@ Module *Decl::getOwningModuleForLinkage(bool IgnoreLinkage) const { return nullptr; case Module::ModuleInterfaceUnit: + case Module::ModuleImplementationUnit: case Module::ModulePartitionInterface: case Module::ModulePartitionImplementation: return M; diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp index 0d0b5707e605a..9d7284cd0e37d 100644 --- a/clang/lib/CodeGen/CGDeclCXX.cpp +++ b/clang/lib/CodeGen/CGDeclCXX.cpp @@ -880,9 +880,11 @@ CodeGenModule::EmitCXXGlobalInitFunc() { // Include the filename in the symbol name. Including "sub_" matches gcc // and makes sure these symbols appear lexicographically behind the symbols - // with priority emitted above. + // with priority emitted above. Module implementation units behave the same + // way as a non-modular TU with imports. llvm::Function *Fn; - if (CXX20ModuleInits && getContext().getNamedModuleForCodeGen()) { + if (CXX20ModuleInits && getContext().getNamedModuleForCodeGen() && + !getContext().getNamedModuleForCodeGen()->isModuleImplementation()) { SmallString<256> InitFnName; llvm::raw_svector_ostream Out(InitFnName); cast(getCXXABI().getMangleContext()) diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 0e33e9632b3eb..bd1ee2a674abb 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -548,6 +548,8 @@ void CodeGenModule::Release() { GlobalTopLevelStmtBlockInFlight = {nullptr, nullptr}; } + // Module implementations are initialized the same way as a regular TU that + // imports one or more modules.
if (CXX20ModuleInits && Primary && Primary->isInterfaceOrPartition()) EmitCXXModuleInitFunc(Primary); else diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp index 2aae41fe488ff..05d9fc8208b26 100644 --- a/clang/lib/Frontend/FrontendActions.cpp +++ b/clang/lib/Frontend/FrontendActions.cpp @@ -759,6 +759,8 @@ static StringRef ModuleKindName(Module::ModuleKind MK) { return "Module Map Module"; case Module::ModuleInterfaceUnit: return "Interface Unit"; + case Module::ModuleImplementationUnit: + return "Implementation Unit"; case Module::ModulePartitionInterface: return "Partition Interface"; case Module::ModulePartitionImplementation: diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index 8dead93b03734..f2b2d0b8c69f1 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -888,23 +888,30 @@ ModuleMap::createPrivateModuleFragmentForInterfaceUnit(Module *Parent, return Result; } -Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, - StringRef Name) { - assert(LangOpts.CurrentModule == Name && "module name mismatch"); - assert(!Modules[Name] && "redefining existing module"); - +Module *ModuleMap::createModuleUnitWithKind(SourceLocation Loc, StringRef Name, + Module::ModuleKind Kind) { auto *Result = new Module(Name, Loc, nullptr, /*IsFramework*/ false, /*IsExplicit*/ false, NumCreatedModules++); - Result->Kind = Module::ModuleInterfaceUnit; - Modules[Name] = SourceModule = Result; + Result->Kind = Kind; - // Reparent the current global module fragment as a submodule of this module. + // Reparent any current global module fragment as a submodule of this module. for (auto &Submodule : PendingSubmodules) { Submodule->setParent(Result); Submodule.release(); // now owned by parent } PendingSubmodules.clear(); + return Result; +} + +Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, + StringRef Name) { + assert(LangOpts.CurrentModule == Name && "module name mismatch"); + assert(!Modules[Name] && "redefining existing module"); + + auto *Result = + createModuleUnitWithKind(Loc, Name, Module::ModuleInterfaceUnit); + Modules[Name] = SourceModule = Result; // Mark the main source file as being within the newly-created module so that // declarations and macros are properly visibility-restricted to it. @@ -915,6 +922,25 @@ Module *ModuleMap::createModuleForInterfaceUnit(SourceLocation Loc, return Result; } +Module *ModuleMap::createModuleForImplementationUnit(SourceLocation Loc, + StringRef Name) { + assert(LangOpts.CurrentModule == Name && "module name mismatch"); + // The interface for this implementation must exist and be loaded. + assert(Modules[Name] && Modules[Name]->Kind == Module::ModuleInterfaceUnit && + "creating implementation module without an interface"); + + auto *Result = + createModuleUnitWithKind(Loc, Name, Module::ModuleImplementationUnit); + SourceModule = Result; + + // Mark the main source file as being within the newly-created module so that + // declarations and macros are properly visibility-restricted to it. 
+ auto *MainFile = SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()); + assert(MainFile && "no input file for module implementation"); + + return Result; +} + Module *ModuleMap::createHeaderUnit(SourceLocation Loc, StringRef Name, Module::Header H) { assert(LangOpts.CurrentModule == Name && "module name mismatch"); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 64034393344f0..dd001dba2b912 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -1661,13 +1661,19 @@ bool Sema::CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old) { if (NewM == OldM) return false; - // Partitions are part of the module, but a partition could import another - // module, so verify that the PMIs agree. - if (NewM && OldM && - (NewM->isModulePartition() || OldM->isModulePartition()) && - NewM->getPrimaryModuleInterfaceName() == - OldM->getPrimaryModuleInterfaceName()) - return false; + if (NewM && OldM) { + // A module implementation unit has visibility of the decls in its + // implicitly imported interface. + if (NewM->isModuleImplementation() && OldM == ThePrimaryInterface) + return false; + + // Partitions are part of the module, but a partition could import another + // module, so verify that the PMIs agree. + if ((NewM->isModulePartition() || OldM->isModulePartition()) && + NewM->getPrimaryModuleInterfaceName() == + OldM->getPrimaryModuleInterfaceName()) + return false; + } bool NewIsModuleInterface = NewM && NewM->isModulePurview(); bool OldIsModuleInterface = OldM && OldM->isModulePurview(); diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp index 8c120d278d634..c02b9d2ac25b0 100644 --- a/clang/lib/Sema/SemaModule.cpp +++ b/clang/lib/Sema/SemaModule.cpp @@ -298,8 +298,8 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, const_cast(getLangOpts()).CurrentModule = ModuleName; auto &Map = PP.getHeaderSearchInfo().getModuleMap(); - Module *Mod; - + Module *Mod; // The module we are creating. + Module *Interface = nullptr; // The interface for an implementation. switch (MDK) { case ModuleDeclKind::Interface: case ModuleDeclKind::PartitionInterface: { @@ -336,18 +336,19 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // we're building if `LangOpts.CurrentModule` equals to 'ModuleName'. // Change the value for `LangOpts.CurrentModule` temporarily to make the // module loader work properly. - const_cast(getLangOpts()).CurrentModule = ""; - Mod = getModuleLoader().loadModule(ModuleLoc, {ModuleNameLoc}, - Module::AllVisible, - /*IsInclusionDirective=*/false); + const_cast(getLangOpts()).CurrentModule = ""; + Interface = getModuleLoader().loadModule(ModuleLoc, {ModuleNameLoc}, + Module::AllVisible, + /*IsInclusionDirective=*/false); const_cast(getLangOpts()).CurrentModule = ModuleName; - if (!Mod) { + if (!Interface) { Diag(ModuleLoc, diag::err_module_not_defined) << ModuleName; // Create an empty module interface unit for error recovery. Mod = Map.createModuleForInterfaceUnit(ModuleLoc, ModuleName); + } else { + Mod = Map.createModuleForImplementationUnit(ModuleLoc, ModuleName); } - } break; case ModuleDeclKind::PartitionImplementation: @@ -386,19 +387,31 @@ Sema::ActOnModuleDecl(SourceLocation StartLoc, SourceLocation ModuleLoc, // statements, so imports are allowed. ImportState = ModuleImportState::ImportAllowed; - // For an implementation, We already made an implicit import (its interface). - // Make and return the import decl to be added to the current TU. 
- if (MDK == ModuleDeclKind::Implementation) { - // Make the import decl for the interface. - ImportDecl *Import = - ImportDecl::Create(Context, CurContext, ModuleLoc, Mod, Path[0].second); - // and return it to be added. + getASTContext().setNamedModuleForCodeGen(Mod); + + // We already potentially made an implicit import (in the case of a module + // implementation unit importing its interface). Make this module visible + // and return the import decl to be added to the current TU. + if (Interface) { + + VisibleModules.setVisible(Interface, ModuleLoc); + + // Make the import decl for the interface in the impl module. + ImportDecl *Import = ImportDecl::Create(Context, CurContext, ModuleLoc, + Interface, Path[0].second); + CurContext->addDecl(Import); + + // Sequence initialization of the imported module before that of the current + // module, if any. + Context.addModuleInitializer(ModuleScopes.back().Module, Import); + Mod->Imports.insert(Interface); // As if we imported it. + // Also save this as a shortcut to checking for decls in the interface + ThePrimaryInterface = Interface; + // If we made an implicit import of the module interface, then return the + // imported module decl. return ConvertDeclToDeclGroup(Import); } - getASTContext().setNamedModuleForCodeGen(Mod); - - // FIXME: Create a ModuleDecl. return nullptr; } @@ -424,19 +437,17 @@ Sema::ActOnPrivateModuleFragmentDecl(SourceLocation ModuleLoc, Diag(ModuleScopes.back().BeginLoc, diag::note_previous_definition); return nullptr; - case Module::ModuleInterfaceUnit: - break; - } - - if (!ModuleScopes.back().ModuleInterface) { + case Module::ModuleImplementationUnit: Diag(PrivateLoc, diag::err_private_module_fragment_not_module_interface); Diag(ModuleScopes.back().BeginLoc, diag::note_not_module_interface_add_export) << FixItHint::CreateInsertion(ModuleScopes.back().BeginLoc, "export "); return nullptr; + + case Module::ModuleInterfaceUnit: + break; } - // FIXME: Check this isn't a module interface partition. // FIXME: Check that this translation unit does not import any partitions; // such imports would violate [basic.link]/2's "shall be the only module unit" // restriction. 
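To make the reordered switch above concrete: a private module fragment is only valid in the primary module interface unit, so an implementation unit such as the following hypothetical sketch now lands in the ModuleImplementationUnit case and is rejected with err_private_module_fragment_not_module_interface, plus the note suggesting to add 'export' to the module-declaration:

  // M-impl.cpp -- hypothetical implementation unit.
  module M;
  module :private; // error: private module fragment in a non-interface unit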
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 94160409c5f53..3e40812a9a0ba 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -2719,7 +2719,7 @@ void ASTWriter::WriteSubmodules(Module *WritingModule) { Abbrev->Add(BitCodeAbbrevOp(SUBMODULE_DEFINITION)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ID Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Parent - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); // Kind + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // Kind Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsFramework Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsExplicit Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // IsSystem diff --git a/clang/test/CXX/module/basic/basic.def.odr/p4.cppm b/clang/test/CXX/module/basic/basic.def.odr/p4.cppm index 1542e532c635a..487dbdef283ee 100644 --- a/clang/test/CXX/module/basic/basic.def.odr/p4.cppm +++ b/clang/test/CXX/module/basic/basic.def.odr/p4.cppm @@ -143,9 +143,6 @@ void use() { (void)&inline_var_exported; (void)&const_var_exported; - // CHECK: define {{.*}}@_ZL26used_static_module_linkagev - used_static_module_linkage(); - // CHECK: define linkonce_odr {{.*}}@_ZW6Module26used_inline_module_linkagev used_inline_module_linkage(); @@ -154,8 +151,12 @@ void use() { (void)&extern_var_module_linkage; (void)&inline_var_module_linkage; + + // FIXME: Issue #61427 Internal-linkage declarations in the interface TU + // should not be visible here. (void)&static_var_module_linkage; // FIXME: Should not be visible here. - (void)&const_var_module_linkage; + + (void)&const_var_module_linkage; // FIXME: will be visible after P2788R0 } //--- user.cpp @@ -176,5 +177,6 @@ void use() { (void)&inline_var_exported; (void)&const_var_exported; + // Internal-linkage declarations are not visible here. // Module-linkage declarations are not visible here. } diff --git a/clang/test/CXX/module/basic/basic.link/p2.cppm b/clang/test/CXX/module/basic/basic.link/p2.cppm index e04412ea08d4a..19761fb3359ce 100644 --- a/clang/test/CXX/module/basic/basic.link/p2.cppm +++ b/clang/test/CXX/module/basic/basic.link/p2.cppm @@ -39,19 +39,21 @@ void use() { } //--- M.cpp -// expected-no-diagnostics + module M; -// FIXME: Use of internal linkage entities should be rejected. void use_from_module_impl() { external_linkage_fn(); module_linkage_fn(); - internal_linkage_fn(); + internal_linkage_fn(); // expected-error {{no matching function for call to 'internal_linkage_fn'}} (void)external_linkage_class{}; (void)module_linkage_class{}; - (void)internal_linkage_class{}; (void)external_linkage_var; (void)module_linkage_var; + + // FIXME: Issue #61427 Internal-linkage declarations in the interface TU + // should not be visible here.
+ (void)internal_linkage_class{}; (void)internal_linkage_var; } diff --git a/clang/test/CodeGenCXX/module-intializer.cpp b/clang/test/CodeGenCXX/module-intializer.cpp index e5149401b467a..d365d180ac59d 100644 --- a/clang/test/CodeGenCXX/module-intializer.cpp +++ b/clang/test/CodeGenCXX/module-intializer.cpp @@ -18,17 +18,17 @@ // RUN: -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-P // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 M.cpp \ -// RUN: -fmodule-file=N.pcm -fmodule-file=O.pcm -fmodule-file=M-part.pcm \ +// RUN: -fmodule-file=N=N.pcm -fmodule-file=O=O.pcm -fmodule-file=M:Part=M-part.pcm \ // RUN: -emit-module-interface -o M.pcm // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 M.pcm -S -emit-llvm \ // RUN: -o - | FileCheck %s --check-prefix=CHECK-M // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 useM.cpp \ -// RUN: -fmodule-file=M.pcm -S -emit-llvm -o - \ +// RUN: -fmodule-file=M=M.pcm -S -emit-llvm -o - \ // RUN: | FileCheck %s --check-prefix=CHECK-USE // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 M-impl.cpp \ -// RUN: -fmodule-file=M.pcm -S -emit-llvm -o - \ +// RUN: -fmodule-file=M=M.pcm -S -emit-llvm -o - \ // RUN: | FileCheck %s --check-prefix=CHECK-IMPL // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 N.cpp -S -emit-llvm \ @@ -41,7 +41,7 @@ // RUN: -o - | FileCheck %s --check-prefix=CHECK-P // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++20 M.cpp \ -// RUN: -fmodule-file=N.pcm -fmodule-file=O.pcm -fmodule-file=M-part.pcm \ +// RUN: -fmodule-file=N.pcm -fmodule-file=O=O.pcm -fmodule-file=M:Part=M-part.pcm \ // RUN: -S -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-M //--- N-h.h From e54cdd058e223bd62840e901b8b462c011d2fae5 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Thu, 23 Mar 2023 12:48:03 +0000 Subject: [PATCH 420/691] [RISCV][clang][test] Fix missed test c39dd7c1db97fa367cb6282067b74cd8e55ef09a missed the appropriate change to clang/test/Driver/riscv-arch.c. --- clang/test/Driver/riscv-arch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c index cbc1464cbcd6f..b13da106df778 100644 --- a/clang/test/Driver/riscv-arch.c +++ b/clang/test/Driver/riscv-arch.c @@ -206,7 +206,7 @@ // RUN: %clang --target=riscv32-unknown-elf -march=unknown -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-STR %s // RV32-STR: error: invalid arch name 'unknown', -// RV32-STR: string must begin with rv32{i,e,g} or rv64{i,g} +// RV32-STR: string must begin with rv32{i,e,g} or rv64{i,e,g} // RUN: %clang --target=riscv32-unknown-elf -march=rv32q -### %s \ // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-LETTER %s From d0e2a42853b19d415b84c0dab94e800081e4adc6 Mon Sep 17 00:00:00 2001 From: Job Noorman Date: Thu, 23 Mar 2023 13:01:22 +0000 Subject: [PATCH 421/691] [RISCV][test] Fix another missed test change from RV64E patch c39dd7c1db97fa367cb6282067b74cd8e55ef09a missed a needed change to the llvm-objdump test.
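Both follow-ups chase the diagnostic wording updated for RV64E. As a hedged sketch of the RISCVISAInfo behaviour these tests now expect (a standalone program, not part of the series):

  #include "llvm/Support/Error.h"
  #include "llvm/Support/RISCVISAInfo.h"
  #include "llvm/Support/raw_ostream.h"

  int main() {
    // "rv64e" now parses as a valid base ISA (XLEN=64, 16 GPRs); unknown
    // prefixes report the rv32{i,e,g}/rv64{i,e,g} wording checked above.
    auto MaybeInfo = llvm::RISCVISAInfo::parseArchString(
        "rv64e", /*EnableExperimentalExtension=*/true);
    if (!MaybeInfo) {
      llvm::errs() << llvm::toString(MaybeInfo.takeError()) << '\n';
      return 1;
    }
    llvm::outs() << "XLEN=" << (*MaybeInfo)->getXLen() << '\n'; // XLEN=64
    return 0;
  }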
--- llvm/test/tools/llvm-objdump/ELF/RISCV/riscv-attributes.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/tools/llvm-objdump/ELF/RISCV/riscv-attributes.s b/llvm/test/tools/llvm-objdump/ELF/RISCV/riscv-attributes.s index 7c41b63dcbf86..d15b675450a90 100644 --- a/llvm/test/tools/llvm-objdump/ELF/RISCV/riscv-attributes.s +++ b/llvm/test/tools/llvm-objdump/ELF/RISCV/riscv-attributes.s @@ -31,7 +31,7 @@ vsetvli a3, a2, e8, m8, tu, mu .Lend: #--- invalid_arch.s -# INVALID: string must begin with rv32{i,e,g} or rv64{i,g} +# INVALID: string must begin with rv32{i,e,g} or rv64{i,e,g} nop .section .riscv.attributes,"",@0x70000003 From 4738c5f0832f283f8822b7a5b8b8491a20425346 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Mar 2023 13:07:29 +0000 Subject: [PATCH 422/691] [X86] LowerVectorAllZero - early out for masked v2i64 cases without PTEST. NFC. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e828fe4b9dd15..74e2a2b6fdc10 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24192,6 +24192,12 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC, DAG.getConstant(0, DL, IntVT)); } + // Without PTEST, a masked v2i64 or-reduction is not faster than + // scalarization. + bool UsePTEST = Subtarget.hasSSE41(); + if (!UsePTEST && !Mask.isAllOnes() && VT.getScalarSizeInBits() > 32) + return SDValue(); + // Split down to 128/256-bit vector. unsigned TestSize = Subtarget.hasAVX() ? 256 : 128; while (VT.getSizeInBits() > TestSize) { auto Split = DAG.SplitVector(V, DL); VT = Split.first.getValueType(); V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second); } - bool UsePTEST = Subtarget.hasSSE41(); if (UsePTEST) { MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; V = DAG.getBitcast(TestVT, MaskBits(V)); return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, V, V); } - // Without PTEST, a masked v2i64 or-reduction is not faster than - // scalarization. - if (!Mask.isAllOnes() && VT.getScalarSizeInBits() > 32) - return SDValue(); - V = DAG.getBitcast(MVT::v16i8, MaskBits(V)); V = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8, V, getZeroVector(MVT::v16i8, Subtarget, DAG, DL)); From 3f2dbcc27dfaa7ab53a0318f2fc732f5ce144222 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 23 Mar 2023 08:47:55 +0900 Subject: [PATCH 423/691] [Bazel] Rework `//llvm:llvm-tblgen` and `//llvm/unittests:tablegen_tests` `llvm/utils/TableGen/GlobalISel` should be exported. FYI, after D144351, `tablegen_tests` behaved the same as `llvm-tblgen -print-records`. It succeeded because stdin is `/dev/null`.
--- .../llvm-project-overlay/llvm/BUILD.bazel | 35 ++++++++++--------- .../llvm/unittests/BUILD.bazel | 2 +- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel index 55064fba0bf88..eff9752b785f5 100644 --- a/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/BUILD.bazel @@ -569,28 +569,18 @@ cc_library( ) cc_library( - name = "tblgen", - alwayslink = True, + name = "LLVMTableGenGlobalISel", srcs = glob([ - "utils/TableGen/*.cpp", - "utils/TableGen/*.inc", "utils/TableGen/GlobalISel/*.cpp", - - # Some tablegen sources include headers from MC, so these have to be - # listed here. MC uses headers produced by tablegen, so it cannot be a - # regular dependency. - "include/llvm/MC/*.h", - + ]) + [ + "utils/TableGen/CodeGenInstruction.h", + ], + hdrs = glob([ # We have to include these headers here as well as in the `hdrs` below # to allow the `.cpp` files to use file-relative-inclusion to find # them, even though consumers of this library use inclusion relative to # `utils/TableGen` with the `strip_includes_prefix` of this library. # This mixture appears to be incompatible with header modules. - "utils/TableGen/*.h", - "utils/TableGen/GlobalISel/*.h", - ]), - hdrs = glob([ - "utils/TableGen/*.h", "utils/TableGen/GlobalISel/*.h", ]), copts = llvm_copts, @@ -605,10 +595,23 @@ cc_library( cc_binary( name = "llvm-tblgen", + srcs = glob([ + "utils/TableGen/*.cpp", + "utils/TableGen/*.inc", + "utils/TableGen/*.h", + + # Some tablegen sources include headers from MC, so these have to be + # listed here. MC uses headers produced by tablegen, so it cannot be a + # regular dependency. + "include/llvm/MC/*.h", + ]), copts = llvm_copts, stamp = 0, deps = [ - ":tblgen", + ":LLVMTableGenGlobalISel", + ":Support", + ":TableGen", + ":config", ], ) diff --git a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel index a37041af5e8dc..4996a0ac93ab7 100644 --- a/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/llvm/unittests/BUILD.bazel @@ -675,7 +675,7 @@ cc_test( ":automata_tables_gen", "//llvm:Support", "//llvm:TableGen", - "//llvm:tblgen", + "//llvm:LLVMTableGenGlobalISel", "//third-party/unittest:gmock", "//third-party/unittest:gtest", "//third-party/unittest:gtest_main", From a7c574d0c10ff686cf06d50010d759eaa5a4747b Mon Sep 17 00:00:00 2001 From: Johannes de Fine Licht Date: Thu, 23 Mar 2023 14:22:15 +0100 Subject: [PATCH 424/691] [MLIR][LLVM] Move the LLVM inliner interface into a separate file. A fully fledged LLVM inliner will require a lot of logic. Since `LLVMDialect.cpp` is large enough as it is, preemptively outline the inlining logic into a separate `.cpp` file. This will also allow us to add a `DEBUG_TYPE` for debugging the inliner. The name `LLVMInlining` was chosen over `LLVMInlinerInterface` to keep the option open for exposing inlining functionality even when not invoked through the `DialectInlinerInterface`. 
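For orientation, the refactoring keeps the usual dialect-interface registration but hides it behind a free function; a rough sketch of the shape (not the literal code):

  // LLVMInlining.h (sketch) -- declares the registration hook.
  namespace mlir {
  namespace LLVM {
  namespace detail {
  /// Registers the inliner interface implemented in LLVMInlining.cpp on the
  /// given dialect.
  void addLLVMInlinerInterface(LLVMDialect *dialect);
  } // namespace detail
  } // namespace LLVM
  } // namespace mlir

  // LLVMDialect::initialize() then calls detail::addLLVMInlinerInterface(this)
  // instead of instantiating the inliner interface inline.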
Depends on D146616 Reviewed By: gysit Differential Revision: https://reviews.llvm.org/D146628 --- mlir/lib/Dialect/LLVMIR/CMakeLists.txt | 1 + mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 237 +----------------- mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp | 252 ++++++++++++++++++++ mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.h | 33 +++ 4 files changed, 289 insertions(+), 234 deletions(-) create mode 100644 mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp create mode 100644 mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.h diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt index ebfe0258e0793..7e631e7ddc802 100644 --- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt @@ -4,6 +4,7 @@ add_mlir_dialect_library(MLIRLLVMDialect IR/FunctionCallUtils.cpp IR/LLVMAttrs.cpp IR/LLVMDialect.cpp + IR/LLVMInlining.cpp IR/LLVMInterfaces.cpp IR/LLVMTypes.cpp IR/LLVMTypeSyntax.cpp diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index ca439ab8cc15e..428f50f674b26 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// #include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "LLVMInlining.h" #include "TypeDetail.h" #include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include "mlir/Dialect/LLVMIR/LLVMInterfaces.h" @@ -22,7 +23,6 @@ #include "mlir/IR/FunctionImplementation.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Matchers.h" -#include "mlir/Transforms/InliningUtils.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/TypeSwitch.h" @@ -2777,237 +2777,6 @@ struct LLVMOpAsmDialectInterface : public OpAsmDialectInterface { }; } // namespace -//===----------------------------------------------------------------------===// -// DialectInlinerInterface -//===----------------------------------------------------------------------===// - -/// Check whether the given alloca is an input to a lifetime intrinsic, -/// optionally passing through one or more casts on the way. This is not -/// transitive through block arguments. -static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) { - SmallVector stack(allocaOp->getUsers().begin(), - allocaOp->getUsers().end()); - while (!stack.empty()) { - Operation *op = stack.pop_back_val(); - if (isa(op)) - return true; - if (isa(op)) - stack.append(op->getUsers().begin(), op->getUsers().end()); - } - return false; -} - -/// Move all alloca operations with a constant size in the former entry block of -/// the newly inlined callee into the entry block of the caller, and insert -/// lifetime intrinsics that limit their scope to the inlined blocks. -static void moveConstantAllocasToEntryBlock( - iterator_range inlinedBlocks) { - Block *calleeEntryBlock = &(*inlinedBlocks.begin()); - Block *callerEntryBlock = &(*calleeEntryBlock->getParent()->begin()); - if (calleeEntryBlock == callerEntryBlock) - // Nothing to do. - return; - SmallVector> allocasToMove; - bool shouldInsertLifetimes = false; - // Conservatively only move alloca operations that are part of the entry block - // and do not inspect nested regions, since they may execute conditionally or - // have other unknown semantics. 
- for (auto allocaOp : calleeEntryBlock->getOps()) { - IntegerAttr arraySize; - if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize))) - continue; - bool shouldInsertLifetime = - arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp); - shouldInsertLifetimes |= shouldInsertLifetime; - allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime); - } - if (allocasToMove.empty()) - return; - OpBuilder builder(callerEntryBlock, callerEntryBlock->begin()); - for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) { - auto newConstant = builder.create( - allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize); - // Insert a lifetime start intrinsic where the alloca was before moving it. - if (shouldInsertLifetime) { - OpBuilder::InsertionGuard insertionGuard(builder); - builder.setInsertionPoint(allocaOp); - builder.create( - allocaOp.getLoc(), arraySize.getValue().getLimitedValue(), - allocaOp.getResult()); - } - allocaOp->moveAfter(newConstant); - allocaOp.getArraySizeMutable().assign(newConstant.getResult()); - } - if (!shouldInsertLifetimes) - return; - // Insert a lifetime end intrinsic before each return in the callee function. - for (Block &block : inlinedBlocks) { - if (!block.getTerminator()->hasTrait()) - continue; - builder.setInsertionPoint(block.getTerminator()); - for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) { - if (!shouldInsertLifetime) - continue; - builder.create( - allocaOp.getLoc(), arraySize.getValue().getLimitedValue(), - allocaOp.getResult()); - } - } -} - -static Value handleByValArgument(OpBuilder &builder, Operation *callable, - Value argument, - NamedAttribute byValAttribute) { - auto func = cast(callable); - LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryAttr(); - // If there is no memory effects attribute, assume that the function is - // not read-only. - bool isReadOnly = memoryEffects && - memoryEffects.getArgMem() != ModRefInfo::ModRef && - memoryEffects.getArgMem() != ModRefInfo::Mod; - if (isReadOnly) - return argument; - // Resolve the pointee type and its size. - auto ptrType = cast(argument.getType()); - Type elementType = cast(byValAttribute.getValue()).getValue(); - unsigned int typeSize = - DataLayout(callable->getParentOfType()) - .getTypeSize(elementType); - // Allocate the new value on the stack. - Value one = builder.create( - func.getLoc(), builder.getI64Type(), builder.getI64IntegerAttr(1)); - Value allocaOp = - builder.create(func.getLoc(), ptrType, elementType, one); - // Copy the pointee to the newly allocated value. 
- Value copySize = builder.create( - func.getLoc(), builder.getI64Type(), builder.getI64IntegerAttr(typeSize)); - Value isVolatile = builder.create( - func.getLoc(), builder.getI1Type(), builder.getBoolAttr(false)); - builder.create(func.getLoc(), allocaOp, argument, copySize, - isVolatile); - return allocaOp; -} - -namespace { -struct LLVMInlinerInterface : public DialectInlinerInterface { - using DialectInlinerInterface::DialectInlinerInterface; - - bool isLegalToInline(Operation *call, Operation *callable, - bool wouldBeCloned) const final { - if (!wouldBeCloned) - return false; - auto callOp = dyn_cast(call); - auto funcOp = dyn_cast(callable); - if (!callOp || !funcOp) - return false; - if (auto attrs = funcOp.getArgAttrs()) { - for (Attribute attr : *attrs) { - auto attrDict = cast(attr); - for (NamedAttribute attr : attrDict) { - if (attr.getName() == LLVMDialect::getByValAttrName()) - continue; - // TODO: Handle all argument attributes; - return false; - } - } - } - // TODO: Handle result attributes; - if (funcOp.getResAttrs()) - return false; - // TODO: Handle exceptions. - if (funcOp.getPersonality()) - return false; - if (funcOp.getPassthrough()) { - // TODO: Used attributes should not be passthrough. - DenseSet disallowed( - {StringAttr::get(funcOp->getContext(), "noduplicate"), - StringAttr::get(funcOp->getContext(), "noinline"), - StringAttr::get(funcOp->getContext(), "optnone"), - StringAttr::get(funcOp->getContext(), "presplitcoroutine"), - StringAttr::get(funcOp->getContext(), "returns_twice"), - StringAttr::get(funcOp->getContext(), "strictfp")}); - if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) { - auto stringAttr = dyn_cast(attr); - if (!stringAttr) - return false; - return disallowed.contains(stringAttr); - })) - return false; - } - return true; - } - - bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final { - return true; - } - - /// Conservative allowlist of operations supported so far. - bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final { - if (isPure(op)) - return true; - // Some attributes on memory operations require handling during - // inlining. Since this is not yet implemented, refuse to inline memory - // operations that have any of these attributes. - if (auto iface = dyn_cast(op)) - if (iface.getAliasScopesOrNull() || iface.getNoAliasScopesOrNull()) - return false; - if (auto iface = dyn_cast(op)) - if (iface.getAccessGroupsOrNull()) - return false; - return isa(op); - } - - /// Handle the given inlined return by replacing it with a branch. This - /// overload is called when the inlined region has more than one block. - void handleTerminator(Operation *op, Block *newDest) const final { - // Only return needs to be handled here. - auto returnOp = dyn_cast(op); - if (!returnOp) - return; - - // Replace the return with a branch to the dest. - OpBuilder builder(op); - builder.create(op->getLoc(), returnOp.getOperands(), newDest); - op->erase(); - } - - /// Handle the given inlined return by replacing the uses of the call with the - /// operands of the return. This overload is called when the inlined region - /// only contains one block. - void handleTerminator(Operation *op, - ArrayRef valuesToRepl) const final { - // Return will be the only terminator present. - auto returnOp = cast(op); - - // Replace the values directly with the return operands. 
-    assert(returnOp.getNumOperands() == valuesToRepl.size());
-    for (const auto &[dst, src] :
-         llvm::zip(valuesToRepl, returnOp.getOperands()))
-      dst.replaceAllUsesWith(src);
-  }
-
-  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
-                       Value argument, Type targetType,
-                       DictionaryAttr argumentAttrs) const final {
-    if (auto attr = argumentAttrs.getNamed(LLVMDialect::getByValAttrName()))
-      return handleByValArgument(builder, callable, argument, *attr);
-    return argument;
-  }
-
-  void processInlinedCallBlocks(
-      Operation *call,
-      iterator_range<Region::iterator> inlinedBlocks) const override {
-    // Alloca operations with a constant size that were in the entry block of
-    // the callee should be moved to the entry block of the caller, as this will
-    // fold into prologue/epilogue code during code generation.
-    // This is not implemented as a standalone pattern because we need to know
-    // which newly inlined block was previously the entry block of the callee.
-    moveConstantAllocasToEntryBlock(inlinedBlocks);
-  }
-};
-} // end anonymous namespace
-
 //===----------------------------------------------------------------------===//
 // LLVMDialect initialization, type parsing, and registration.
 //===----------------------------------------------------------------------===//
@@ -3037,9 +2806,9 @@ void LLVMDialect::initialize() {
   // Support unknown operations because not all LLVM operations are registered.
   allowUnknownOperations();
   // clang-format off
-  addInterfaces<LLVMOpAsmDialectInterface, LLVMInlinerInterface>();
+  addInterfaces<LLVMOpAsmDialectInterface>();
   // clang-format on
+  detail::addLLVMInlinerInterface(this);
 }
 
 #define GET_OP_CLASSES
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp
new file mode 100644
index 0000000000000..8a399b9a5d030
--- /dev/null
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp
@@ -0,0 +1,252 @@
+//===- LLVMInlining.cpp - LLVM inlining interface and logic -----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Logic for inlining LLVM functions and the definition of the
+// LLVMInliningInterface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMInlining.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/Transforms/InliningUtils.h"
+
+using namespace mlir;
+
+/// Check whether the given alloca is an input to a lifetime intrinsic,
+/// optionally passing through one or more casts on the way. This is not
+/// transitive through block arguments.
+static bool hasLifetimeMarkers(LLVM::AllocaOp allocaOp) {
+  SmallVector<Operation *> stack(allocaOp->getUsers().begin(),
+                                 allocaOp->getUsers().end());
+  while (!stack.empty()) {
+    Operation *op = stack.pop_back_val();
+    if (isa<LLVM::LifetimeStartOp, LLVM::LifetimeEndOp>(op))
+      return true;
+    if (isa<LLVM::BitcastOp>(op))
+      stack.append(op->getUsers().begin(), op->getUsers().end());
+  }
+  return false;
+}
+
+/// Move all alloca operations with a constant size in the former entry block of
+/// the newly inlined callee into the entry block of the caller, and insert
+/// lifetime intrinsics that limit their scope to the inlined blocks.
+static void moveConstantAllocasToEntryBlock(
+    iterator_range<Region::iterator> inlinedBlocks) {
+  Block *calleeEntryBlock = &(*inlinedBlocks.begin());
+  Block *callerEntryBlock = &(*calleeEntryBlock->getParent()->begin());
+  if (calleeEntryBlock == callerEntryBlock)
+    // Nothing to do.
+    return;
+  SmallVector<std::tuple<LLVM::AllocaOp, IntegerAttr, bool>> allocasToMove;
+  bool shouldInsertLifetimes = false;
+  // Conservatively only move alloca operations that are part of the entry block
+  // and do not inspect nested regions, since they may execute conditionally or
+  // have other unknown semantics.
+  for (auto allocaOp : calleeEntryBlock->getOps<LLVM::AllocaOp>()) {
+    IntegerAttr arraySize;
+    if (!matchPattern(allocaOp.getArraySize(), m_Constant(&arraySize)))
+      continue;
+    bool shouldInsertLifetime =
+        arraySize.getValue() != 0 && !hasLifetimeMarkers(allocaOp);
+    shouldInsertLifetimes |= shouldInsertLifetime;
+    allocasToMove.emplace_back(allocaOp, arraySize, shouldInsertLifetime);
+  }
+  if (allocasToMove.empty())
+    return;
+  OpBuilder builder(callerEntryBlock, callerEntryBlock->begin());
+  for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
+    auto newConstant = builder.create<LLVM::ConstantOp>(
+        allocaOp->getLoc(), allocaOp.getArraySize().getType(), arraySize);
+    // Insert a lifetime start intrinsic where the alloca was before moving it.
+    if (shouldInsertLifetime) {
+      OpBuilder::InsertionGuard insertionGuard(builder);
+      builder.setInsertionPoint(allocaOp);
+      builder.create<LLVM::LifetimeStartOp>(
+          allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
+          allocaOp.getResult());
+    }
+    allocaOp->moveAfter(newConstant);
+    allocaOp.getArraySizeMutable().assign(newConstant.getResult());
+  }
+  if (!shouldInsertLifetimes)
+    return;
+  // Insert a lifetime end intrinsic before each return in the callee function.
+  for (Block &block : inlinedBlocks) {
+    if (!block.getTerminator()->hasTrait<OpTrait::ReturnLike>())
+      continue;
+    builder.setInsertionPoint(block.getTerminator());
+    for (auto &[allocaOp, arraySize, shouldInsertLifetime] : allocasToMove) {
+      if (!shouldInsertLifetime)
+        continue;
+      builder.create<LLVM::LifetimeEndOp>(
+          allocaOp.getLoc(), arraySize.getValue().getLimitedValue(),
+          allocaOp.getResult());
+    }
+  }
+}
+
+static Value handleByValArgument(OpBuilder &builder, Operation *callable,
+                                 Value argument,
+                                 NamedAttribute byValAttribute) {
+  auto func = cast<LLVM::LLVMFuncOp>(callable);
+  LLVM::MemoryEffectsAttr memoryEffects = func.getMemoryAttr();
+  // If there is no memory effects attribute, assume that the function is
+  // not read-only.
+  bool isReadOnly = memoryEffects &&
+                    memoryEffects.getArgMem() != LLVM::ModRefInfo::ModRef &&
+                    memoryEffects.getArgMem() != LLVM::ModRefInfo::Mod;
+  if (isReadOnly)
+    return argument;
+  // Resolve the pointee type and its size.
+  auto ptrType = cast<LLVM::LLVMPointerType>(argument.getType());
+  Type elementType = cast<TypeAttr>(byValAttribute.getValue()).getValue();
+  unsigned int typeSize =
+      DataLayout(callable->getParentOfType<DataLayoutOpInterface>())
+          .getTypeSize(elementType);
+  // Allocate the new value on the stack.
+  Value one = builder.create<LLVM::ConstantOp>(
+      func.getLoc(), builder.getI64Type(), builder.getI64IntegerAttr(1));
+  Value allocaOp =
+      builder.create<LLVM::AllocaOp>(func.getLoc(), ptrType, elementType, one);
+  // Copy the pointee to the newly allocated value.
+  Value copySize = builder.create<LLVM::ConstantOp>(
+      func.getLoc(), builder.getI64Type(), builder.getI64IntegerAttr(typeSize));
+  Value isVolatile = builder.create<LLVM::ConstantOp>(
+      func.getLoc(), builder.getI1Type(), builder.getBoolAttr(false));
+  builder.create<LLVM::MemcpyOp>(func.getLoc(), allocaOp, argument, copySize,
+                                 isVolatile);
+  return allocaOp;
+}
+
+namespace {
+struct LLVMInlinerInterface : public DialectInlinerInterface {
+  using DialectInlinerInterface::DialectInlinerInterface;
+
+  bool isLegalToInline(Operation *call, Operation *callable,
+                       bool wouldBeCloned) const final {
+    if (!wouldBeCloned)
+      return false;
+    auto callOp = dyn_cast<LLVM::CallOp>(call);
+    auto funcOp = dyn_cast<LLVM::LLVMFuncOp>(callable);
+    if (!callOp || !funcOp)
+      return false;
+    if (auto attrs = funcOp.getArgAttrs()) {
+      for (Attribute attr : *attrs) {
+        auto attrDict = cast<DictionaryAttr>(attr);
+        for (NamedAttribute attr : attrDict) {
+          if (attr.getName() == LLVM::LLVMDialect::getByValAttrName())
+            continue;
+          // TODO: Handle all argument attributes;
+          return false;
+        }
+      }
+    }
+    // TODO: Handle result attributes;
+    if (funcOp.getResAttrs())
+      return false;
+    // TODO: Handle exceptions.
+    if (funcOp.getPersonality())
+      return false;
+    if (funcOp.getPassthrough()) {
+      // TODO: Used attributes should not be passthrough.
+      DenseSet<StringAttr> disallowed(
+          {StringAttr::get(funcOp->getContext(), "noduplicate"),
+           StringAttr::get(funcOp->getContext(), "noinline"),
+           StringAttr::get(funcOp->getContext(), "optnone"),
+           StringAttr::get(funcOp->getContext(), "presplitcoroutine"),
+           StringAttr::get(funcOp->getContext(), "returns_twice"),
+           StringAttr::get(funcOp->getContext(), "strictfp")});
+      if (llvm::any_of(*funcOp.getPassthrough(), [&](Attribute attr) {
+            auto stringAttr = dyn_cast<StringAttr>(attr);
+            if (!stringAttr)
+              return false;
+            return disallowed.contains(stringAttr);
+          }))
+        return false;
+    }
+    return true;
+  }
+
+  bool isLegalToInline(Region *, Region *, bool, IRMapping &) const final {
+    return true;
+  }
+
+  /// Conservative allowlist of operations supported so far.
+  bool isLegalToInline(Operation *op, Region *, bool, IRMapping &) const final {
+    if (isPure(op))
+      return true;
+    // Some attributes on memory operations require handling during
+    // inlining. Since this is not yet implemented, refuse to inline memory
+    // operations that have any of these attributes.
+    if (auto iface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op))
+      if (iface.getAliasScopesOrNull() || iface.getNoAliasScopesOrNull())
+        return false;
+    if (auto iface = dyn_cast<LLVM::AccessGroupOpInterface>(op))
+      if (iface.getAccessGroupsOrNull())
+        return false;
+    return isa<LLVM::AllocaOp, LLVM::CallOp, LLVM::LifetimeStartOp,
+               LLVM::LifetimeEndOp, LLVM::LoadOp, LLVM::StoreOp>(op);
+  }
+
+  /// Handle the given inlined return by replacing it with a branch. This
+  /// overload is called when the inlined region has more than one block.
+  void handleTerminator(Operation *op, Block *newDest) const final {
+    // Only return needs to be handled here.
+    auto returnOp = dyn_cast<LLVM::ReturnOp>(op);
+    if (!returnOp)
+      return;
+
+    // Replace the return with a branch to the dest.
+    OpBuilder builder(op);
+    builder.create<LLVM::BrOp>(op->getLoc(), returnOp.getOperands(), newDest);
+    op->erase();
+  }
+
+  /// Handle the given inlined return by replacing the uses of the call with the
+  /// operands of the return. This overload is called when the inlined region
+  /// only contains one block.
+  void handleTerminator(Operation *op,
+                        ArrayRef<Value> valuesToRepl) const final {
+    // Return will be the only terminator present.
+    auto returnOp = cast<LLVM::ReturnOp>(op);
+
+    // Replace the values directly with the return operands.
+    assert(returnOp.getNumOperands() == valuesToRepl.size());
+    for (const auto &[dst, src] :
+         llvm::zip(valuesToRepl, returnOp.getOperands()))
+      dst.replaceAllUsesWith(src);
+  }
+
+  Value handleArgument(OpBuilder &builder, Operation *call, Operation *callable,
+                       Value argument, Type targetType,
+                       DictionaryAttr argumentAttrs) const final {
+    if (auto attr =
+            argumentAttrs.getNamed(LLVM::LLVMDialect::getByValAttrName()))
+      return handleByValArgument(builder, callable, argument, *attr);
+    return argument;
+  }
+
+  void processInlinedCallBlocks(
+      Operation *call,
+      iterator_range<Region::iterator> inlinedBlocks) const override {
+    // Alloca operations with a constant size that were in the entry block of
+    // the callee should be moved to the entry block of the caller, as this will
+    // fold into prologue/epilogue code during code generation.
+    // This is not implemented as a standalone pattern because we need to know
+    // which newly inlined block was previously the entry block of the callee.
+    moveConstantAllocasToEntryBlock(inlinedBlocks);
+  }
+};
+
+} // end anonymous namespace
+
+void LLVM::detail::addLLVMInlinerInterface(LLVM::LLVMDialect *dialect) {
+  dialect->addInterfaces<LLVMInlinerInterface>();
+}
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.h b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.h
new file mode 100644
index 0000000000000..c6f75d5657c3b
--- /dev/null
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.h
@@ -0,0 +1,33 @@
+//===- LLVMInlining.h - Registration of LLVMInlinerInterface ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Allows registering the LLVM DialectInlinerInterface with the LLVM dialect
+// during initialization.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef DIALECT_LLVMIR_IR_LLVMINLINING_H
+#define DIALECT_LLVMIR_IR_LLVMINLINING_H
+
+namespace mlir {
+namespace LLVM {
+
+class LLVMDialect;
+
+namespace detail {
+
+/// Register the `LLVMInlinerInterface` implementation of
+/// `DialectInlinerInterface` with the LLVM dialect.
+void addLLVMInlinerInterface(LLVMDialect *dialect);
+
+} // namespace detail
+
+} // namespace LLVM
+} // namespace mlir
+
+#endif // DIALECT_LLVMIR_IR_LLVMINLINING_H
From 8482b238062ed7263facea9490f67119e00a037a Mon Sep 17 00:00:00 2001
From: Alexey Lapshin
Date: Wed, 22 Mar 2023 17:37:15 +0100
Subject: [PATCH 425/691] [ADT] add ConcurrentHashtable class.

ConcurrentHashTable is a resizeable concurrent hashtable. The number of
resizings is limited, up to x2^32. The hashtable allows only concurrent
insertions. The concurrent hashtable is needed for the D96035 patch.
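A minimal usage sketch follows (illustrative only; "Resource" is a
hypothetical KeyDataTy written for this example — the getKey()/create()
requirements come from the default ConcurrentHashTableInfoByPtr traits, and
the allocator handed to the table must be thread-safe when insert() is called
concurrently, e.g. the per-thread allocator used in the unit test below):

    // Hypothetical KeyDataTy: carries the key and is constructed through the
    // allocator, as ConcurrentHashTableInfoByPtr::create() expects.
    class Resource {
    public:
      const std::string &getKey() const { return Name; }

      static Resource *create(const std::string &Name, BumpPtrAllocator &A) {
        return new (A.Allocate<Resource>()) Resource(Name);
      }

    private:
      Resource(const std::string &Name) : Name(Name) {}
      std::string Name;
    };

    BumpPtrAllocator Allocator; // NB: safe here only if used single-threaded.
    ConcurrentHashTableByPtr<std::string, Resource, BumpPtrAllocator> Table(
        Allocator);

    // insert() returns the unique entry for the key and whether this call
    // created it; concurrent inserts of equal keys yield the same pointer.
    auto [Entry, Inserted] = Table.insert("type descriptor");

Only the bucket owning the key is locked during an insertion, so distinct
buckets can be probed, resized, and rehashed independently.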
Reviewed By: JDevlieghere

Differential Revision: https://reviews.llvm.org/D132455

---
 llvm/include/llvm/ADT/ConcurrentHashtable.h   | 395 ++++++++++++++++++
 llvm/unittests/ADT/CMakeLists.txt             |   1 +
 .../unittests/ADT/ConcurrentHashtableTest.cpp | 279 +++++++++++++
 3 files changed, 675 insertions(+)
 create mode 100644 llvm/include/llvm/ADT/ConcurrentHashtable.h
 create mode 100644 llvm/unittests/ADT/ConcurrentHashtableTest.cpp

diff --git a/llvm/include/llvm/ADT/ConcurrentHashtable.h b/llvm/include/llvm/ADT/ConcurrentHashtable.h
new file mode 100644
index 0000000000000..56344ab9b8411
--- /dev/null
+++ b/llvm/include/llvm/ADT/ConcurrentHashtable.h
@@ -0,0 +1,395 @@
+//===- ConcurrentHashtable.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_CONCURRENTHASHTABLE_H
+#define LLVM_ADT_CONCURRENTHASHTABLE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Parallel.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/xxhash.h"
+#include <atomic>
+#include <cstddef>
+#include <iomanip>
+#include <mutex>
+#include <sstream>
+#include <type_traits>
+
+namespace llvm {
+
+/// ConcurrentHashTable - is a resizeable concurrent hashtable.
+/// The number of resizings is limited, up to x2^32. This hashtable is
+/// useful for efficient access to aggregate data (like strings,
+/// type descriptors...) and for keeping only a single copy of such
+/// an aggregate. The hashtable allows only concurrent insertions:
+///
+/// KeyDataTy* = insert ( const KeyTy& );
+///
+/// Data structure:
+///
+/// Inserted value KeyTy is mapped to 64-bit hash value ->
+///
+///        [------- 64-bit Hash value --------]
+///        [  StartEntryIndex ][ Bucket Index ]
+///                  |                |
+///           points to the      points to
+///           first probe        the bucket.
+///           position inside
+///           bucket entries
+///
+/// After initialization, all buckets have an initial size. During insertions,
+/// buckets might be extended to contain more entries. Each bucket can be
+/// independently resized and rehashed (no need to lock the whole table).
+/// Different buckets may have different sizes. If a single bucket is full,
+/// then that bucket is resized.
+///
+/// BucketsArray keeps all buckets. Each bucket keeps an array of Entries
+/// (pointers to KeyDataTy) and another array of entries hashes:
+///
+/// BucketsArray[BucketIdx].Hashes[EntryIdx]:
+/// BucketsArray[BucketIdx].Entries[EntryIdx]:
+///
+/// [Bucket 0].Hashes -> [uint32_t][uint32_t]
+/// [Bucket 0].Entries -> [KeyDataTy*][KeyDataTy*]
+///
+/// [Bucket 1].Hashes -> [uint32_t][uint32_t][uint32_t][uint32_t]
+/// [Bucket 1].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*][KeyDataTy*]
+/// .........................
+/// [Bucket N].Hashes -> [uint32_t][uint32_t][uint32_t]
+/// [Bucket N].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*]
+///
+/// ConcurrentHashTableByPtr uses an external thread-safe allocator to allocate
+/// KeyDataTy items.
+
+template <typename KeyTy, typename KeyDataTy, typename AllocatorTy>
+class ConcurrentHashTableInfoByPtr {
+public:
+  /// \returns Hash value for the specified \p Key.
+  static inline uint64_t getHashValue(const KeyTy &Key) {
+    return xxHash64(Key);
+  }
+
+  /// \returns true if both \p LHS and \p RHS are equal.
+  static inline bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
+    return LHS == RHS;
+  }
+
+  /// \returns key for the specified \p KeyData.
+  static inline const KeyTy &getKey(const KeyDataTy &KeyData) {
+    return KeyData.getKey();
+  }
+
+  /// \returns newly created object of KeyDataTy type.
+  static inline KeyDataTy *create(const KeyTy &Key, AllocatorTy &Allocator) {
+    return KeyDataTy::create(Key, Allocator);
+  }
+};
+
+template <typename KeyTy, typename KeyDataTy, typename AllocatorTy,
+          typename Info =
+              ConcurrentHashTableInfoByPtr<KeyTy, KeyDataTy, AllocatorTy>>
+class ConcurrentHashTableByPtr {
+public:
+  ConcurrentHashTableByPtr(
+      AllocatorTy &Allocator, size_t EstimatedSize = 100000,
+      size_t ThreadsNum = parallel::strategy.compute_thread_count(),
+      size_t InitialNumberOfBuckets = 128)
+      : MultiThreadAllocator(Allocator) {
+    assert((ThreadsNum > 0) && "ThreadsNum must be greater than 0");
+    assert((InitialNumberOfBuckets > 0) &&
+           "InitialNumberOfBuckets must be greater than 0");
+
+    constexpr size_t UINT64_BitsNum = sizeof(uint64_t) * 8;
+    constexpr size_t UINT32_BitsNum = sizeof(uint32_t) * 8;
+
+    NumberOfBuckets = ThreadsNum;
+
+    // Calculate number of buckets.
+    if (ThreadsNum > 1) {
+      NumberOfBuckets *= InitialNumberOfBuckets;
+      NumberOfBuckets *= std::max(
+          1,
+          countr_zero(PowerOf2Ceil(EstimatedSize / InitialNumberOfBuckets)) >>
+              2);
+    }
+    NumberOfBuckets = PowerOf2Ceil(NumberOfBuckets);
+
+    // Allocate buckets.
+    BucketsArray = std::make_unique<Bucket[]>(NumberOfBuckets);
+
+    InitialBucketSize = EstimatedSize / NumberOfBuckets;
+    InitialBucketSize = std::max((size_t)1, InitialBucketSize);
+    InitialBucketSize = PowerOf2Ceil(InitialBucketSize);
+
+    // Initialize each bucket.
+    for (size_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
+      HashesPtr Hashes = new ExtHashBitsTy[InitialBucketSize];
+      memset(Hashes, 0, sizeof(ExtHashBitsTy) * InitialBucketSize);
+
+      DataPtr Entries = new EntryDataTy[InitialBucketSize];
+      memset(Entries, 0, sizeof(EntryDataTy) * InitialBucketSize);
+
+      BucketsArray[Idx].Size = InitialBucketSize;
+      BucketsArray[Idx].Hashes = Hashes;
+      BucketsArray[Idx].Entries = Entries;
+    }
+
+    // Calculate masks.
+    HashMask = NumberOfBuckets - 1;
+
+    size_t LeadingZerosNumber = countl_zero(HashMask);
+    HashBitsNum = UINT64_BitsNum - LeadingZerosNumber;
+
+    // We keep only high 32-bits of hash value. So bucket size cannot
+    // exceed 2^32. Bucket size is always power of two.
+    MaxBucketSize = 1Ull << (std::min(UINT32_BitsNum, LeadingZerosNumber));
+
+    // Calculate mask for extended hash bits.
+    ExtHashMask = (NumberOfBuckets * MaxBucketSize) - 1;
+  }
+
+  virtual ~ConcurrentHashTableByPtr() {
+    // Deallocate buckets.
+    for (size_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
+      delete[] BucketsArray[Idx].Hashes;
+      delete[] BucketsArray[Idx].Entries;
+    }
+  }
+
+  /// Insert new value \p NewValue or return already existing entry.
+  ///
+  /// \returns entry and "true" if an entry is just inserted or
+  /// "false" if an entry already exists.
+  std::pair<KeyDataTy *, bool> insert(const KeyTy &NewValue) {
+    // Calculate bucket index.
+    uint64_t Hash = Info::getHashValue(NewValue);
+    Bucket &CurBucket = BucketsArray[getBucketIdx(Hash)];
+    uint32_t ExtHashBits = getExtHashBits(Hash);
+
+    // Lock bucket.
+    CurBucket.Guard.lock();
+
+    HashesPtr BucketHashes = CurBucket.Hashes;
+    DataPtr BucketEntries = CurBucket.Entries;
+    size_t CurEntryIdx = getStartIdx(ExtHashBits, CurBucket.Size);
+
+    while (true) {
+      uint32_t CurEntryHashBits = BucketHashes[CurEntryIdx];
+
+      if (CurEntryHashBits == 0 && BucketEntries[CurEntryIdx] == nullptr) {
+        // Found empty slot. Insert data.
+        KeyDataTy *NewData = Info::create(NewValue, MultiThreadAllocator);
+        BucketEntries[CurEntryIdx] = NewData;
+        BucketHashes[CurEntryIdx] = ExtHashBits;
+
+        CurBucket.NumberOfEntries++;
+        RehashBucket(CurBucket);
+
+        CurBucket.Guard.unlock();
+
+        return {NewData, true};
+      }
+
+      if (CurEntryHashBits == ExtHashBits) {
+        // Hash matched. Check value for equality.
+        KeyDataTy *EntryData = BucketEntries[CurEntryIdx];
+        if (Info::isEqual(Info::getKey(*EntryData), NewValue)) {
+          // An already existing entry matching the inserted data was found.
+          CurBucket.Guard.unlock();
+
+          return {EntryData, false};
+        }
+      }
+
+      CurEntryIdx++;
+      CurEntryIdx &= (CurBucket.Size - 1);
+    }
+
+    llvm_unreachable("Insertion error.");
+    return {};
+  }
+
+  /// Print information about current state of hash table structures.
+  void printStatistic(raw_ostream &OS) {
+    OS << "\n--- HashTable statistic:\n";
+    OS << "\nNumber of buckets = " << NumberOfBuckets;
+    OS << "\nInitial bucket size = " << InitialBucketSize;
+
+    uint64_t NumberOfNonEmptyBuckets = 0;
+    uint64_t NumberOfEntriesPlusEmpty = 0;
+    uint64_t OverallNumberOfEntries = 0;
+    uint64_t OverallSize = sizeof(*this) + NumberOfBuckets * sizeof(Bucket);
+
+    DenseMap<uint32_t, uint32_t> BucketSizesMap;
+
+    // For each bucket...
+    for (size_t Idx = 0; Idx < NumberOfBuckets; Idx++) {
+      Bucket &CurBucket = BucketsArray[Idx];
+
+      BucketSizesMap[CurBucket.Size]++;
+
+      if (CurBucket.NumberOfEntries != 0)
+        NumberOfNonEmptyBuckets++;
+      NumberOfEntriesPlusEmpty += CurBucket.Size;
+      OverallNumberOfEntries += CurBucket.NumberOfEntries;
+      OverallSize +=
+          (sizeof(ExtHashBitsTy) + sizeof(EntryDataTy)) * CurBucket.Size;
+    }
+
+    OS << "\nOverall number of entries = " << OverallNumberOfEntries;
+    OS << "\nOverall number of non empty buckets = " << NumberOfNonEmptyBuckets;
+    for (auto &BucketSize : BucketSizesMap)
+      OS << "\n Number of buckets with size " << BucketSize.first << ": "
+         << BucketSize.second;
+
+    std::stringstream stream;
+    stream << std::fixed << std::setprecision(2)
+           << ((float)OverallNumberOfEntries / (float)NumberOfEntriesPlusEmpty);
+    std::string str = stream.str();
+
+    OS << "\nLoad factor = " << str;
+    OS << "\nOverall allocated size = " << OverallSize;
+  }
+
+protected:
+  using ExtHashBitsTy = uint32_t;
+  using EntryDataTy = KeyDataTy *;
+
+  using HashesPtr = ExtHashBitsTy *;
+  using DataPtr = EntryDataTy *;
+
+  // Bucket structure. Keeps bucket data.
+  struct Bucket {
+    Bucket() = default;
+
+    // Size of bucket.
+    uint32_t Size = 0;
+
+    // Number of non-null entries.
+    size_t NumberOfEntries = 0;
+
+    // Hashes for [Size] entries.
+    HashesPtr Hashes = nullptr;
+
+    // [Size] entries.
+    DataPtr Entries = nullptr;
+
+    // Mutex for this bucket.
+    std::mutex Guard;
+  };
+
+  // Reallocate and rehash bucket if this is full enough.
+ void RehashBucket(Bucket &CurBucket) { + assert((CurBucket.Size > 0) && "Uninitialised bucket"); + if (CurBucket.NumberOfEntries < CurBucket.Size * 0.9) + return; + + if (CurBucket.Size >= MaxBucketSize) + report_fatal_error("ConcurrentHashTable is full"); + + size_t NewBucketSize = CurBucket.Size << 1; + assert((NewBucketSize <= MaxBucketSize) && "New bucket size is too big"); + assert((CurBucket.Size < NewBucketSize) && + "New bucket size less than size of current bucket"); + + // Store old entries & hashes arrays. + HashesPtr SrcHashes = CurBucket.Hashes; + DataPtr SrcEntries = CurBucket.Entries; + + // Allocate new entries&hashes arrays. + HashesPtr DestHashes = new ExtHashBitsTy[NewBucketSize]; + memset(DestHashes, 0, sizeof(ExtHashBitsTy) * NewBucketSize); + + DataPtr DestEntries = new EntryDataTy[NewBucketSize]; + memset(DestEntries, 0, sizeof(EntryDataTy) * NewBucketSize); + + // For each entry in source arrays... + for (size_t CurSrcEntryIdx = 0; CurSrcEntryIdx < CurBucket.Size; + CurSrcEntryIdx++) { + uint32_t CurSrcEntryHashBits = SrcHashes[CurSrcEntryIdx]; + + // Check for null entry. + if (CurSrcEntryHashBits == 0 && SrcEntries[CurSrcEntryIdx] == nullptr) + continue; + + size_t StartDestIdx = getStartIdx(CurSrcEntryHashBits, NewBucketSize); + + // Insert non-null entry into the new arrays. + while (true) { + uint32_t CurDestEntryHashBits = DestHashes[StartDestIdx]; + + if (CurDestEntryHashBits == 0 && DestEntries[StartDestIdx] == nullptr) { + // Found empty slot. Insert data. + DestHashes[StartDestIdx] = CurSrcEntryHashBits; + DestEntries[StartDestIdx] = SrcEntries[CurSrcEntryIdx]; + break; + } + + StartDestIdx++; + StartDestIdx = StartDestIdx & (NewBucketSize - 1); + } + } + + // Update bucket fields. + CurBucket.Hashes = DestHashes; + CurBucket.Entries = DestEntries; + CurBucket.Size = NewBucketSize; + + // Delete old bucket entries. + if (SrcHashes != nullptr) + delete[] SrcHashes; + if (SrcEntries != nullptr) + delete[] SrcEntries; + } + + size_t getBucketIdx(hash_code Hash) { return Hash & HashMask; } + + uint32_t getExtHashBits(uint64_t Hash) { + return (Hash & ExtHashMask) >> HashBitsNum; + } + + size_t getStartIdx(uint32_t ExtHashBits, size_t BucketSize) { + assert((BucketSize > 0) && "Empty bucket"); + + return ExtHashBits & (BucketSize - 1); + } + + // Number of bits in hash mask. + uint64_t HashBitsNum = 0; + + // Hash mask. + uint64_t HashMask = 0; + + // Hash mask for the extended hash bits. + uint64_t ExtHashMask = 0; + + // The maximal bucket size. + size_t MaxBucketSize = 0; + + // Initial size of bucket. + size_t InitialBucketSize = 0; + + // The number of buckets. + size_t NumberOfBuckets = 0; + + // Array of buckets. + std::unique_ptr BucketsArray; + + // Used for allocating KeyDataTy values. 
+  AllocatorTy &MultiThreadAllocator;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_CONCURRENTHASHTABLE_H
diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt
index 900294d4216ee..c5190255ba773 100644
--- a/llvm/unittests/ADT/CMakeLists.txt
+++ b/llvm/unittests/ADT/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_unittest(ADTTests
   BumpPtrListTest.cpp
   CoalescingBitVectorTest.cpp
   CombinationGeneratorTest.cpp
+  ConcurrentHashtableTest.cpp
   DAGDeltaAlgorithmTest.cpp
   DeltaAlgorithmTest.cpp
   DenseMapTest.cpp
diff --git a/llvm/unittests/ADT/ConcurrentHashtableTest.cpp b/llvm/unittests/ADT/ConcurrentHashtableTest.cpp
new file mode 100644
index 0000000000000..c4faece251790
--- /dev/null
+++ b/llvm/unittests/ADT/ConcurrentHashtableTest.cpp
@@ -0,0 +1,279 @@
+//===- ConcurrentHashtableTest.cpp ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ConcurrentHashtable.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Parallel.h"
+#include "gtest/gtest.h"
+#include
+#include
+#include
+using namespace llvm;
+
+namespace {
+class String {
+public:
+  String() {}
+  const std::string &getKey() const { return Data; }
+
+  template <typename AllocatorTy>
+  static String *create(const std::string &Num, AllocatorTy &Allocator) {
+    String *Result = Allocator.template Allocate<String>();
+    new (Result) String(Num);
+    return Result;
+  }
+
+protected:
+  String(const std::string &Num) { Data += Num; }
+
+  std::string Data;
+  std::array<char, 0x20> ExtraData;
+};
+
+static LLVM_THREAD_LOCAL BumpPtrAllocator ThreadLocalAllocator;
+class PerThreadAllocator : public AllocatorBase<PerThreadAllocator> {
+public:
+  inline LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size,
+                                                       size_t Alignment) {
+    return ThreadLocalAllocator.Allocate(Size, Align(Alignment));
+  }
+  inline size_t getBytesAllocated() const {
+    return ThreadLocalAllocator.getBytesAllocated();
+  }
+
+  // Pull in base class overloads.
+  using AllocatorBase<PerThreadAllocator>::Allocate;
+} Allocator;
+
+TEST(ConcurrentHashTableTest, AddStringEntries) {
+  ConcurrentHashTableByPtr<
+      std::string, String, PerThreadAllocator,
+      ConcurrentHashTableInfoByPtr<std::string, String, PerThreadAllocator>>
+      HashTable(Allocator, 10);
+
+  size_t AllocatedBytesAtStart = Allocator.getBytesAllocated();
+  std::pair<String *, bool> res1 = HashTable.insert("1");
+  // Check entry is inserted.
+  EXPECT_TRUE(res1.first->getKey() == "1");
+  EXPECT_TRUE(res1.second);
+
+  std::pair<String *, bool> res2 = HashTable.insert("2");
+  // Check old entry is still valid.
+  EXPECT_TRUE(res1.first->getKey() == "1");
+  // Check new entry is inserted.
+  EXPECT_TRUE(res2.first->getKey() == "2");
+  EXPECT_TRUE(res2.second);
+  // Check new and old entries use different memory.
+  EXPECT_TRUE(res1.first != res2.first);
+
+  std::pair<String *, bool> res3 = HashTable.insert("3");
+  // Check one more entry is inserted.
+  EXPECT_TRUE(res3.first->getKey() == "3");
+  EXPECT_TRUE(res3.second);
+
+  std::pair<String *, bool> res4 = HashTable.insert("1");
+  // Check duplicated entry is inserted.
+  EXPECT_TRUE(res4.first->getKey() == "1");
+  EXPECT_FALSE(res4.second);
+  // Check duplicated entry uses the same memory.
+  EXPECT_TRUE(res1.first == res4.first);
+
+  // Check first entry is still valid.
+  EXPECT_TRUE(res1.first->getKey() == "1");
+
+  // Check data was allocated by allocator.
+ EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart); + + // Check statistic. + std::string StatisticString; + raw_string_ostream StatisticStream(StatisticString); + HashTable.printStatistic(StatisticStream); + + EXPECT_TRUE(StatisticString.find("Overall number of entries = 3\n") != + std::string::npos); +} + +TEST(ConcurrentHashTableTest, AddStringMultiplueEntries) { + const size_t NumElements = 10000; + ConcurrentHashTableByPtr< + std::string, String, PerThreadAllocator, + ConcurrentHashTableInfoByPtr> + HashTable(Allocator); + + // Check insertion. + for (size_t I = 0; I < NumElements; I++) { + size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); + std::string StringForElement = formatv("{0}", I); + std::pair Entry = HashTable.insert(StringForElement); + EXPECT_TRUE(Entry.second); + EXPECT_TRUE(Entry.first->getKey() == StringForElement); + EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart); + } + + std::string StatisticString; + raw_string_ostream StatisticStream(StatisticString); + HashTable.printStatistic(StatisticStream); + + // Verifying that the table contains exactly the number of elements we + // inserted. + EXPECT_TRUE(StatisticString.find("Overall number of entries = 10000\n") != + std::string::npos); + + // Check insertion of duplicates. + for (size_t I = 0; I < NumElements; I++) { + size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); + std::string StringForElement = formatv("{0}", I); + std::pair Entry = HashTable.insert(StringForElement); + EXPECT_FALSE(Entry.second); + EXPECT_TRUE(Entry.first->getKey() == StringForElement); + // Check no additional bytes were allocated for duplicate. + EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart); + } + + // Check statistic. + // Verifying that the table contains exactly the number of elements we + // inserted. + EXPECT_TRUE(StatisticString.find("Overall number of entries = 10000\n") != + std::string::npos); +} + +TEST(ConcurrentHashTableTest, AddStringMultiplueEntriesWithResize) { + // Number of elements exceeds original size, thus hashtable should be resized. + const size_t NumElements = 20000; + ConcurrentHashTableByPtr< + std::string, String, PerThreadAllocator, + ConcurrentHashTableInfoByPtr> + HashTable(Allocator, 100); + + // Check insertion. + for (size_t I = 0; I < NumElements; I++) { + size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); + std::string StringForElement = formatv("{0} {1}", I, I + 100); + std::pair Entry = HashTable.insert(StringForElement); + EXPECT_TRUE(Entry.second); + EXPECT_TRUE(Entry.first->getKey() == StringForElement); + EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart); + } + + std::string StatisticString; + raw_string_ostream StatisticStream(StatisticString); + HashTable.printStatistic(StatisticStream); + + // Verifying that the table contains exactly the number of elements we + // inserted. + EXPECT_TRUE(StatisticString.find("Overall number of entries = 20000\n") != + std::string::npos); + + // Check insertion of duplicates. + for (size_t I = 0; I < NumElements; I++) { + size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); + std::string StringForElement = formatv("{0} {1}", I, I + 100); + std::pair Entry = HashTable.insert(StringForElement); + EXPECT_FALSE(Entry.second); + EXPECT_TRUE(Entry.first->getKey() == StringForElement); + // Check no additional bytes were allocated for duplicate. + EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart); + } + + // Check statistic. 
+ // Verifying that the table contains exactly the number of elements we + // inserted. + EXPECT_TRUE(StatisticString.find("Overall number of entries = 20000\n") != + std::string::npos); +} + +TEST(ConcurrentHashTableTest, AddStringEntriesParallel) { + const size_t NumElements = 10000; + ConcurrentHashTableByPtr< + std::string, String, PerThreadAllocator, + ConcurrentHashTableInfoByPtr> + HashTable(Allocator); + + // Check parallel insertion. + parallelFor(0, NumElements, [&](size_t I) { + size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); + std::string StringForElement = formatv("{0}", I); + std::pair Entry = HashTable.insert(StringForElement); + EXPECT_TRUE(Entry.second); + EXPECT_TRUE(Entry.first->getKey() == StringForElement); + EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart); + }); + + std::string StatisticString; + raw_string_ostream StatisticStream(StatisticString); + HashTable.printStatistic(StatisticStream); + + // Verifying that the table contains exactly the number of elements we + // inserted. + EXPECT_TRUE(StatisticString.find("Overall number of entries = 10000\n") != + std::string::npos); + + // Check parallel insertion of duplicates. + parallelFor(0, NumElements, [&](size_t I) { + size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); + std::string StringForElement = formatv("{0}", I); + std::pair Entry = HashTable.insert(StringForElement); + EXPECT_FALSE(Entry.second); + EXPECT_TRUE(Entry.first->getKey() == StringForElement); + // Check no additional bytes were allocated for duplicate. + EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart); + }); + + // Check statistic. + // Verifying that the table contains exactly the number of elements we + // inserted. + EXPECT_TRUE(StatisticString.find("Overall number of entries = 10000\n") != + std::string::npos); +} + +TEST(ConcurrentHashTableTest, AddStringEntriesParallelWithResize) { + const size_t NumElements = 20000; + ConcurrentHashTableByPtr< + std::string, String, PerThreadAllocator, + ConcurrentHashTableInfoByPtr> + HashTable(Allocator, 100); + + // Check parallel insertion. + parallelFor(0, NumElements, [&](size_t I) { + size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); + std::string StringForElement = formatv("{0}", I); + std::pair Entry = HashTable.insert(StringForElement); + EXPECT_TRUE(Entry.second); + EXPECT_TRUE(Entry.first->getKey() == StringForElement); + EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart); + }); + + std::string StatisticString; + raw_string_ostream StatisticStream(StatisticString); + HashTable.printStatistic(StatisticStream); + + // Verifying that the table contains exactly the number of elements we + // inserted. + EXPECT_TRUE(StatisticString.find("Overall number of entries = 20000\n") != + std::string::npos); + + // Check parallel insertion of duplicates. + parallelFor(0, NumElements, [&](size_t I) { + size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); + std::string StringForElement = formatv("{0}", I); + std::pair Entry = HashTable.insert(StringForElement); + EXPECT_FALSE(Entry.second); + EXPECT_TRUE(Entry.first->getKey() == StringForElement); + // Check no additional bytes were allocated for duplicate. + EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart); + }); + + // Check statistic. + // Verifying that the table contains exactly the number of elements we + // inserted. 
+ EXPECT_TRUE(StatisticString.find("Overall number of entries = 20000\n") != + std::string::npos); +} + +} // namespace From 320969f5058bfffd6517c36771b46ac4a447c7ee Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Mar 2023 13:37:04 +0000 Subject: [PATCH 426/691] [X86] LowerVectorAllZero - add 512-bit support with AVX512 vptestnmd+kortestw patterns (REAPPLIED) Another step toward #53419 - this is also another step towards expanding MatchVectorAllZeroTest to match any pair of vectors and merge EmitAVX512Test into it. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 13 +++++- llvm/test/CodeGen/X86/ptest.ll | 15 +++--- llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll | 46 ++++++++----------- 3 files changed, 35 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 74e2a2b6fdc10..2d371566381c8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24194,18 +24194,27 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC, // Without PTEST, a masked v2i64 or-reduction is not faster than // scalarization. + bool UseKORTEST = Subtarget.useAVX512Regs(); bool UsePTEST = Subtarget.hasSSE41(); if (!UsePTEST && !Mask.isAllOnes() && VT.getScalarSizeInBits() > 32) return SDValue(); - // Split down to 128/256-bit vector. - unsigned TestSize = Subtarget.hasAVX() ? 256 : 128; + // Split down to 128/256/512-bit vector. + unsigned TestSize = UseKORTEST ? 512 : (Subtarget.hasAVX() ? 256 : 128); while (VT.getSizeInBits() > TestSize) { auto Split = DAG.SplitVector(V, DL); VT = Split.first.getValueType(); V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second); } + if (UseKORTEST && VT.is512BitVector()) { + V = DAG.getBitcast(MVT::v16i32, MaskBits(V)); + V = DAG.getSetCC(DL, MVT::v16i1, V, + getZeroVector(MVT::v16i32, Subtarget, DAG, DL), + ISD::SETNE); + return DAG.getNode(X86ISD::KORTEST, DL, MVT::i32, V, V); + } + if (UsePTEST) { MVT TestVT = VT.is128BitVector() ? 
MVT::v2i64 : MVT::v4i64; V = DAG.getBitcast(TestVT, MaskBits(V)); diff --git a/llvm/test/CodeGen/X86/ptest.ll b/llvm/test/CodeGen/X86/ptest.ll index 066cbb6193317..bedcfebc5f6e7 100644 --- a/llvm/test/CodeGen/X86/ptest.ll +++ b/llvm/test/CodeGen/X86/ptest.ll @@ -148,9 +148,8 @@ define i32 @veccond512(<16 x i32> %input) { ; ; AVX512-LABEL: veccond512: ; AVX512: # %bb.0: # %entry -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: je .LBB2_2 ; AVX512-NEXT: # %bb.1: # %if-true-block ; AVX512-NEXT: xorl %eax, %eax @@ -268,10 +267,9 @@ define i32 @vectest512(<16 x i32> %input) { ; ; AVX512-LABEL: vectest512: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 ; AVX512-NEXT: xorl %eax, %eax -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -380,9 +378,8 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) { ; AVX512-LABEL: vecsel512: ; AVX512: # %bb.0: ; AVX512-NEXT: movl %edi, %eax -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: cmovel %esi, %eax ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll index fcb0ab6090398..a489a5e6099f0 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll @@ -105,9 +105,8 @@ define i1 @test_v8i64(<8 x i64> %a0) { ; ; AVX512-LABEL: test_v8i64: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -169,9 +168,8 @@ define i1 @test_v16i64(<16 x i64> %a0) { ; AVX512-LABEL: test_v16i64: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -298,9 +296,8 @@ define i1 @test_v16i32(<16 x i32> %a0) { ; ; AVX512-LABEL: test_v16i32: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -362,9 +359,8 @@ define i1 @test_v32i32(<32 x i32> %a0) { ; AVX512-LABEL: test_v32i32: ; AVX512: # %bb.0: ; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -510,9 +506,8 @@ define i1 @test_v32i16(<32 x i16> %a0) { ; ; AVX512-LABEL: test_v32i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; 
AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -574,9 +569,8 @@ define i1 @test_v64i16(<64 x i16> %a0) { ; AVX512-LABEL: test_v64i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -741,9 +735,8 @@ define i1 @test_v64i8(<64 x i8> %a0) { ; ; AVX512-LABEL: test_v64i8: ; AVX512: # %bb.0: -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: setne %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -805,9 +798,8 @@ define i1 @test_v128i8(<128 x i8> %a0) { ; AVX512-LABEL: test_v128i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vptest %ymm0, %ymm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1014,10 +1006,8 @@ define i1 @mask_v128i8(<128 x i8> %a0) { ; AVX512-LABEL: mask_v128i8: ; AVX512: # %bb.0: ; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpbroadcastq {{.*#+}} ymm1 = [72340172838076673,72340172838076673,72340172838076673,72340172838076673] -; AVX512-NEXT: vptest %ymm1, %ymm0 +; AVX512-NEXT: vptestmd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 ; AVX512-NEXT: sete %al ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq From 8c7c1f11ffaacf762e612c65440fd2cbb58ee426 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 23 Mar 2023 14:41:03 +0100 Subject: [PATCH 427/691] Silence unused variable warning in NDEBUG builds I usually would fold this into the assert, but the comment there suggests side effects. NFC. ModuleMap.cpp:938:9: error: unused variable 'MainFile' [-Werror,-Wunused-variable] auto *MainFile = SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()); --- clang/lib/Lex/ModuleMap.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Lex/ModuleMap.cpp b/clang/lib/Lex/ModuleMap.cpp index f2b2d0b8c69f1..44c872336ce9c 100644 --- a/clang/lib/Lex/ModuleMap.cpp +++ b/clang/lib/Lex/ModuleMap.cpp @@ -936,6 +936,7 @@ Module *ModuleMap::createModuleForImplementationUnit(SourceLocation Loc, // Mark the main source file as being within the newly-created module so that // declarations and macros are properly visibility-restricted to it. auto *MainFile = SourceMgr.getFileEntryForID(SourceMgr.getMainFileID()); + (void)MainFile; assert(MainFile && "no input file for module implementation"); return Result; From fd4aeba307ca30da00a8db21a200cc9afcef63c6 Mon Sep 17 00:00:00 2001 From: Alexey Lapshin Date: Thu, 23 Mar 2023 14:40:29 +0100 Subject: [PATCH 428/691] Revert "[ADT] add ConcurrentHashtable class." This reverts commit 8482b238062ed7263facea9490f67119e00a037a. 
--- llvm/include/llvm/ADT/ConcurrentHashtable.h | 395 ------------------ llvm/unittests/ADT/CMakeLists.txt | 1 - .../unittests/ADT/ConcurrentHashtableTest.cpp | 279 ------------- 3 files changed, 675 deletions(-) delete mode 100644 llvm/include/llvm/ADT/ConcurrentHashtable.h delete mode 100644 llvm/unittests/ADT/ConcurrentHashtableTest.cpp diff --git a/llvm/include/llvm/ADT/ConcurrentHashtable.h b/llvm/include/llvm/ADT/ConcurrentHashtable.h deleted file mode 100644 index 56344ab9b8411..0000000000000 --- a/llvm/include/llvm/ADT/ConcurrentHashtable.h +++ /dev/null @@ -1,395 +0,0 @@ -//===- ConcurrentHashtable.h ------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ADT_CONCURRENTHASHTABLE_H -#define LLVM_ADT_CONCURRENTHASHTABLE_H - -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Hashing.h" -#include "llvm/ADT/PointerIntPair.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Parallel.h" -#include "llvm/Support/WithColor.h" -#include "llvm/Support/xxhash.h" -#include -#include -#include -#include -#include -#include - -namespace llvm { - -/// ConcurrentHashTable - is a resizeable concurrent hashtable. -/// The number of resizings limited up to x2^32. This hashtable is -/// useful to have efficient access to aggregate data(like strings, -/// type descriptors...) and to keep only single copy of such -/// an aggregate. The hashtable allows only concurrent insertions: -/// -/// KeyDataTy* = insert ( const KeyTy& ); -/// -/// Data structure: -/// -/// Inserted value KeyTy is mapped to 64-bit hash value -> -/// -/// [------- 64-bit Hash value --------] -/// [ StartEntryIndex ][ Bucket Index ] -/// | | -/// points to the points to -/// first probe the bucket. -/// position inside -/// bucket entries -/// -/// After initialization, all buckets have an initial size. During insertions, -/// buckets might be extended to contain more entries. Each bucket can be -/// independently resized and rehashed(no need to lock the whole table). -/// Different buckets may have different sizes. If the single bucket is full -/// then the bucket is resized. -/// -/// BucketsArray keeps all buckets. Each bucket keeps an array of Entries -/// (pointers to KeyDataTy) and another array of entries hashes: -/// -/// BucketsArray[BucketIdx].Hashes[EntryIdx]: -/// BucketsArray[BucketIdx].Entries[EntryIdx]: -/// -/// [Bucket 0].Hashes -> [uint32_t][uint32_t] -/// [Bucket 0].Entries -> [KeyDataTy*][KeyDataTy*] -/// -/// [Bucket 1].Hashes -> [uint32_t][uint32_t][uint32_t][uint32_t] -/// [Bucket 1].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*][KeyDataTy*] -/// ......................... -/// [Bucket N].Hashes -> [uint32_t][uint32_t][uint32_t] -/// [Bucket N].Entries -> [KeyDataTy*][KeyDataTy*][KeyDataTy*] -/// -/// ConcurrentHashTableByPtr uses an external thread-safe allocator to allocate -/// KeyDataTy items. - -template -class ConcurrentHashTableInfoByPtr { -public: - /// \returns Hash value for the specified \p Key. - static inline uint64_t getHashValue(const KeyTy &Key) { - return xxHash64(Key); - } - - /// \returns true if both \p LHS and \p RHS are equal. 
- static inline bool isEqual(const KeyTy &LHS, const KeyTy &RHS) { - return LHS == RHS; - } - - /// \returns key for the specified \p KeyData. - static inline const KeyTy &getKey(const KeyDataTy &KeyData) { - return KeyData.getKey(); - } - - /// \returns newly created object of KeyDataTy type. - static inline KeyDataTy *create(const KeyTy &Key, AllocatorTy &Allocator) { - return KeyDataTy::create(Key, Allocator); - } -}; - -template > -class ConcurrentHashTableByPtr { -public: - ConcurrentHashTableByPtr( - AllocatorTy &Allocator, size_t EstimatedSize = 100000, - size_t ThreadsNum = parallel::strategy.compute_thread_count(), - size_t InitialNumberOfBuckets = 128) - : MultiThreadAllocator(Allocator) { - assert((ThreadsNum > 0) && "ThreadsNum must be greater than 0"); - assert((InitialNumberOfBuckets > 0) && - "InitialNumberOfBuckets must be greater than 0"); - - constexpr size_t UINT64_BitsNum = sizeof(uint64_t) * 8; - constexpr size_t UINT32_BitsNum = sizeof(uint32_t) * 8; - - NumberOfBuckets = ThreadsNum; - - // Calculate number of buckets. - if (ThreadsNum > 1) { - NumberOfBuckets *= InitialNumberOfBuckets; - NumberOfBuckets *= std::max( - 1, - countr_zero(PowerOf2Ceil(EstimatedSize / InitialNumberOfBuckets)) >> - 2); - } - NumberOfBuckets = PowerOf2Ceil(NumberOfBuckets); - - // Allocate buckets. - BucketsArray = std::make_unique(NumberOfBuckets); - - InitialBucketSize = EstimatedSize / NumberOfBuckets; - InitialBucketSize = std::max((size_t)1, InitialBucketSize); - InitialBucketSize = PowerOf2Ceil(InitialBucketSize); - - // Initialize each bucket. - for (size_t Idx = 0; Idx < NumberOfBuckets; Idx++) { - HashesPtr Hashes = new ExtHashBitsTy[InitialBucketSize]; - memset(Hashes, 0, sizeof(ExtHashBitsTy) * InitialBucketSize); - - DataPtr Entries = new EntryDataTy[InitialBucketSize]; - memset(Entries, 0, sizeof(EntryDataTy) * InitialBucketSize); - - BucketsArray[Idx].Size = InitialBucketSize; - BucketsArray[Idx].Hashes = Hashes; - BucketsArray[Idx].Entries = Entries; - } - - // Calculate masks. - HashMask = NumberOfBuckets - 1; - - size_t LeadingZerosNumber = countl_zero(HashMask); - HashBitsNum = UINT64_BitsNum - LeadingZerosNumber; - - // We keep only high 32-bits of hash value. So bucket size cannot - // exceed 2^32. Bucket size is always power of two. - MaxBucketSize = 1Ull << (std::min(UINT32_BitsNum, LeadingZerosNumber)); - - // Calculate mask for extended hash bits. - ExtHashMask = (NumberOfBuckets * MaxBucketSize) - 1; - } - - virtual ~ConcurrentHashTableByPtr() { - // Deallocate buckets. - for (size_t Idx = 0; Idx < NumberOfBuckets; Idx++) { - delete[] BucketsArray[Idx].Hashes; - delete[] BucketsArray[Idx].Entries; - } - } - - /// Insert new value \p NewValue or return already existing entry. - /// - /// \returns entry and "true" if an entry is just inserted or - /// "false" if an entry already exists. - std::pair insert(const KeyTy &NewValue) { - // Calculate bucket index. - uint64_t Hash = Info::getHashValue(NewValue); - Bucket &CurBucket = BucketsArray[getBucketIdx(Hash)]; - uint32_t ExtHashBits = getExtHashBits(Hash); - - // Lock bucket. - CurBucket.Guard.lock(); - - HashesPtr BucketHashes = CurBucket.Hashes; - DataPtr BucketEntries = CurBucket.Entries; - size_t CurEntryIdx = getStartIdx(ExtHashBits, CurBucket.Size); - - while (true) { - uint32_t CurEntryHashBits = BucketHashes[CurEntryIdx]; - - if (CurEntryHashBits == 0 && BucketEntries[CurEntryIdx] == nullptr) { - // Found empty slot. Insert data. 
- KeyDataTy *NewData = Info::create(NewValue, MultiThreadAllocator); - BucketEntries[CurEntryIdx] = NewData; - BucketHashes[CurEntryIdx] = ExtHashBits; - - CurBucket.NumberOfEntries++; - RehashBucket(CurBucket); - - CurBucket.Guard.unlock(); - - return {NewData, true}; - } - - if (CurEntryHashBits == ExtHashBits) { - // Hash matched. Check value for equality. - KeyDataTy *EntryData = BucketEntries[CurEntryIdx]; - if (Info::isEqual(Info::getKey(*EntryData), NewValue)) { - // Already existed entry matched with inserted data is found. - CurBucket.Guard.unlock(); - - return {EntryData, false}; - } - } - - CurEntryIdx++; - CurEntryIdx &= (CurBucket.Size - 1); - } - - llvm_unreachable("Insertion error."); - return {}; - } - - /// Print information about current state of hash table structures. - void printStatistic(raw_ostream &OS) { - OS << "\n--- HashTable statistic:\n"; - OS << "\nNumber of buckets = " << NumberOfBuckets; - OS << "\nInitial bucket size = " << InitialBucketSize; - - uint64_t NumberOfNonEmptyBuckets = 0; - uint64_t NumberOfEntriesPlusEmpty = 0; - uint64_t OverallNumberOfEntries = 0; - uint64_t OverallSize = sizeof(*this) + NumberOfBuckets * sizeof(Bucket); - - DenseMap BucketSizesMap; - - // For each bucket... - for (size_t Idx = 0; Idx < NumberOfBuckets; Idx++) { - Bucket &CurBucket = BucketsArray[Idx]; - - BucketSizesMap[CurBucket.Size]++; - - if (CurBucket.NumberOfEntries != 0) - NumberOfNonEmptyBuckets++; - NumberOfEntriesPlusEmpty += CurBucket.Size; - OverallNumberOfEntries += CurBucket.NumberOfEntries; - OverallSize += - (sizeof(ExtHashBitsTy) + sizeof(EntryDataTy)) * CurBucket.Size; - } - - OS << "\nOverall number of entries = " << OverallNumberOfEntries; - OS << "\nOverall number of non empty buckets = " << NumberOfNonEmptyBuckets; - for (auto &BucketSize : BucketSizesMap) - OS << "\n Number of buckets with size " << BucketSize.first << ": " - << BucketSize.second; - - std::stringstream stream; - stream << std::fixed << std::setprecision(2) - << ((float)OverallNumberOfEntries / (float)NumberOfEntriesPlusEmpty); - std::string str = stream.str(); - - OS << "\nLoad factor = " << str; - OS << "\nOverall allocated size = " << OverallSize; - } - -protected: - using ExtHashBitsTy = uint32_t; - using EntryDataTy = KeyDataTy *; - - using HashesPtr = ExtHashBitsTy *; - using DataPtr = EntryDataTy *; - - // Bucket structure. Keeps bucket data. - struct Bucket { - Bucket() = default; - - // Size of bucket. - uint32_t Size = 0; - - // Number of non-null entries. - size_t NumberOfEntries = 0; - - // Hashes for [Size] entries. - HashesPtr Hashes = nullptr; - - // [Size] entries. - DataPtr Entries = nullptr; - - // Mutex for this bucket. - std::mutex Guard; - }; - - // Reallocate and rehash bucket if this is full enough. - void RehashBucket(Bucket &CurBucket) { - assert((CurBucket.Size > 0) && "Uninitialised bucket"); - if (CurBucket.NumberOfEntries < CurBucket.Size * 0.9) - return; - - if (CurBucket.Size >= MaxBucketSize) - report_fatal_error("ConcurrentHashTable is full"); - - size_t NewBucketSize = CurBucket.Size << 1; - assert((NewBucketSize <= MaxBucketSize) && "New bucket size is too big"); - assert((CurBucket.Size < NewBucketSize) && - "New bucket size less than size of current bucket"); - - // Store old entries & hashes arrays. - HashesPtr SrcHashes = CurBucket.Hashes; - DataPtr SrcEntries = CurBucket.Entries; - - // Allocate new entries&hashes arrays. 
- HashesPtr DestHashes = new ExtHashBitsTy[NewBucketSize]; - memset(DestHashes, 0, sizeof(ExtHashBitsTy) * NewBucketSize); - - DataPtr DestEntries = new EntryDataTy[NewBucketSize]; - memset(DestEntries, 0, sizeof(EntryDataTy) * NewBucketSize); - - // For each entry in source arrays... - for (size_t CurSrcEntryIdx = 0; CurSrcEntryIdx < CurBucket.Size; - CurSrcEntryIdx++) { - uint32_t CurSrcEntryHashBits = SrcHashes[CurSrcEntryIdx]; - - // Check for null entry. - if (CurSrcEntryHashBits == 0 && SrcEntries[CurSrcEntryIdx] == nullptr) - continue; - - size_t StartDestIdx = getStartIdx(CurSrcEntryHashBits, NewBucketSize); - - // Insert non-null entry into the new arrays. - while (true) { - uint32_t CurDestEntryHashBits = DestHashes[StartDestIdx]; - - if (CurDestEntryHashBits == 0 && DestEntries[StartDestIdx] == nullptr) { - // Found empty slot. Insert data. - DestHashes[StartDestIdx] = CurSrcEntryHashBits; - DestEntries[StartDestIdx] = SrcEntries[CurSrcEntryIdx]; - break; - } - - StartDestIdx++; - StartDestIdx = StartDestIdx & (NewBucketSize - 1); - } - } - - // Update bucket fields. - CurBucket.Hashes = DestHashes; - CurBucket.Entries = DestEntries; - CurBucket.Size = NewBucketSize; - - // Delete old bucket entries. - if (SrcHashes != nullptr) - delete[] SrcHashes; - if (SrcEntries != nullptr) - delete[] SrcEntries; - } - - size_t getBucketIdx(hash_code Hash) { return Hash & HashMask; } - - uint32_t getExtHashBits(uint64_t Hash) { - return (Hash & ExtHashMask) >> HashBitsNum; - } - - size_t getStartIdx(uint32_t ExtHashBits, size_t BucketSize) { - assert((BucketSize > 0) && "Empty bucket"); - - return ExtHashBits & (BucketSize - 1); - } - - // Number of bits in hash mask. - uint64_t HashBitsNum = 0; - - // Hash mask. - uint64_t HashMask = 0; - - // Hash mask for the extended hash bits. - uint64_t ExtHashMask = 0; - - // The maximal bucket size. - size_t MaxBucketSize = 0; - - // Initial size of bucket. - size_t InitialBucketSize = 0; - - // The number of buckets. - size_t NumberOfBuckets = 0; - - // Array of buckets. - std::unique_ptr BucketsArray; - - // Used for allocating KeyDataTy values. - AllocatorTy &MultiThreadAllocator; -}; - -} // end namespace llvm - -#endif // LLVM_ADT_CONCURRENTHASHTABLE_H diff --git a/llvm/unittests/ADT/CMakeLists.txt b/llvm/unittests/ADT/CMakeLists.txt index c5190255ba773..900294d4216ee 100644 --- a/llvm/unittests/ADT/CMakeLists.txt +++ b/llvm/unittests/ADT/CMakeLists.txt @@ -17,7 +17,6 @@ add_llvm_unittest(ADTTests BumpPtrListTest.cpp CoalescingBitVectorTest.cpp CombinationGeneratorTest.cpp - ConcurrentHashtableTest.cpp DAGDeltaAlgorithmTest.cpp DeltaAlgorithmTest.cpp DenseMapTest.cpp diff --git a/llvm/unittests/ADT/ConcurrentHashtableTest.cpp b/llvm/unittests/ADT/ConcurrentHashtableTest.cpp deleted file mode 100644 index c4faece251790..0000000000000 --- a/llvm/unittests/ADT/ConcurrentHashtableTest.cpp +++ /dev/null @@ -1,279 +0,0 @@ -//===- ConcurrentHashtableTest.cpp ----------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/ADT/ConcurrentHashtable.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/Parallel.h" -#include "gtest/gtest.h" -#include -#include -#include -using namespace llvm; - -namespace { -class String { -public: - String() {} - const std::string &getKey() const { return Data; } - - template - static String *create(const std::string &Num, AllocatorTy &Allocator) { - String *Result = Allocator.template Allocate(); - new (Result) String(Num); - return Result; - } - -protected: - String(const std::string &Num) { Data += Num; } - - std::string Data; - std::array ExtraData; -}; - -static LLVM_THREAD_LOCAL BumpPtrAllocator ThreadLocalAllocator; -class PerThreadAllocator : public AllocatorBase { -public: - inline LLVM_ATTRIBUTE_RETURNS_NONNULL void *Allocate(size_t Size, - size_t Alignment) { - return ThreadLocalAllocator.Allocate(Size, Align(Alignment)); - } - inline size_t getBytesAllocated() const { - return ThreadLocalAllocator.getBytesAllocated(); - } - - // Pull in base class overloads. - using AllocatorBase::Allocate; -} Allocator; - -TEST(ConcurrentHashTableTest, AddStringEntries) { - ConcurrentHashTableByPtr< - std::string, String, PerThreadAllocator, - ConcurrentHashTableInfoByPtr> - HashTable(Allocator, 10); - - size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); - std::pair res1 = HashTable.insert("1"); - // Check entry is inserted. - EXPECT_TRUE(res1.first->getKey() == "1"); - EXPECT_TRUE(res1.second); - - std::pair res2 = HashTable.insert("2"); - // Check old entry is still valid. - EXPECT_TRUE(res1.first->getKey() == "1"); - // Check new entry is inserted. - EXPECT_TRUE(res2.first->getKey() == "2"); - EXPECT_TRUE(res2.second); - // Check new and old entries use different memory. - EXPECT_TRUE(res1.first != res2.first); - - std::pair res3 = HashTable.insert("3"); - // Check one more entry is inserted. - EXPECT_TRUE(res3.first->getKey() == "3"); - EXPECT_TRUE(res3.second); - - std::pair res4 = HashTable.insert("1"); - // Check duplicated entry is inserted. - EXPECT_TRUE(res4.first->getKey() == "1"); - EXPECT_FALSE(res4.second); - // Check duplicated entry uses the same memory. - EXPECT_TRUE(res1.first == res4.first); - - // Check first entry is still valid. - EXPECT_TRUE(res1.first->getKey() == "1"); - - // Check data was allocated by allocator. - EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart); - - // Check statistic. - std::string StatisticString; - raw_string_ostream StatisticStream(StatisticString); - HashTable.printStatistic(StatisticStream); - - EXPECT_TRUE(StatisticString.find("Overall number of entries = 3\n") != - std::string::npos); -} - -TEST(ConcurrentHashTableTest, AddStringMultiplueEntries) { - const size_t NumElements = 10000; - ConcurrentHashTableByPtr< - std::string, String, PerThreadAllocator, - ConcurrentHashTableInfoByPtr> - HashTable(Allocator); - - // Check insertion. 
- for (size_t I = 0; I < NumElements; I++) { - size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); - std::string StringForElement = formatv("{0}", I); - std::pair Entry = HashTable.insert(StringForElement); - EXPECT_TRUE(Entry.second); - EXPECT_TRUE(Entry.first->getKey() == StringForElement); - EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart); - } - - std::string StatisticString; - raw_string_ostream StatisticStream(StatisticString); - HashTable.printStatistic(StatisticStream); - - // Verifying that the table contains exactly the number of elements we - // inserted. - EXPECT_TRUE(StatisticString.find("Overall number of entries = 10000\n") != - std::string::npos); - - // Check insertion of duplicates. - for (size_t I = 0; I < NumElements; I++) { - size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); - std::string StringForElement = formatv("{0}", I); - std::pair Entry = HashTable.insert(StringForElement); - EXPECT_FALSE(Entry.second); - EXPECT_TRUE(Entry.first->getKey() == StringForElement); - // Check no additional bytes were allocated for duplicate. - EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart); - } - - // Check statistic. - // Verifying that the table contains exactly the number of elements we - // inserted. - EXPECT_TRUE(StatisticString.find("Overall number of entries = 10000\n") != - std::string::npos); -} - -TEST(ConcurrentHashTableTest, AddStringMultiplueEntriesWithResize) { - // Number of elements exceeds original size, thus hashtable should be resized. - const size_t NumElements = 20000; - ConcurrentHashTableByPtr< - std::string, String, PerThreadAllocator, - ConcurrentHashTableInfoByPtr> - HashTable(Allocator, 100); - - // Check insertion. - for (size_t I = 0; I < NumElements; I++) { - size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); - std::string StringForElement = formatv("{0} {1}", I, I + 100); - std::pair Entry = HashTable.insert(StringForElement); - EXPECT_TRUE(Entry.second); - EXPECT_TRUE(Entry.first->getKey() == StringForElement); - EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart); - } - - std::string StatisticString; - raw_string_ostream StatisticStream(StatisticString); - HashTable.printStatistic(StatisticStream); - - // Verifying that the table contains exactly the number of elements we - // inserted. - EXPECT_TRUE(StatisticString.find("Overall number of entries = 20000\n") != - std::string::npos); - - // Check insertion of duplicates. - for (size_t I = 0; I < NumElements; I++) { - size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); - std::string StringForElement = formatv("{0} {1}", I, I + 100); - std::pair Entry = HashTable.insert(StringForElement); - EXPECT_FALSE(Entry.second); - EXPECT_TRUE(Entry.first->getKey() == StringForElement); - // Check no additional bytes were allocated for duplicate. - EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart); - } - - // Check statistic. - // Verifying that the table contains exactly the number of elements we - // inserted. - EXPECT_TRUE(StatisticString.find("Overall number of entries = 20000\n") != - std::string::npos); -} - -TEST(ConcurrentHashTableTest, AddStringEntriesParallel) { - const size_t NumElements = 10000; - ConcurrentHashTableByPtr< - std::string, String, PerThreadAllocator, - ConcurrentHashTableInfoByPtr> - HashTable(Allocator); - - // Check parallel insertion. 
- parallelFor(0, NumElements, [&](size_t I) { - size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); - std::string StringForElement = formatv("{0}", I); - std::pair Entry = HashTable.insert(StringForElement); - EXPECT_TRUE(Entry.second); - EXPECT_TRUE(Entry.first->getKey() == StringForElement); - EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart); - }); - - std::string StatisticString; - raw_string_ostream StatisticStream(StatisticString); - HashTable.printStatistic(StatisticStream); - - // Verifying that the table contains exactly the number of elements we - // inserted. - EXPECT_TRUE(StatisticString.find("Overall number of entries = 10000\n") != - std::string::npos); - - // Check parallel insertion of duplicates. - parallelFor(0, NumElements, [&](size_t I) { - size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); - std::string StringForElement = formatv("{0}", I); - std::pair Entry = HashTable.insert(StringForElement); - EXPECT_FALSE(Entry.second); - EXPECT_TRUE(Entry.first->getKey() == StringForElement); - // Check no additional bytes were allocated for duplicate. - EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart); - }); - - // Check statistic. - // Verifying that the table contains exactly the number of elements we - // inserted. - EXPECT_TRUE(StatisticString.find("Overall number of entries = 10000\n") != - std::string::npos); -} - -TEST(ConcurrentHashTableTest, AddStringEntriesParallelWithResize) { - const size_t NumElements = 20000; - ConcurrentHashTableByPtr< - std::string, String, PerThreadAllocator, - ConcurrentHashTableInfoByPtr> - HashTable(Allocator, 100); - - // Check parallel insertion. - parallelFor(0, NumElements, [&](size_t I) { - size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); - std::string StringForElement = formatv("{0}", I); - std::pair Entry = HashTable.insert(StringForElement); - EXPECT_TRUE(Entry.second); - EXPECT_TRUE(Entry.first->getKey() == StringForElement); - EXPECT_TRUE(Allocator.getBytesAllocated() > AllocatedBytesAtStart); - }); - - std::string StatisticString; - raw_string_ostream StatisticStream(StatisticString); - HashTable.printStatistic(StatisticStream); - - // Verifying that the table contains exactly the number of elements we - // inserted. - EXPECT_TRUE(StatisticString.find("Overall number of entries = 20000\n") != - std::string::npos); - - // Check parallel insertion of duplicates. - parallelFor(0, NumElements, [&](size_t I) { - size_t AllocatedBytesAtStart = Allocator.getBytesAllocated(); - std::string StringForElement = formatv("{0}", I); - std::pair Entry = HashTable.insert(StringForElement); - EXPECT_FALSE(Entry.second); - EXPECT_TRUE(Entry.first->getKey() == StringForElement); - // Check no additional bytes were allocated for duplicate. - EXPECT_TRUE(Allocator.getBytesAllocated() == AllocatedBytesAtStart); - }); - - // Check statistic. - // Verifying that the table contains exactly the number of elements we - // inserted. - EXPECT_TRUE(StatisticString.find("Overall number of entries = 20000\n") != - std::string::npos); -} - -} // namespace From 4f9929add5369490c9607fc9203761483d2bc916 Mon Sep 17 00:00:00 2001 From: Qiongsi Wu Date: Thu, 23 Mar 2023 09:16:18 -0400 Subject: [PATCH 429/691] [AIX][CodeGen] Storage Locations for Constant Pointers This patch adds an `llc` option `-mroptr` to specify storage locations for constant pointers on AIX. When the `-mroptr` option is specified, constant pointers, virtual function tables, and virtual type tables are placed in read-only storage. 
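To make "constant pointers" concrete, here is a minimal C++ illustration of the
kind of object the option affects (an explanatory sketch only; the IR tests
added below exercise the same shape directly):

    extern int g;
    // Constant data whose value is an address: it is never written after
    // load time, but it still needs a load-time relocation, so by default
    // it cannot live in a read-only csect. With -mroptr it can.
    int *const p = &g;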
Otherwise, by default, pointers, virtual function tables, and virtual
type tables are placed in read/write storage.

https://reviews.llvm.org/D144190 enables the `-mroptr` option for
`clang`.

Reviewed By: hubert.reinterpretcast, stephenpeckham, myhsu, MaskRay,
serge-sans-paille

Differential Revision: https://reviews.llvm.org/D144189
---
 llvm/docs/ReleaseNotes.rst                    |  6 +++-
 llvm/include/llvm/CodeGen/CommandFlags.h      |  2 ++
 llvm/include/llvm/Target/TargetOptions.h      | 19 ++++++-----
 llvm/lib/CodeGen/CommandFlags.cpp             |  9 +++++
 .../CodeGen/TargetLoweringObjectFileImpl.cpp  | 20 ++++++++---
 llvm/test/CodeGen/PowerPC/aix-xcoff-roptr.ll  | 30 +++++++++++++++++
 .../test/CodeGen/PowerPC/aix64-xcoff-roptr.ll | 33 +++++++++++++++++++
 llvm/tools/llc/llc.cpp                        | 18 ++++++++++
 8 files changed, 124 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-roptr.ll
 create mode 100644 llvm/test/CodeGen/PowerPC/aix64-xcoff-roptr.ll

diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 525f57a90dfb0..6f78497644479 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -122,7 +122,11 @@ Changes to the MIPS Backend
 Changes to the PowerPC Backend
 ------------------------------
 
-* ...
+* A new option ``-mroptr`` is added to ``clang`` and ``llc``. When this option
+  is present, constant objects with relocatable address values are put into the
+  RO data section. This option should be used with the ``-fdata-sections``
+  option, and is not supported with ``-fno-data-sections``. The option is
+  only supported on AIX.
 
 Changes to the RISC-V Backend
 -----------------------------

diff --git a/llvm/include/llvm/CodeGen/CommandFlags.h b/llvm/include/llvm/CodeGen/CommandFlags.h
index 475d87bdd5b13..19b466629dbfc 100644
--- a/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -143,6 +143,8 @@ unsigned getAlignLoops();
 
 bool getJMCInstrument();
 
+bool getXCOFFReadOnlyPointers();
+
 /// Create this object with static storage to register codegen-related command
 /// line options.
struct RegisterCodeGenFlags { diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h index 22e811653c6d4..76e4248088afd 100644 --- a/llvm/include/llvm/Target/TargetOptions.h +++ b/llvm/include/llvm/Target/TargetOptions.h @@ -130,13 +130,12 @@ namespace llvm { HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false), GuaranteedTailCallOpt(false), StackSymbolOrdering(true), EnableFastISel(false), EnableGlobalISel(false), UseInitArray(false), - DisableIntegratedAS(false), - RelaxELFRelocations(true), FunctionSections(false), - DataSections(false), IgnoreXCOFFVisibility(false), - XCOFFTracebackTable(true), UniqueSectionNames(true), - UniqueBasicBlockSectionNames(false), TrapUnreachable(false), - NoTrapAfterNoreturn(false), TLSSize(0), EmulatedTLS(false), - ExplicitEmulatedTLS(false), EnableIPRA(false), + DisableIntegratedAS(false), RelaxELFRelocations(true), + FunctionSections(false), DataSections(false), + IgnoreXCOFFVisibility(false), XCOFFTracebackTable(true), + UniqueSectionNames(true), UniqueBasicBlockSectionNames(false), + TrapUnreachable(false), NoTrapAfterNoreturn(false), TLSSize(0), + EmulatedTLS(false), ExplicitEmulatedTLS(false), EnableIPRA(false), EmitStackSizeSection(false), EnableMachineOutliner(false), EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false), EmitAddrsig(false), EmitCallSiteInfo(false), @@ -144,7 +143,7 @@ namespace llvm { ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false), DebugStrictDwarf(false), Hotpatch(false), PPCGenScalarMASSEntries(false), JMCInstrument(false), - EnableCFIFixup(false), MisExpect(false), + EnableCFIFixup(false), MisExpect(false), XCOFFReadOnlyPointers(false), FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {} /// DisableFramePointerElim - This returns true if frame pointer elimination @@ -360,6 +359,10 @@ namespace llvm { /// By default, it is set to false unsigned MisExpect : 1; + /// When set to true, const objects with relocatable address values are put + /// into the RO data section. + unsigned XCOFFReadOnlyPointers : 1; + /// Name of the stack usage file (i.e., .su file) if user passes /// -fstack-usage. If empty, it can be implied that -fstack-usage is not /// passed on the command line. 
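Condensed, the rule the new bit drives can be sketched as follows (simplified
stand-in types for exposition only, not the actual TargetLoweringObjectFileXCOFF
code; the real change is in the TargetLoweringObjectFileImpl.cpp hunk below):

    // Stand-ins for SectionKind and XCOFF::StorageMappingClass.
    enum class MappingClass { RO, RW };
    struct KindBits {
      bool IsReadOnly;        // plain constant data
      bool IsReadOnlyWithRel; // constant data needing load-time relocations
    };

    // Read-only data with relocations stays writable by default (so the
    // loader can fix it up in place) and becomes read-only only under -mroptr.
    MappingClass selectMappingClass(KindBits Kind, bool XCOFFReadOnlyPointers) {
      if (Kind.IsReadOnlyWithRel)
        return XCOFFReadOnlyPointers ? MappingClass::RO : MappingClass::RW;
      return Kind.IsReadOnly ? MappingClass::RO : MappingClass::RW;
    }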
diff --git a/llvm/lib/CodeGen/CommandFlags.cpp b/llvm/lib/CodeGen/CommandFlags.cpp
index 51d259cea41b8..5ef650787a585 100644
--- a/llvm/lib/CodeGen/CommandFlags.cpp
+++ b/llvm/lib/CodeGen/CommandFlags.cpp
@@ -103,6 +103,7 @@ CGOPT(bool, XRayOmitFunctionIndex)
 CGOPT(bool, DebugStrictDwarf)
 CGOPT(unsigned, AlignLoops)
 CGOPT(bool, JMCInstrument)
+CGOPT(bool, XCOFFReadOnlyPointers)
 
 codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
 #define CGBINDOPT(NAME)                                                        \
@@ -478,6 +479,13 @@ codegen::RegisterCodeGenFlags::RegisterCodeGenFlags() {
       cl::init(false));
   CGBINDOPT(JMCInstrument);
 
+  static cl::opt<bool> XCOFFReadOnlyPointers(
+      "mroptr",
+      cl::desc("When set to true, const objects with relocatable address "
+               "values are put into the RO data section."),
+      cl::init(false));
+  CGBINDOPT(XCOFFReadOnlyPointers);
+
 #undef CGBINDOPT
 
   mc::RegisterMCTargetOptionsFlags();
@@ -554,6 +562,7 @@ codegen::InitTargetOptionsFromCodeGenFlags(const Triple &TheTriple) {
   Options.DebugStrictDwarf = getDebugStrictDwarf();
   Options.LoopAlignment = getAlignLoops();
   Options.JMCInstrument = getJMCInstrument();
+  Options.XCOFFReadOnlyPointers = getXCOFFReadOnlyPointers();
 
   Options.MCOptions = mc::InitMCTargetOptionsFromFlags();
 
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index e2fbe027f15b0..c81b6bb623b96 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -2343,8 +2343,11 @@ MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
   XCOFF::StorageMappingClass MappingClass;
   if (Kind.isText())
     MappingClass = XCOFF::XMC_PR;
-  else if (Kind.isData() || Kind.isReadOnlyWithRel() || Kind.isBSS())
+  else if (Kind.isData() || Kind.isBSS())
     MappingClass = XCOFF::XMC_RW;
+  else if (Kind.isReadOnlyWithRel())
+    MappingClass =
+        TM.Options.XCOFFReadOnlyPointers ? XCOFF::XMC_RO : XCOFF::XMC_RW;
   else if (Kind.isReadOnly())
     MappingClass = XCOFF::XMC_RO;
   else
@@ -2429,9 +2432,18 @@ MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
     return TextSection;
   }
 
-  // TODO: We may put Kind.isReadOnlyWithRel() under option control, because
-  // user may want to have read-only data with relocations placed into a
-  // read-only section by the compiler.
+ if (TM.Options.XCOFFReadOnlyPointers && Kind.isReadOnlyWithRel()) { + if (!TM.getDataSections()) + report_fatal_error( + "ReadOnlyPointers is supported only if data sections is turned on"); + + SmallString<128> Name; + getNameWithPrefix(Name, GO, TM); + return getContext().getXCOFFSection( + Name, SectionKind::getReadOnly(), + XCOFF::CsectProperties(XCOFF::XMC_RO, XCOFF::XTY_SD)); + } + // For BSS kind, zero initialized data must be emitted to the .data section // because external linkage control sections that get mapped to the .bss // section will be linked as tentative defintions, which is only appropriate diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-roptr.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-roptr.ll new file mode 100644 index 0000000000000..532d17e087e5b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-roptr.ll @@ -0,0 +1,30 @@ +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -mroptr < %s | FileCheck %s +; RUN: llc -mtriple powerpc-ibm-aix-xcoff -mroptr -filetype=obj -o %t.o < %s +; RUN: llvm-objdump -t --symbol-description %t.o | FileCheck %s --check-prefix=OBJ + +; RUN: not llc -mtriple powerpc-ibm-aix-xcoff -mroptr -data-sections=false \ +; RUN: < %s 2>&1 | FileCheck %s --check-prefix=DS_ERR + +; DS_ERR: -mroptr option must be used with -data-sections + +%union.U = type { %"struct.U::A" } +%"struct.U::A" = type { ptr } + +@_ZL1p = internal constant i32 ptrtoint (ptr @_ZL1p to i32), align 4 +; CHECK: .csect _ZL1p[RO],2 +; CHECK-NEXT: .lglobl _ZL1p[RO] +; CHECK-NEXT: .align 2 +; CHECK-NEXT: .vbyte 4, _ZL1p[RO] +; OBJ-DAG: {{([[:xdigit:]]{8})}} l .text {{([[:xdigit:]]{8})}} (idx: [[#]]) _ZL1p[RO] +@q = thread_local constant ptr @_ZL1p, align 4 +; CHECK: .csect q[TL],2 +; CHECK-NEXT: .globl q[TL] +; CHECK-NEXT: .align 2 +; CHECK-NEXT: .vbyte 4, _ZL1p[RO] +; OBJ-DAG: {{([[:xdigit:]]{8})}} g O .tdata {{([[:xdigit:]]{8})}} (idx: [[#]]) q[TL] +@u = local_unnamed_addr constant [1 x %union.U] [%union.U { %"struct.U::A" { ptr @_ZL1p } }], align 4 +; CHECK: .csect u[RO],2 +; CHECK-NEXT: .globl u[RO] +; CHECK-NEXT: .align 2 +; CHECK-NEXT: .vbyte 4, _ZL1p[RO] +; OBJ-DAG: {{([[:xdigit:]]{8})}} g .text {{([[:xdigit:]]{8})}} (idx: [[#]]) u[RO] diff --git a/llvm/test/CodeGen/PowerPC/aix64-xcoff-roptr.ll b/llvm/test/CodeGen/PowerPC/aix64-xcoff-roptr.ll new file mode 100644 index 0000000000000..aff753661b0e1 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix64-xcoff-roptr.ll @@ -0,0 +1,33 @@ +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -mroptr < %s | FileCheck %s +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -mroptr -filetype=obj -o %t.o < %s +; RUN: llvm-objdump -t --symbol-description %t.o | FileCheck %s --check-prefix=OBJ + +; RUN: not llc -mtriple powerpc64-ibm-aix-xcoff -mroptr -data-sections=false \ +; RUN: < %s 2>&1 | FileCheck %s --check-prefix=DS_ERR +; RUN: not llc -mtriple powerpc64le-unknown-linux-gnu -mroptr \ +; RUN: < %s 2>&1 | FileCheck %s --check-prefix=OS_ERR + +; DS_ERR: -mroptr option must be used with -data-sections +; OS_ERR: -mroptr option is only supported on AIX + +%union.U = type { %"struct.U::A" } +%"struct.U::A" = type { ptr } + +@_ZL1p = internal constant i64 ptrtoint (ptr @_ZL1p to i64), align 8 +; CHECK: .csect _ZL1p[RO],3 +; CHECK-NEXT: .lglobl _ZL1p[RO] +; CHECK-NEXT: .align 3 +; CHECK-NEXT: .vbyte 8, _ZL1p[RO] +; OBJ-DAG: {{([[:xdigit:]]{16})}} l .text {{([[:xdigit:]]{16})}} (idx: [[#]]) _ZL1p[RO] +@q = thread_local constant ptr @_ZL1p, align 8 +; CHECK: .csect q[TL],3 +; CHECK-NEXT: .globl q[TL] +; CHECK-NEXT: .align 3 +; CHECK-NEXT: .vbyte 8, _ZL1p[RO] +; 
OBJ-DAG: {{([[:xdigit:]]{16})}} g O .tdata {{([[:xdigit:]]{16})}} (idx: [[#]]) q[TL]
+@u = local_unnamed_addr constant [1 x %union.U] [%union.U { %"struct.U::A" { ptr @_ZL1p } }], align 8
+; CHECK: .csect u[RO],3
+; CHECK-NEXT: .globl u[RO]
+; CHECK-NEXT: .align 3
+; CHECK-NEXT: .vbyte 8, _ZL1p[RO]
+; OBJ-DAG: {{([[:xdigit:]]{16})}} g .text {{([[:xdigit:]]{16})}} (idx: [[#]]) u[RO]
diff --git a/llvm/tools/llc/llc.cpp b/llvm/tools/llc/llc.cpp
index ed65b83487790..860fa39d57e8a 100644
--- a/llvm/tools/llc/llc.cpp
+++ b/llvm/tools/llc/llc.cpp
@@ -496,6 +496,24 @@ static int compileModule(char **argv, LLVMContext &Context) {
   TargetOptions Options;
   auto InitializeOptions = [&](const Triple &TheTriple) {
     Options = codegen::InitTargetOptionsFromCodeGenFlags(TheTriple);
+
+    if (Options.XCOFFReadOnlyPointers) {
+      if (!TheTriple.isOSAIX())
+        reportError("-mroptr option is only supported on AIX", InputFilename);
+
+      // Since the storage mapping class is specified per csect,
+      // without using data sections, it is less effective to use read-only
+      // pointers. Using read-only pointers may cause other RO variables in the
+      // same csect to become RW when the linker acts upon `-bforceimprw`;
+      // therefore, we require that separate data sections are used in the
+      // presence of ReadOnlyPointers. We respect the setting of data-sections
+      // since we have not found reasons to do otherwise that overcome the user
+      // surprise of not respecting the setting.
+      if (!Options.DataSections)
+        reportError("-mroptr option must be used with -data-sections",
+                    InputFilename);
+    }
+
     Options.BinutilsVersion =
         TargetMachine::parseBinutilsVersion(BinutilsVersion);
     Options.DisableIntegratedAS = NoIntegratedAssembler;

From 5193c4a8b38c3e61c862d5badf1cace7c26324f7 Mon Sep 17 00:00:00 2001
From: David Spickett
Date: Thu, 23 Mar 2023 13:46:49 +0000
Subject: [PATCH 430/691] [lldb][AArch64] Fix run-qemu.sh when only MTE is
 enabled.

SVE and MTE each require a CPU with the corresponding feature before
their related options can be used, but we previously selected the "max"
CPU only when SVE was enabled.
---
 lldb/scripts/lldb-test-qemu/run-qemu.sh | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
 mode change 100644 => 100755 lldb/scripts/lldb-test-qemu/run-qemu.sh

diff --git a/lldb/scripts/lldb-test-qemu/run-qemu.sh b/lldb/scripts/lldb-test-qemu/run-qemu.sh
old mode 100644
new mode 100755
index 339b8d955e613..d11711c10e772
--- a/lldb/scripts/lldb-test-qemu/run-qemu.sh
+++ b/lldb/scripts/lldb-test-qemu/run-qemu.sh
@@ -109,8 +109,12 @@ elif [[ "$ARCH" == "arm64" ]]; then
   QEMU_SVE_MAX_VQ=4
   QEMU_CPU="cortex-a53"
 
+  if [[ $SVE ]] || [[ $MTE ]]; then
+    QEMU_CPU="max"
+  fi
+
   if [[ $SVE ]]; then
-    QEMU_CPU="max,sve-max-vq=$QEMU_SVE_MAX_VQ"
+    QEMU_CPU="$QEMU_CPU,sve-max-vq=$QEMU_SVE_MAX_VQ"
   fi
   if [[ $MTE ]]; then
     QEMU_MACHINE="$QEMU_MACHINE,mte=on"

From 3ab79124db5e4e1be0b58c4fe43ff01e6fdb3060 Mon Sep 17 00:00:00 2001
From: Ye Luo
Date: Thu, 23 Mar 2023 08:56:47 -0500
Subject: [PATCH 431/691] [OpenMP] Add notifyDataUnmapped back in
 disassociatePtr

Fix regression introduced by https://reviews.llvm.org/D123446

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D146689
---
 openmp/libomptarget/src/device.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/openmp/libomptarget/src/device.cpp b/openmp/libomptarget/src/device.cpp
index 09c8e808db463..22ab7436f75b5 100644
--- a/openmp/libomptarget/src/device.cpp
+++ b/openmp/libomptarget/src/device.cpp
@@ -134,7 +134,7 @@ int DeviceTy::disassociatePtr(void *HstPtrBegin) {
     if (Event)
       destroyEvent(Event);
     HDTTMap->erase(It);
-    return OFFLOAD_SUCCESS;
+    return notifyDataUnmapped(HstPtrBegin);
   }
 
   REPORT("Trying to disassociate a pointer which was not mapped via "

From 0eabf59528f3c3f64923900cae740d9f26c45ae8 Mon Sep 17 00:00:00 2001
From: Doru Bercea
Date: Tue, 21 Mar 2023 14:07:57 -0400
Subject: [PATCH 432/691] Prevent constexpr class members that are
 device-mapped from being optimized out.

This patch fixes an issue whereby a constexpr class member that is
mapped to the device was being optimized out, leading to a runtime
error.

Patch: https://reviews.llvm.org/D146552
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         |  4 +-
 .../declare_target_constexpr_codegen.cpp      | 40 +++++++++++++++++++
 .../offloading/target_constexpr_mapping.cpp   | 34 ++++++++++++++++
 3 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/OpenMP/declare_target_constexpr_codegen.cpp
 create mode 100644 openmp/libomptarget/test/offloading/target_constexpr_mapping.cpp

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 5f21cfca66bb8..58a95d64ac50e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10387,7 +10387,9 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
   }
   Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
   // Temp solution to prevent optimizations of the internal variables.
-  if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
+  if (CGM.getLangOpts().OpenMPIsDevice &&
+      (!VD->isExternallyVisible() ||
+       Linkage == llvm::GlobalValue::LinkOnceODRLinkage)) {
     // Do not create a "ref-variable" if the original is not also available
    // on the host.
if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) diff --git a/clang/test/OpenMP/declare_target_constexpr_codegen.cpp b/clang/test/OpenMP/declare_target_constexpr_codegen.cpp new file mode 100644 index 0000000000000..27161feef05e0 --- /dev/null +++ b/clang/test/OpenMP/declare_target_constexpr_codegen.cpp @@ -0,0 +1,40 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --prefix-filecheck-ir-name _ --global-value-regex "llvm.compiler.used" "_[0-9a-zA-Z]+A[0-9a-zA-Z]+pi[0-9a-zA-Z]+" "_[0-9a-zA-Z]+anotherPi" --version 2 +// REQUIRES: amdgpu-registered-target + + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-target-debug -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK + +// expected-no-diagnostics + +#ifndef HEADER +#define HEADER + +#pragma omp declare target +class A { +public: + static constexpr double pi = 3.141592653589793116; +//. +// CHECK: @_ZN1A2piE = linkonce_odr constant double 0x400921FB54442D18, comdat, align 8 +// CHECK: @_ZL9anotherPi = internal constant double 3.140000e+00, align 8 +// CHECK: @llvm.compiler.used = appending global [2 x ptr] [ptr @"__ZN1A2piE$ref", ptr @"__ZL9anotherPi$ref"], section "llvm.metadata" +//. + A() { ; } + ~A() { ; } +}; +#pragma omp end declare target + +void F(const double &); +void Test() { F(A::pi); } + +#pragma omp declare target +constexpr static double anotherPi = 3.14; +#pragma omp end declare target + +#endif + + +// +//// NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +// CHECK: {{.*}} diff --git a/openmp/libomptarget/test/offloading/target_constexpr_mapping.cpp b/openmp/libomptarget/test/offloading/target_constexpr_mapping.cpp new file mode 100644 index 0000000000000..14cf92a7cc26e --- /dev/null +++ b/openmp/libomptarget/test/offloading/target_constexpr_mapping.cpp @@ -0,0 +1,34 @@ +// RUN: %libomptarget-compileoptxx-run-and-check-generic + +#include +#include + +#pragma omp declare target +class A { +public: + constexpr static double pi = 3.141592653589793116; + A() { ; } + ~A() { ; } +}; +#pragma omp end declare target + +#pragma omp declare target +constexpr static double anotherPi = 3.14; +#pragma omp end declare target + +int main() { + double a[2]; +#pragma omp target map(tofrom : a[:2]) + { + a[0] = A::pi; + a[1] = anotherPi; + } + + // CHECK: pi = 3.141592653589793116 + printf("pi = %.18f\n", a[0]); + + // CHECK: anotherPi = 3.14 + printf("anotherPi = %.2f\n", a[1]); + + return 0; +} From c7a3284de3059ecb5940dac19dda897ade0d11b4 Mon Sep 17 00:00:00 2001 From: khei4 Date: Thu, 9 Mar 2023 15:31:11 +0900 Subject: [PATCH 433/691] [AggressiveInstCombine] Pre-Commit test for D144445 (NFC) Differential Revision: https://reviews.llvm.org/D145355 tweak: test --- .../AggressiveInstCombine/patterned-load.ll | 189 ++++++++++++++++++ .../InstSimplify/load-patterned-aggregates.ll | 134 ------------- llvm/test/Transforms/InstSimplify/load.ll | 20 ++ 3 files changed, 209 insertions(+), 134 deletions(-) create mode 100644 llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll delete mode 100644 llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll diff --git a/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll b/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll new file mode 100644 index 0000000000000..5410a21e3211d --- /dev/null +++ b/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=aggressive-instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK,LE +; RUN: opt < %s -passes=aggressive-instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK,BE + + +@constarray1 = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 4 +@constarray2 = internal constant [8 x i8] c"\FF\FF\01\00\01\00\01\00", align 4 + +@g = internal constant i32 42 +@constptrarray = internal constant [4 x ptr] [ptr @g, ptr @g, ptr @g, ptr @g], align 4 + +@constpackedstruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4 +@conststruct = internal constant {i16, [8 x i8]} {i16 1, [8 x i8] c"\01\00\01\00\01\00\01\00"}, align 4 + +; TODO: this will be ret i8 1 +define i8 @inbounds_gep_load_i8_align2(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_load_i8_align2( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray1, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 2 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = getelementptr inbounds i8, ptr @constarray1, i64 %idx + %2 = load i8, ptr %1, align 2 + ret i8 %2 +} + +; can't be folded because access with i8 strides is not patterned. 
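+; Worked example of the alignment reasoning: @constarray1 holds the bytes
+; 01 00 01 00 01 00 01 00. An i8 load with align 2 from this align-4 global
+; can only read offsets 0, 2, 4 or 6, which all hold 0x01, so folding the
+; test above to the constant 1 is sound (the follow-up patch does exactly
+; that). An i8 load with align 1 may also read offsets 1, 3, 5 or 7, which
+; hold 0x00, so no single constant is valid for every offset and the
+; function below must be left unfolded.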
+define i8 @inbounds_gep_load_i8_align1(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_load_i8_align1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray1, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = getelementptr inbounds i8, ptr @constarray1, i64 %idx + %2 = load i8, ptr %1, align 1 + ret i8 %2 +} + +; can't be folded because volatile load cannot assure same results. +define i8 @inbounds_gep_load_i8_align2_volatile(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_load_i8_align2_volatile( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray1, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load volatile i8, ptr [[TMP1]], align 2 +; CHECK-NEXT: ret i8 [[TMP2]] +; + %1 = getelementptr inbounds i8, ptr @constarray1, i64 %idx + %2 = load volatile i8, ptr %1, align 2 + ret i8 %2 +} + +declare ptr @llvm.ptrmask.p0.i64(ptr , i64) + +; can't be folded because ptrmask can change ptr, while preserving provenance +define i8 @inbounds_gep_load_i8_align2_ptrmasked(i64 %idx, i64 %mask){ +; CHECK-LABEL: @inbounds_gep_load_i8_align2_ptrmasked( +; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 [[MASK:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 2 +; CHECK-NEXT: ret i8 [[TMP3]] +; + %1 = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 %mask) + %2 = getelementptr inbounds i8, ptr %1, i64 %idx + %3 = load i8, ptr %2, align 2 + ret i8 %3 +} + +; TODO: this will be ret i32 65537(LE), 16777472(BE) +define i32 @inbounds_gep_i16_load_i32_align1(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i16_load_i32_align1( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray1, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %1 = getelementptr inbounds i16, ptr @constarray1, i64 %idx + %2 = load i32, ptr %1, align 1 + ret i32 %2 +} + +; TODO: this will be ret i32 65537(LE), 16777472(BE) +define i32 @inbounds_gep_i32_load_i32_align8(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i32_load_i32_align8( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constarray1, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %1 = getelementptr inbounds i32, ptr @constarray1, i64 %idx + %2 = load i32, ptr %1, align 8 + ret i32 %2 +} + +; TODO: this will be ret i32 65547(LE), 16777472(BE) +define i32 @inbounds_gep_i32_load_i32_const_offset(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i32_load_i32_const_offset( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray2, i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = getelementptr inbounds i16, ptr @constarray2, i64 1 + %2 = getelementptr inbounds i32, ptr %1, i64 %idx + %3 = load i32, ptr %2, align 4 + ret i32 %3 +} + +; TODO: this coould be folded into 65537(LE), 16777472(BE) +define i32 @gep_load_i32_align2_const_offset(i64 %idx){ +; CHECK-LABEL: @gep_load_i32_align2_const_offset( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr @constarray1, i64 -2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [3 x i16], ptr [[TMP1]], i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 2 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = 
getelementptr i16, ptr @constarray1, i64 -2 + %2 = getelementptr [3 x i16], ptr %1, i64 %idx + %3 = load i32, ptr %2, align 2 + ret i32 %3 +} + +; can't be folded because if gep is non-inbounds, +; the offsets are silently-wrapped with two’s complement arithmetic(mod 2**64). +; So the load operand can be a base pointer of constarray2. +define i32 @gep_load_i32_align2_const_offset_wrap(i64 %idx){ +; CHECK-LABEL: @gep_load_i32_align2_const_offset_wrap( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr @constarray2, i64 -2 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [3 x i16], ptr [[TMP1]], i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 2 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = getelementptr i16, ptr @constarray2, i64 -2 + %2 = getelementptr [3 x i16], ptr %1, i64 %idx + %3 = load i32, ptr %2, align 2 + ret i32 %3 +} + +; TODO: this will be ret i32 42 +define i32 @inbounds_gep_i32_load_i32_const_ptr_array(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i32_load_i32_const_ptr_array( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr @constptrarray, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = getelementptr inbounds ptr, ptr @constptrarray, i64 %idx + %2 = load ptr, ptr %1, align 4 + %3 = load i32, ptr %2, align 4 + ret i32 %3 +} + +; TODO: this coould be folded into 65537(LE), 16777472(BE) +define i32 @inbounds_gep_i32_load_i32_align4_packedstruct(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i32_load_i32_align4_packedstruct( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constpackedstruct, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %1 = getelementptr inbounds i32, ptr @constpackedstruct, i64 %idx + %2 = load i32, ptr %1, align 4 + ret i32 %2 +} + +; can't be folded because results are not equal +define i32 @inbounds_gep_i8_load_i32_align1_packedstruct(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i8_load_i32_align1_packedstruct( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constpackedstruct, i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1 +; CHECK-NEXT: ret i32 [[TMP2]] +; + %1 = getelementptr inbounds i8, ptr @constpackedstruct, i64 %idx + %2 = load i32, ptr %1, align 1 + ret i32 %2 +} + +; TODO: this coould be folded into 65537(LE), 16777472(BE) +define i32 @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(i64 %idx){ +; CHECK-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @conststruct, i64 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: ret i32 [[TMP3]] +; + %1 = getelementptr inbounds i16, ptr @conststruct, i64 1 + %2 = getelementptr inbounds i32, ptr %1, i64 %idx + %3 = load i32, ptr %2, align 4 + ret i32 %3 +} + +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; BE: {{.*}} +; LE: {{.*}} diff --git a/llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll b/llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll deleted file mode 100644 index 82283648936cf..0000000000000 --- a/llvm/test/Transforms/InstSimplify/load-patterned-aggregates.ll +++ /dev/null @@ -1,134 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instsimplify -S | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" -@constzeroarray = internal constant [4 x i32] zeroinitializer - -@constarray = internal constant [8 x i8] c"\01\00\01\00\01\00\01\00", align 4 -@conststruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4 - -define i32 @load_gep_const_zero_array(i64 %idx) { -; CHECK-LABEL: @load_gep_const_zero_array( -; CHECK-NEXT: ret i32 0 -; - %gep = getelementptr inbounds [4 x i32], ptr @constzeroarray, i64 0, i64 %idx - %load = load i32, ptr %gep - ret i32 %load -} - -define i8 @load_i8_multi_gep_const_zero_array(i64 %idx1, i64 %idx2) { -; CHECK-LABEL: @load_i8_multi_gep_const_zero_array( -; CHECK-NEXT: ret i8 0 -; - %gep1 = getelementptr inbounds i8, ptr @constzeroarray, i64 %idx1 - %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2 - %load = load i8, ptr %gep - ret i8 %load -} - - -define i32 @load_gep_const_patterned_array(i64 %idx) { -; CHECK-LABEL: @load_gep_const_patterned_array( -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds [4 x i32], ptr @constarray, i64 0, i64 [[IDX:%.*]] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: ret i32 [[LOAD]] -; - %gep = getelementptr inbounds [4 x i32], ptr @constarray, i64 0, i64 %idx - %load = load i32, ptr %gep - ret i32 %load -} - -define i8 @load_i8_multi_gep_const_array(i64 %idx1, i64 %idx2) { -; CHECK-LABEL: @load_i8_multi_gep_const_array( -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX1:%.*]] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[GEP1]], i64 [[IDX2:%.*]] -; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1 -; CHECK-NEXT: ret i8 [[LOAD]] -; - %gep1 = getelementptr inbounds i8, ptr @constarray, i64 %idx1 - %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2 - %load = load i8, ptr %gep - ret i8 %load -} - -; TODO: this should be ret i8 1 -define i8 @gep_load_i8_align2(i64 %idx){ -; CHECK-LABEL: @gep_load_i8_align2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 2 -; CHECK-NEXT: ret i8 [[TMP2]] -; - %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx - %2 = load i8, ptr %1, align 2 - ret i8 %2 -} - -; can't be folded -define i8 @gep_load_i8_align1(i64 %idx){ -; CHECK-LABEL: @gep_load_i8_align1( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 -; CHECK-NEXT: ret i8 [[TMP2]] -; - %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx - %2 = load i8, ptr %1, align 1 - ret i8 %2 -} - -; TODO: this should be ret i8 65537 on the case for little endian -define i32 @gep_i32_load_i32_align4(i64 %idx){ -; CHECK-LABEL: @gep_i32_load_i32_align4( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constarray, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], 
align 4
-; CHECK-NEXT:    ret i32 [[TMP2]]
-;
-  %1 = getelementptr inbounds i32, ptr @constarray, i64 %idx
-  %2 = load i32, ptr %1, align 4
-  ret i32 %2
-}
-
-; TODO: this should be ret i8 65537 on the case for little endian
-define i32 @gep_i32_load_i32_align4_struct(i64 %idx){
-; CHECK-LABEL: @gep_i32_load_i32_align4_struct(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, ptr @conststruct, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
-; CHECK-NEXT:    ret i32 [[TMP2]]
-;
-  %1 = getelementptr inbounds i32, ptr @conststruct, i64 %idx
-  %2 = load i32, ptr %1, align 4
-  ret i32 %2
-}
-
-; can't be folded
-define i32 @gep_i8_load_i32_align1(i64 %idx){
-; CHECK-LABEL: @gep_i8_load_i32_align1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1
-; CHECK-NEXT:    ret i32 [[TMP2]]
-;
-  %1 = getelementptr inbounds i8, ptr @constarray, i64 %idx
-  %2 = load i32, ptr %1, align 1
-  ret i32 %2
-}
-
-; can't be folded
-define i32 @gep_i8_load_i32_align1_struct(i64 %idx){
-; CHECK-LABEL: @gep_i8_load_i32_align1_struct(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr @conststruct, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1
-; CHECK-NEXT:    ret i32 [[TMP2]]
-;
-  %1 = getelementptr inbounds i8, ptr @conststruct, i64 %idx
-  %2 = load i32, ptr %1, align 1
-  ret i32 %2
-}
-; TODO: This could be folded but need to see GEP source types
-define i32 @gep_i16_load_i32_align1(i64 %idx){
-; CHECK-LABEL: @gep_i16_load_i32_align1(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr @constarray, i64 [[IDX:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1
-; CHECK-NEXT:    ret i32 [[TMP2]]
-;
-  %1 = getelementptr inbounds i16, ptr @constarray, i64 %idx
-  %2 = load i32, ptr %1, align 1
-  ret i32 %2
-}
diff --git a/llvm/test/Transforms/InstSimplify/load.ll b/llvm/test/Transforms/InstSimplify/load.ll
index 2e2b1b14ddd0f..8b9a607f77b7d 100644
--- a/llvm/test/Transforms/InstSimplify/load.ll
+++ b/llvm/test/Transforms/InstSimplify/load.ll
@@ -3,6 +3,7 @@
 
 @zeroinit = constant {} zeroinitializer
 @poison = constant {} poison
+@constzeroarray = internal constant [4 x i32] zeroinitializer
 
 define i32 @crash_on_zeroinit() {
 ; CHECK-LABEL: @crash_on_zeroinit(
@@ -40,3 +41,22 @@ define <3 x float> @load_vec3() {
   %1 = load <3 x float>, ptr getelementptr inbounds (<3 x float>, ptr @constvec, i64 1)
   ret <3 x float> %1
 }
+
+define i32 @load_gep_const_zero_array(i64 %idx) {
+; CHECK-LABEL: @load_gep_const_zero_array(
+; CHECK-NEXT:    ret i32 0
+;
+  %gep = getelementptr inbounds [4 x i32], ptr @constzeroarray, i64 0, i64 %idx
+  %load = load i32, ptr %gep
+  ret i32 %load
+}
+
+define i8 @load_i8_multi_gep_const_zero_array(i64 %idx1, i64 %idx2) {
+; CHECK-LABEL: @load_i8_multi_gep_const_zero_array(
+; CHECK-NEXT:    ret i8 0
+;
+  %gep1 = getelementptr inbounds i8, ptr @constzeroarray, i64 %idx1
+  %gep = getelementptr inbounds i8, ptr %gep1, i64 %idx2
+  %load = load i8, ptr %gep
+  ret i8 %load
+}

From 434b0badb5d53138490a075dd945df7480649154 Mon Sep 17 00:00:00 2001
From: khei4
Date: Thu, 9 Mar 2023 18:46:14 +0900
Subject: [PATCH 434/691] [AggressiveInstCombine] folding load for constant
 global patterned arrays and structs by alignment

Differential Revision: https://reviews.llvm.org/D144445

Reviewed By: nikic

fix: wrong arrow
---
 .../AggressiveInstCombine.cpp                 | 60 ++++++++++++++++++-
 .../AggressiveInstCombine/patterned-load.ll   | 47 +++++++--------
 2 files changed, 77 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index 473b41241b8a6..cf652836bef25 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
@@ -305,7 +306,7 @@ static bool tryToRecognizePopCount(Instruction &I) {
   Value *MulOp0;
   // Matching "(i * 0x01010101...) >> 24".
   if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
-       match(Op1, m_SpecificInt(MaskShift))) {
+      match(Op1, m_SpecificInt(MaskShift))) {
     Value *ShiftOp0;
     // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
     if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
@@ -401,8 +402,8 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
 /// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
 /// pessimistic codegen that has to account for setting errno and can enable
 /// vectorization.
-static bool
-foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI) {
+static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI,
+                     TargetLibraryInfo &TLI) {
   // Match a call to sqrt mathlib function.
   auto *Call = dyn_cast<CallInst>(&I);
   if (!Call)
@@ -824,6 +825,58 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
   return true;
 }
 
+/// If C is a constant patterned array and all valid loaded results for the
+/// given alignment are equal to the same constant, return that constant.
+static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
+  auto *LI = dyn_cast<LoadInst>(&I);
+  if (!LI || LI->isVolatile())
+    return false;
+
+  // We can only fold the load if it is from a constant global with definitive
+  // initializer. Skip expensive logic if this is not the case.
+  auto *PtrOp = LI->getPointerOperand();
+  auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(PtrOp));
+  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
+    return false;
+
+  Type *LoadTy = LI->getType();
+  Constant *C = GV->getInitializer();
+
+  // Bail for large initializers in excess of 4K to avoid too many scans.
+  uint64_t GVSize = DL.getTypeAllocSize(C->getType());
+  if (!GVSize || 4096 < GVSize)
+    return false;
+
+  // Check whether the pointer arrives back at the global variable. If PtrOp
+  // is neither a GlobalVariable nor a GEP, it might not arrive back at the
+  // GlobalVariable.
+  // TODO: implement GEP handling
+  unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
+  // TODO: Determine stride based on GEPs.
+  APInt Stride(BW, 1);
+  APInt ConstOffset(BW, 0);
+
+  // Any possible offset is a multiple of the GEP stride, and any valid offset
+  // is a multiple of the load alignment, so checking only multiples of the
+  // larger of the two is sufficient to establish that all results are equal.
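+  // For example, an align-4 i32 load from an align-4 global can only start
+  // at byte offsets 0, 4, 8, ..., so only those offsets have to produce the
+  // same folded constant for the load to be replaceable.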
+ if (auto LA = LI->getAlign(); + LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) + Stride = APInt(BW, LA.value()); + + Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL); + if (!Ca) + return false; + + unsigned E = GVSize - DL.getTypeStoreSize(LoadTy); + for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride) + if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL)) + return false; + + I.replaceAllUsesWith(Ca); + + return true; +} + /// This is the entry point for folds that could be implemented in regular /// InstCombine, but they are separated because they are not expected to /// occur frequently and/or have more than a constant-length pattern match. @@ -850,6 +903,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT, MadeChange |= tryToFPToSat(I, TTI); MadeChange |= tryToRecognizeTableBasedCttz(I); MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA); + MadeChange |= foldPatternedLoads(I, DL); // NOTE: This function introduces erasing of the instruction `I`, so it // needs to be called at the end of this sequence, otherwise we may make // bugs. diff --git a/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll b/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll index 5410a21e3211d..7acc6109744ca 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/patterned-load.ll @@ -12,12 +12,9 @@ @constpackedstruct = internal constant <{[8 x i8]}> <{[8 x i8] c"\01\00\01\00\01\00\01\00"}>, align 4 @conststruct = internal constant {i16, [8 x i8]} {i16 1, [8 x i8] c"\01\00\01\00\01\00\01\00"}, align 4 -; TODO: this will be ret i8 1 define i8 @inbounds_gep_load_i8_align2(i64 %idx){ ; CHECK-LABEL: @inbounds_gep_load_i8_align2( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr @constarray1, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 2 -; CHECK-NEXT: ret i8 [[TMP2]] +; CHECK-NEXT: ret i8 1 ; %1 = getelementptr inbounds i8, ptr @constarray1, i64 %idx %2 = load i8, ptr %1, align 2 @@ -53,10 +50,7 @@ declare ptr @llvm.ptrmask.p0.i64(ptr , i64) ; can't be folded because ptrmask can change ptr, while preserving provenance define i8 @inbounds_gep_load_i8_align2_ptrmasked(i64 %idx, i64 %mask){ ; CHECK-LABEL: @inbounds_gep_load_i8_align2_ptrmasked( -; CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 [[MASK:%.*]]) -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 2 -; CHECK-NEXT: ret i8 [[TMP3]] +; CHECK-NEXT: ret i8 1 ; %1 = call ptr @llvm.ptrmask.p0.i64(ptr @constarray1, i64 %mask) %2 = getelementptr inbounds i8, ptr %1, i64 %idx @@ -102,13 +96,12 @@ define i32 @inbounds_gep_i32_load_i32_const_offset(i64 %idx){ ret i32 %3 } -; TODO: this coould be folded into 65537(LE), 16777472(BE) define i32 @gep_load_i32_align2_const_offset(i64 %idx){ -; CHECK-LABEL: @gep_load_i32_align2_const_offset( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr @constarray1, i64 -2 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr [3 x i16], ptr [[TMP1]], i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 2 -; CHECK-NEXT: ret i32 [[TMP3]] +; LE-LABEL: @gep_load_i32_align2_const_offset( +; LE-NEXT: ret i32 65537 +; +; BE-LABEL: @gep_load_i32_align2_const_offset( +; BE-NEXT: ret i32 16777472 ; %1 = getelementptr i16, ptr @constarray1, i64 -2 %2 = getelementptr [3 x i16], ptr %1, i64 %idx @@ 
-146,12 +139,12 @@ define i32 @inbounds_gep_i32_load_i32_const_ptr_array(i64 %idx){ ret i32 %3 } -; TODO: this coould be folded into 65537(LE), 16777472(BE) define i32 @inbounds_gep_i32_load_i32_align4_packedstruct(i64 %idx){ -; CHECK-LABEL: @inbounds_gep_i32_load_i32_align4_packedstruct( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr @constpackedstruct, i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -; CHECK-NEXT: ret i32 [[TMP2]] +; LE-LABEL: @inbounds_gep_i32_load_i32_align4_packedstruct( +; LE-NEXT: ret i32 65537 +; +; BE-LABEL: @inbounds_gep_i32_load_i32_align4_packedstruct( +; BE-NEXT: ret i32 16777472 ; %1 = getelementptr inbounds i32, ptr @constpackedstruct, i64 %idx %2 = load i32, ptr %1, align 4 @@ -172,11 +165,14 @@ define i32 @inbounds_gep_i8_load_i32_align1_packedstruct(i64 %idx){ ; TODO: this coould be folded into 65537(LE), 16777472(BE) define i32 @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(i64 %idx){ -; CHECK-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset( -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @conststruct, i64 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -; CHECK-NEXT: ret i32 [[TMP3]] +; LE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset( +; LE-NEXT: ret i32 65537 +; +; BE-LABEL: @inbounds_gep_i32_load_i32_align4_struct_with_const_offset( +; BE-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr @conststruct, i64 1 +; BE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 [[IDX:%.*]] +; BE-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +; BE-NEXT: ret i32 [[TMP3]] ; %1 = getelementptr inbounds i16, ptr @conststruct, i64 1 %2 = getelementptr inbounds i32, ptr %1, i64 %idx @@ -184,6 +180,3 @@ define i32 @inbounds_gep_i32_load_i32_align4_struct_with_const_offset(i64 %idx){ ret i32 %3 } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; BE: {{.*}} -; LE: {{.*}} From 5bcb4c4da99c443fb880d408e5ff4e9b305bbb77 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 23 Mar 2023 07:23:36 -0700 Subject: [PATCH 435/691] [MSAN] Support load and stores of scalable vector types This adds support for scalable vector types - at least far enough to get basic load and store cases working. It turns out that load/store without origin tracking already worked; I apparently got that working with one of the pre patches to use TypeSize utilities and didn't notice. The code changes here are required to enable origin tracking. For origin tracking, a 4 byte value - the origin - is broadcast into a shadow region whose size exactly matches the type being accessed. This origin is only written if the shadow value is non-zero. The details of how shadow is computed from the original value being stored aren't relevant for this patch. The code changes involve two related primitives. First, we need to be able to perform that broadcast into a scalable sized memory region. This requires the use of a loop, and appropriate bound. The fixed size case optimizes with larger stores and alignment; I did not bother with that for the scalable case for now. We can optimize this codepath later if desired. Second, we need a way to test if the shadow is zero. The mechanism for this in the code is to convert the shadow value into a scalar, and then zero check that. 
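A rough sketch of that conversion step for the scalable case (the wrapper
name here is illustrative only; IRBuilder::CreateOrReduce is the real API
used by the patch, emitting llvm.vector.reduce.or):

    // Reduce a scalable shadow vector to one scalar with a bitwise OR. The
    // result is zero iff every lane of the shadow is zero.
    llvm::Value *reduceScalableShadow(llvm::IRBuilder<> &IRB,
                                      llvm::Value *ShadowVec) {
      return IRB.CreateOrReduce(ShadowVec);
    }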
There's an assumption that this scalar is zero exactly when all elements
of the shadow value are zero. As a result, we use an OR reduction on the
scalable vector. This is analogous to how e.g. an array is handled.

I landed a bunch of cleanup changes to remove other direct uses of the
scalar conversion to convince myself there were no other undocumented
invariants.

Differential Revision: https://reviews.llvm.org/D146157
---
 .../Instrumentation/MemorySanitizer.cpp       |  20 +-
 .../MemorySanitizer/vector-load-store.ll      | 509 ++++++++++++++++++
 2 files changed, 528 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 75cb9e0c0e63d..953ce72c1cec9 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -1183,13 +1183,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   /// Fill memory range with the given origin value.
   void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
                    TypeSize TS, Align Alignment) {
-    unsigned Size = TS.getFixedValue();
     const DataLayout &DL = F.getParent()->getDataLayout();
     const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
     assert(IntptrAlignment >= kMinOriginAlignment);
     assert(IntptrSize >= kOriginSize);
 
+    // Note: The loop based formation works for fixed length vectors too,
+    // however we prefer to unroll and specialize alignment below.
+    if (TS.isScalable()) {
+      Value *Size = IRB.CreateTypeSize(IRB.getInt32Ty(), TS);
+      Value *RoundUp = IRB.CreateAdd(Size, IRB.getInt32(kOriginSize - 1));
+      Value *End = IRB.CreateUDiv(RoundUp, IRB.getInt32(kOriginSize));
+      auto [InsertPt, Index] =
+          SplitBlockAndInsertSimpleForLoop(End, &*IRB.GetInsertPoint());
+      IRB.SetInsertPoint(InsertPt);
+
+      Value *GEP = IRB.CreateGEP(MS.OriginTy, OriginPtr, Index);
+      IRB.CreateAlignedStore(Origin, GEP, kMinOriginAlignment);
+      return;
+    }
+
+    unsigned Size = TS.getFixedValue();
+
     unsigned Ofs = 0;
     Align CurrentAlignment = Alignment;
     if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
@@ -1575,6 +1591,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
       return collapseArrayShadow(Array, V, IRB);
     if (isa<VectorType>(V->getType())) {
+      if (isa<ScalableVectorType>(V->getType()))
+        return convertShadowToScalar(IRB.CreateOrReduce(V), IRB);
       unsigned BitWidth =
           V->getType()->getPrimitiveSizeInBits().getFixedValue();
       return IRB.CreateBitCast(V, IntegerType::get(*MS.C, BitWidth));
diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll b/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll
index a2245763abbc7..feb8a27fd5410 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/vector-load-store.ll
@@ -422,4 +422,513 @@ define void @store.v16i32(ptr %p) sanitize_memory {
   ret void
 }
 
+define void @load.nxv1i32(ptr %p) sanitize_memory {
+; CHECK-LABEL: @load.nxv1i32(
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = load <vscale x 1 x i32>, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; CHECK-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <vscale x 1 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT:    ret void
+;
+; ADDR-LABEL: @load.nxv1i32(
+; ADDR-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load , ptr [[P:%.*]], align 4 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP7]], align 4 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv1i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 4 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 4 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +; ORIGINS-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv2i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @load.nxv2i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 8 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 8 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv2i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load , ptr [[P:%.*]], align 8 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP7]], align 8 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv2i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 8 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 8 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 +; ORIGINS-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv4i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @load.nxv4i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 16 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 16 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv4i32( +; 
ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load , ptr [[P:%.*]], align 16 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP7]], align 16 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv4i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 16 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 16 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 16 +; ORIGINS-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv8i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @load.nxv8i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 32 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 32 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv8i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load , ptr [[P:%.*]], align 32 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP7]], align 32 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv8i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 32 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 32 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 32 +; ORIGINS-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv16i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @load.nxv16i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 64 +; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; CHECK-NEXT: [[_MSLD:%.*]] = load , ptr 
[[TMP4]], align 64 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @load.nxv16i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = load , ptr [[P:%.*]], align 64 +; ADDR-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[P]] to i64 +; ADDR-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080 +; ADDR-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; ADDR-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP7]], align 64 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @load.nxv16i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = load , ptr [[P:%.*]], align 64 +; ORIGINS-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64 +; ORIGINS-NEXT: [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080 +; ORIGINS-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr +; ORIGINS-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416 +; ORIGINS-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ORIGINS-NEXT: [[_MSLD:%.*]] = load , ptr [[TMP4]], align 64 +; ORIGINS-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 64 +; ORIGINS-NEXT: ret void +; + load , ptr %p + ret void +} + + +define void @store.nxv1i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @store.nxv1i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 4 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv1i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP6]], align 4 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 4 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv1i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 4 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv1i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0:![0-9]+]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 4 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label 
[[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 4 +; ORIGINS-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv2i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @store.nxv2i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 8 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 8 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv2i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP6]], align 8 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 8 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv2i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 8 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv2i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 8 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 8 +; ORIGINS-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv4i32(ptr %p) sanitize_memory { +; 
CHECK-LABEL: @store.nxv4i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 16 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 16 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv4i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP6]], align 16 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 16 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv4i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 16 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 16 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 16 +; ORIGINS-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv8i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @store.nxv8i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 32 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 32 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv8i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void 
@__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP6]], align 32 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 32 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv8i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 32 +; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv8i32( zeroinitializer) +; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0 +; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]] +; ORIGINS: 7: +; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32() +; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 32 +; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3 +; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4 +; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]] +; ORIGINS: .split: +; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ] +; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]] +; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4 +; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]] +; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]] +; ORIGINS: .split.split: +; ORIGINS-NEXT: br label [[TMP13]] +; ORIGINS: 13: +; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 32 +; ORIGINS-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} +define void @store.nxv16i32(ptr %p) sanitize_memory { +; CHECK-LABEL: @store.nxv16i32( +; CHECK-NEXT: call void @llvm.donothing() +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; CHECK-NEXT: store zeroinitializer, ptr [[TMP3]], align 64 +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 64 +; CHECK-NEXT: ret void +; +; ADDR-LABEL: @store.nxv16i32( +; ADDR-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8 +; ADDR-NEXT: call void @llvm.donothing() +; ADDR-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 +; ADDR-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]] +; ADDR: 2: +; ADDR-NEXT: call void @__msan_warning_noreturn() #[[ATTR3]] +; ADDR-NEXT: unreachable +; ADDR: 3: +; ADDR-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ADDR-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080 +; ADDR-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ADDR-NEXT: store zeroinitializer, ptr [[TMP6]], align 64 +; ADDR-NEXT: store zeroinitializer, ptr [[P]], align 64 +; ADDR-NEXT: ret void +; +; ORIGINS-LABEL: @store.nxv16i32( +; ORIGINS-NEXT: call void @llvm.donothing() +; ORIGINS-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; ORIGINS-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080 +; ORIGINS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr +; ORIGINS-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416 +; 
ORIGINS-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; ORIGINS-NEXT: store zeroinitializer, ptr [[TMP3]], align 64
+; ORIGINS-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.or.nxv16i32( zeroinitializer)
+; ORIGINS-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP6]], 0
+; ORIGINS-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP13:%.*]], !prof [[PROF0]]
+; ORIGINS: 7:
+; ORIGINS-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32()
+; ORIGINS-NEXT: [[TMP9:%.*]] = mul i32 [[TMP8]], 64
+; ORIGINS-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 3
+; ORIGINS-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], 4
+; ORIGINS-NEXT: br label [[DOTSPLIT:%.*]]
+; ORIGINS: .split:
+; ORIGINS-NEXT: [[IV:%.*]] = phi i32 [ 0, [[TMP7]] ], [ [[IV_NEXT:%.*]], [[DOTSPLIT]] ]
+; ORIGINS-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP5]], i32 [[IV]]
+; ORIGINS-NEXT: store i32 0, ptr [[TMP12]], align 4
+; ORIGINS-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; ORIGINS-NEXT: [[IV_CHECK:%.*]] = icmp eq i32 [[IV_NEXT]], [[TMP11]]
+; ORIGINS-NEXT: br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
+; ORIGINS: .split.split:
+; ORIGINS-NEXT: br label [[TMP13]]
+; ORIGINS: 13:
+; ORIGINS-NEXT: store zeroinitializer, ptr [[P]], align 64
+; ORIGINS-NEXT: ret void
+;
+ store zeroinitializer, ptr %p
+ ret void
+}

From 82c83d7e41053b72fc0dc84de9b8bee71986ffc3 Mon Sep 17 00:00:00 2001
From: Corentin Jabot
Date: Tue, 21 Mar 2023 16:57:43 +0100
Subject: [PATCH 436/691] [Clang] Fix evaluation of parameters of lambda call operator attributes

Fix a regression introduced by D124351.

Attributes of a lambda call operator were evaluated in the context of the closure object type rather than its operator, causing an assertion failure.

This was because we temporarily switched to the lambda class to produce the mangling of the lambda, but stayed in that context too long.

Reviewed By: eandrews, aaron.ballman

Differential Revision: https://reviews.llvm.org/D146535
---
 clang/lib/Sema/SemaLambda.cpp | 7 +++----
 clang/test/SemaCXX/lambda-expressions.cpp | 12 +++++++++++-
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Sema/SemaLambda.cpp b/clang/lib/Sema/SemaLambda.cpp
index 3a82c7b3e8285..64db9d065f9c6 100644
--- a/clang/lib/Sema/SemaLambda.cpp
+++ b/clang/lib/Sema/SemaLambda.cpp
@@ -390,6 +390,9 @@ buildTypeForLambdaCallOperator(Sema &S, clang::CXXRecordDecl *Class, void Sema::handleLambdaNumbering( CXXRecordDecl *Class, CXXMethodDecl *Method, std::optional> Mangling) {
+
+ ContextRAII ManglingContext(*this, Class->getDeclContext());
+
if (Mangling) { bool HasKnownInternalLinkage; unsigned ManglingNumber, DeviceManglingNumber;
@@ -1324,8 +1327,6 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, ParamInfo.getDeclSpec().getConstexprSpecifier(), IsLambdaStatic ?
SC_Static : SC_None, Params, ExplicitResultType);
- ContextRAII ManglingContext(*this, Class->getDeclContext());
-
CheckCXXDefaultArguments(Method);
// This represents the function body for the lambda function, check if we
@@ -1350,8 +1351,6 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro, handleLambdaNumbering(Class, Method);
- ManglingContext.pop();
-
for (auto &&C : LSI->Captures) { if (!C.isVariableCapture()) continue;
diff --git a/clang/test/SemaCXX/lambda-expressions.cpp b/clang/test/SemaCXX/lambda-expressions.cpp
index 84d224fdc835e..67853c991ce53 100644
--- a/clang/test/SemaCXX/lambda-expressions.cpp
+++ b/clang/test/SemaCXX/lambda-expressions.cpp
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -std=c++14 -Wno-unused-value -fsyntax-only -verify -verify=expected-cxx14 -fblocks %s
-// RUN: %clang_cc1 -std=c++17 -Wno-unused-value -fsyntax-only -verify -fblocks %s
+// RUN: %clang_cc1 -std=c++17 -Wno-unused-value -verify -ast-dump -fblocks %s | FileCheck %s
namespace std { class type_info; };
@@ -704,3 +704,13 @@ static_assert([]() constexpr { }()); } // namespace GH60936 #endif
+
+// Call operator attributes referring to a variable should
+// be properly handled after D124351
+constexpr int i = 2;
+void foo() {
+ (void)[=][[gnu::aligned(i)]] () {}; // expected-warning{{C++2b extension}}
+ // CHECK: AlignedAttr
+ // CHECK-NEXT: ConstantExpr
+ // CHECK-NEXT: value: Int 2
+}

From 61944469625d4b3ba4a87f4f8fffefb73e9f8cdc Mon Sep 17 00:00:00 2001
From: Archibald Elliott
Date: Wed, 22 Mar 2023 13:25:08 +0000
Subject: [PATCH 437/691] [AArch64] Add Missing Custom Target Operands

I noticed, when examining the generated Asm Matcher table, that some of these custom immediate operands are missing, and so we are not parsing some hint aliases into the correct MCInst.

Where this becomes apparent is when you parse e.g. `hint #7` into an MCInst - without these cases, it becomes the MCInst `(HINT 7)`, which will always be printed as `hint #7`. With these cases, it becomes the MCInst `XPACLRI`, which will be printed as `xpaclri` with pauth, or `hint #7` without, matching how `xpaclri` is parsed.

We only handle some specific hint aliases in this manner, usually where these hints have specific effects that need to be modelled for accurate code generation. Otherwise, we just use the normal `InstAlias` system to have the aliases parsed into a `(HINT N)` MCInst.

Differential Revision: https://reviews.llvm.org/D146630
---
 .../AArch64/AsmParser/AArch64AsmParser.cpp | 17 ++++++--
 .../test/MC/AArch64/armv8.3a-signed-pointer.s | 39 +++++++++++++++++++
 2 files changed, 53 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index d5162d39ce43c..b0c554780edfd 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -7642,9 +7642,10 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, return Match_Success; return Match_InvalidOperand;
- // If the kind is a token for a literal immediate, check if our asm
- // operand matches. This is for InstAliases which have a fixed-value
- // immediate in the syntax.
+ // If the kind is a token for a literal immediate, check if our asm operand
+ // matches. This is for InstAliases which have a fixed-value immediate in
+ // the asm string, such as hints which are parsed into a specific
+ // instruction definition.
#define MATCH_HASH(N) \ case MCK__HASH_##N: \ return MatchesOpImmediate(N); @@ -7654,10 +7655,20 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, MATCH_HASH(3) MATCH_HASH(4) MATCH_HASH(6) + MATCH_HASH(7) MATCH_HASH(8) + MATCH_HASH(10) MATCH_HASH(12) + MATCH_HASH(14) MATCH_HASH(16) MATCH_HASH(24) + MATCH_HASH(25) + MATCH_HASH(26) + MATCH_HASH(27) + MATCH_HASH(28) + MATCH_HASH(29) + MATCH_HASH(30) + MATCH_HASH(31) MATCH_HASH(32) MATCH_HASH(40) MATCH_HASH(48) diff --git a/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s b/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s index dad4bc667853b..e13b1bf3c98d8 100644 --- a/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s +++ b/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s @@ -96,44 +96,83 @@ // ALL-EMPTY: // ALL-EMPTY: + hint #25 paciasp // CHECK-NEXT: paciasp // encoding: [0x3f,0x23,0x03,0xd5] +// CHECK-NEXT: paciasp // encoding: [0x3f,0x23,0x03,0xd5] +// NO83-NEXT: hint #25 // encoding: [0x3f,0x23,0x03,0xd5] // NO83-NEXT: hint #25 // encoding: [0x3f,0x23,0x03,0xd5] + hint #29 autiasp // CHECK-NEXT: autiasp // encoding: [0xbf,0x23,0x03,0xd5] +// CHECK-NEXT: autiasp // encoding: [0xbf,0x23,0x03,0xd5] +// NO83-NEXT: hint #29 // encoding: [0xbf,0x23,0x03,0xd5] // NO83-NEXT: hint #29 // encoding: [0xbf,0x23,0x03,0xd5] + hint #24 paciaz // CHECK-NEXT: paciaz // encoding: [0x1f,0x23,0x03,0xd5] +// CHECK-NEXT: paciaz // encoding: [0x1f,0x23,0x03,0xd5] +// NO83-NEXT: hint #24 // encoding: [0x1f,0x23,0x03,0xd5] // NO83-NEXT: hint #24 // encoding: [0x1f,0x23,0x03,0xd5] + hint #28 autiaz // CHECK-NEXT: autiaz // encoding: [0x9f,0x23,0x03,0xd5] +// CHECK-NEXT: autiaz // encoding: [0x9f,0x23,0x03,0xd5] // NO83-NEXT: hint #28 // encoding: [0x9f,0x23,0x03,0xd5] +// NO83-NEXT: hint #28 // encoding: [0x9f,0x23,0x03,0xd5] + hint #8 pacia1716 // CHECK-NEXT: pacia1716 // encoding: [0x1f,0x21,0x03,0xd5] +// CHECK-NEXT: pacia1716 // encoding: [0x1f,0x21,0x03,0xd5] +// NO83-NEXT: hint #8 // encoding: [0x1f,0x21,0x03,0xd5] // NO83-NEXT: hint #8 // encoding: [0x1f,0x21,0x03,0xd5] + hint #12 autia1716 // CHECK-NEXT: autia1716 // encoding: [0x9f,0x21,0x03,0xd5] +// CHECK-NEXT: autia1716 // encoding: [0x9f,0x21,0x03,0xd5] +// NO83-NEXT: hint #12 // encoding: [0x9f,0x21,0x03,0xd5] // NO83-NEXT: hint #12 // encoding: [0x9f,0x21,0x03,0xd5] + hint #27 pacibsp // CHECK-NEXT: pacibsp // encoding: [0x7f,0x23,0x03,0xd5] +// CHECK-NEXT: pacibsp // encoding: [0x7f,0x23,0x03,0xd5] // NO83-NEXT: hint #27 // encoding: [0x7f,0x23,0x03,0xd5] +// NO83-NEXT: hint #27 // encoding: [0x7f,0x23,0x03,0xd5] + hint #31 autibsp // CHECK-NEXT: autibsp // encoding: [0xff,0x23,0x03,0xd5] +// CHECK-NEXT: autibsp // encoding: [0xff,0x23,0x03,0xd5] // NO83-NEXT: hint #31 // encoding: [0xff,0x23,0x03,0xd5] +// NO83-NEXT: hint #31 // encoding: [0xff,0x23,0x03,0xd5] + hint #26 pacibz // CHECK-NEXT: pacibz // encoding: [0x5f,0x23,0x03,0xd5] +// CHECK-NEXT: pacibz // encoding: [0x5f,0x23,0x03,0xd5] // NO83-NEXT: hint #26 // encoding: [0x5f,0x23,0x03,0xd5] +// NO83-NEXT: hint #26 // encoding: [0x5f,0x23,0x03,0xd5] + hint #30 autibz // CHECK-NEXT: autibz // encoding: [0xdf,0x23,0x03,0xd5] +// CHECK-NEXT: autibz // encoding: [0xdf,0x23,0x03,0xd5] +// NO83-NEXT: hint #30 // encoding: [0xdf,0x23,0x03,0xd5] // NO83-NEXT: hint #30 // encoding: [0xdf,0x23,0x03,0xd5] + hint #10 pacib1716 // CHECK-NEXT: pacib1716 // encoding: [0x5f,0x21,0x03,0xd5] +// CHECK-NEXT: pacib1716 // encoding: [0x5f,0x21,0x03,0xd5] +// NO83-NEXT: hint #10 // encoding: [0x5f,0x21,0x03,0xd5] // NO83-NEXT: hint #10 // 
encoding: [0x5f,0x21,0x03,0xd5] + hint #14 autib1716 // CHECK-NEXT: autib1716 // encoding: [0xdf,0x21,0x03,0xd5] +// CHECK-NEXT: autib1716 // encoding: [0xdf,0x21,0x03,0xd5] // NO83-NEXT: hint #14 // encoding: [0xdf,0x21,0x03,0xd5] +// NO83-NEXT: hint #14 // encoding: [0xdf,0x21,0x03,0xd5] + hint #7 xpaclri // CHECK-NEXT: xpaclri // encoding: [0xff,0x20,0x03,0xd5] +// CHECK-NEXT: xpaclri // encoding: [0xff,0x20,0x03,0xd5] +// NO83-NEXT: hint #7 // encoding: [0xff,0x20,0x03,0xd5] // NO83-NEXT: hint #7 // encoding: [0xff,0x20,0x03,0xd5] // ALL-EMPTY: From f570bd8f6322fab18df5099786683a813f9e7a08 Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov Date: Wed, 22 Mar 2023 18:09:00 +0000 Subject: [PATCH 438/691] [HWASAN] Disable unexpected_format_specifier_test because HWASAN doesn't provide a printf interceptor Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D146647 --- compiler-rt/test/sanitizer_common/TestCases/Linux/mprobe.cpp | 2 +- .../TestCases/Linux/unexpected_format_specifier_test.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/mprobe.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/mprobe.cpp index 82c0faf0e2add..7633eb4762292 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/mprobe.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/mprobe.cpp @@ -1,5 +1,5 @@ // RUN: %clangxx %s -o %t && %run %t 2>&1 | FileCheck %s -// UNSUPPORTED: android, ubsan +// UNSUPPORTED: android, hwasan, ubsan #include #include diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/unexpected_format_specifier_test.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/unexpected_format_specifier_test.cpp index 641495508ba10..fdce916ad1e1a 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/unexpected_format_specifier_test.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/unexpected_format_specifier_test.cpp @@ -1,4 +1,5 @@ // RUN: %clang -w -O0 %s -o %t && %run %t 2>&1 | FileCheck %s +// UNSUPPORTED: hwasan // UNSUPPORTED: lsan // UNSUPPORTED: msan // UNSUPPORTED: ubsan From 5eb9acf9be3cee01ea95448fa8b1e00e3c01868a Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 23 Mar 2023 08:09:32 -0700 Subject: [PATCH 439/691] [HWASAN] Instrument scalable load/store without crashing We can simply push them down the existing call slowpath with some minor changes to how we compute the size argument. 
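Concretely, the slow-path call keeps its (pointer, size) shape; only the size operand changes. Below is a condensed sketch of that computation as a hypothetical standalone helper (the name is invented; TypeStoreSize values are in bits, as in the pass):

// Sketch of how the byte count passed to __hwasan_{load,store}N is formed.
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/TypeSize.h"

using namespace llvm;

static Value *emitAccessSizeInBytes(IRBuilder<> &IRB, Type *IntptrTy,
                                    TypeSize StoreSizeInBits) {
  // CreateTypeSize emits (vscale * min-size) for a scalable size and a
  // plain constant otherwise; dividing by 8 converts bits to bytes.
  Value *SizeInBits = IRB.CreateTypeSize(IntptrTy, StoreSizeInBits);
  return IRB.CreateUDiv(SizeInBits, ConstantInt::get(IntptrTy, 8));
}

For fixed-size types the expression folds back to the old constant; for scalable types it lowers to the llvm.vscale.i64 / mul / udiv sequences seen in the new test's CHECK lines.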
--- .../Instrumentation/HWAddressSanitizer.cpp | 6 +- .../HWAddressSanitizer/vector-load-store.ll | 272 ++++++++++++++++++ 2 files changed, 276 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Instrumentation/HWAddressSanitizer/vector-load-store.ll diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index ca498d08422f7..f98cb67481154 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -964,7 +964,7 @@ bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) { return false; // FIXME IRBuilder<> IRB(O.getInsn()); - if (isPowerOf2_64(O.TypeStoreSize) && + if (!O.TypeStoreSize.isScalable() && isPowerOf2_64(O.TypeStoreSize) && (O.TypeStoreSize / 8 <= (1ULL << (kNumberOfAccessSizes - 1))) && (!O.Alignment || *O.Alignment >= Mapping.getObjectAlignment() || *O.Alignment >= O.TypeStoreSize / 8)) { @@ -980,7 +980,9 @@ bool HWAddressSanitizer::instrumentMemAccess(InterestingMemoryOperand &O) { } else { IRB.CreateCall(HwasanMemoryAccessCallbackSized[O.IsWrite], {IRB.CreatePointerCast(Addr, IntptrTy), - ConstantInt::get(IntptrTy, O.TypeStoreSize / 8)}); + IRB.CreateUDiv(IRB.CreateTypeSize(IntptrTy, + O.TypeStoreSize), + ConstantInt::get(IntptrTy, 8))}); } untagPointerOperand(O.getInsn(), Addr); diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/vector-load-store.ll b/llvm/test/Instrumentation/HWAddressSanitizer/vector-load-store.ll new file mode 100644 index 0000000000000..5312c7cc7336d --- /dev/null +++ b/llvm/test/Instrumentation/HWAddressSanitizer/vector-load-store.ll @@ -0,0 +1,272 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=hwasan -S | FileCheck %s + +target triple = "aarch64--linux-android10000" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @load.v1i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.v1i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 2) +; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + load <1 x i32>, ptr %p + ret void +} + +define void @load.v2i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.v2i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 3) +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + load <2 x i32>, ptr %p + ret void +} + +define void @load.v4i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.v4i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 4) +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[P]], align 16 +; CHECK-NEXT: ret void +; + load <4 x i32>, ptr %p + ret void +} + +define void @load.v8i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.v8i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 32) +; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[P]], 
align 32 +; CHECK-NEXT: ret void +; + load <8 x i32>, ptr %p + ret void +} + +define void @load.v16i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.v16i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 64) +; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr [[P]], align 64 +; CHECK-NEXT: ret void +; + load <16 x i32>, ptr %p + ret void +} + + +define void @store.v1i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.v1i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 18) +; CHECK-NEXT: store <1 x i32> zeroinitializer, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + store <1 x i32> zeroinitializer, ptr %p + ret void +} + +define void @store.v2i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.v2i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 19) +; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + store <2 x i32> zeroinitializer, ptr %p + ret void +} + +define void @store.v4i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.v4i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: call void @llvm.hwasan.check.memaccess.shortgranules(ptr [[DOTHWASAN_SHADOW]], ptr [[P:%.*]], i32 20) +; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[P]], align 16 +; CHECK-NEXT: ret void +; + store <4 x i32> zeroinitializer, ptr %p + ret void +} + +define void @store.v8i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.v8i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 32) +; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr [[P]], align 32 +; CHECK-NEXT: ret void +; + store <8 x i32> zeroinitializer, ptr %p + ret void +} + +define void @store.v16i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.v16i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 64) +; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr [[P]], align 64 +; CHECK-NEXT: ret void +; + store <16 x i32> zeroinitializer, ptr %p + ret void +} + + +define void @load.nxv1i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.nxv1i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv2i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.nxv2i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to 
i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 64 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv4i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.nxv4i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 128 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 16 +; CHECK-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv8i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.nxv8i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 256 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 32 +; CHECK-NEXT: ret void +; + load , ptr %p + ret void +} + +define void @load.nxv16i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @load.nxv16i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 512 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_loadN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: [[TMP5:%.*]] = load , ptr [[P]], align 64 +; CHECK-NEXT: ret void +; + load , ptr %p + ret void +} + + +define void @store.nxv1i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.nxv1i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 4 +; CHECK-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv2i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.nxv2i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 64 +; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8 +; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 [[TMP4]]) +; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 8 +; CHECK-NEXT: ret void +; + store zeroinitializer, ptr %p + ret void +} + +define void @store.nxv4i32(ptr %p) sanitize_hwaddress { +; CHECK-LABEL: @store.nxv4i32( +; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow) +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64 +; CHECK-NEXT: 
[[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 128
+; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8
+; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 [[TMP4]])
+; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 16
+; CHECK-NEXT: ret void
+;
+ store zeroinitializer, ptr %p
+ ret void
+}
+
+define void @store.nxv8i32(ptr %p) sanitize_hwaddress {
+; CHECK-LABEL: @store.nxv8i32(
+; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow)
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 256
+; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8
+; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 [[TMP4]])
+; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 32
+; CHECK-NEXT: ret void
+;
+ store zeroinitializer, ptr %p
+ ret void
+}
+
+define void @store.nxv16i32(ptr %p) sanitize_hwaddress {
+; CHECK-LABEL: @store.nxv16i32(
+; CHECK-NEXT: [[DOTHWASAN_SHADOW:%.*]] = call ptr asm "", "=r,0"(ptr @__hwasan_shadow)
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 512
+; CHECK-NEXT: [[TMP4:%.*]] = udiv i64 [[TMP3]], 8
+; CHECK-NEXT: call void @__hwasan_storeN(i64 [[TMP1]], i64 [[TMP4]])
+; CHECK-NEXT: store zeroinitializer, ptr [[P]], align 64
+; CHECK-NEXT: ret void
+;
+ store zeroinitializer, ptr %p
+ ret void
+}

From fd1850b36158eaee5a2d577adc5872ab58362669 Mon Sep 17 00:00:00 2001
From: Job Noorman
Date: Thu, 23 Mar 2023 16:25:53 +0100
Subject: [PATCH 440/691] Test commit to see if write access works

From 85faee69928b1eeb74a0d74f374a1c74ddf236dd Mon Sep 17 00:00:00 2001
From: Jan Sjodin
Date: Tue, 21 Mar 2023 13:38:54 -0400
Subject: [PATCH 441/691] [OpenMP][OMPIRBuilder] Make OffloadEntriesInfoManager a member of OpenMPIRBuilder

This patch adds the OffloadEntriesInfoManager to the OpenMPIRBuilder, and allows the OffloadEntriesInfoManager to access the Configuration in the OpenMPIRBuilder. With the shared Config there is no risk of inconsistencies, and clang no longer needs a separate OffloadEntriesInfoManager.
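As a minimal usage sketch (a hypothetical caller; the member name and the registration API are the ones introduced by this patch), frontends now reach the builder-owned manager directly:

// Hypothetical caller: no separately constructed manager and no second
// setConfig call that could drift from the builder's configuration.
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"

using namespace llvm;

void registerOutlinedRegion(OpenMPIRBuilder &OMPBuilder,
                            TargetRegionEntryInfo &EntryInfo,
                            Function *OutlinedFn, Constant *ID) {
  OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
      EntryInfo, OutlinedFn, ID,
      OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
}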
Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D146549 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 39 +- clang/lib/CodeGen/CGOpenMPRuntime.h | 3 - clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 1 - .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 511 +++++++++--------- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 38 +- .../Frontend/OpenMPIRBuilderTest.cpp | 5 +- 6 files changed, 291 insertions(+), 306 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 58a95d64ac50e..76d0b92796bc5 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1054,7 +1054,7 @@ static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC, } CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) - : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() { + : CGM(CGM), OMPBuilder(CGM.getModule()) { KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8); llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false, hasRequiresUnifiedSharedMemory(), @@ -1062,7 +1062,6 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM) // Initialize Types used in OpenMPIRBuilder from OMPKinds.def OMPBuilder.initialize(); OMPBuilder.setConfig(Config); - OffloadEntriesInfoManager.setConfig(Config); loadOffloadInfoMetadata(); } @@ -1852,7 +1851,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, auto EntryInfo = getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName()); SmallString<128> Buffer, Out; - OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); + OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo); const Expr *Init = VD->getAnyInitializer(); if (CGM.getLangOpts().CPlusPlus && PerformInit) { @@ -1900,7 +1899,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, Out.clear(); auto CtorEntryInfo = EntryInfo; CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out); - OffloadEntriesInfoManager.registerTargetRegionEntryInfo( + OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo( CtorEntryInfo, Ctor, ID, llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor); } @@ -1949,7 +1948,7 @@ bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD, Out.clear(); auto DtorEntryInfo = EntryInfo; DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out); - OffloadEntriesInfoManager.registerTargetRegionEntryInfo( + OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo( DtorEntryInfo, Dtor, ID, llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor); } @@ -2942,7 +2941,7 @@ enum KmpTaskTFields { void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { // If we are in simd mode or there are no entries, we don't need to do // anything. 
- if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty()) + if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty()) return; llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn = @@ -2986,8 +2985,7 @@ void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() { } }; - OMPBuilder.createOffloadEntriesAndInfoMetadata(OffloadEntriesInfoManager, - ErrorReportFn); + OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn); } /// Loads all the offload entries information from the host IR @@ -3021,7 +3019,7 @@ void CGOpenMPRuntime::loadOffloadInfoMetadata() { return; } - OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager); + OMPBuilder.loadOffloadInfoMetadata(*ME.get()); } void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) { @@ -6109,10 +6107,9 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams); getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads); - OMPBuilder.emitTargetRegionFunction(OffloadEntriesInfoManager, EntryInfo, - GenerateOutlinedFunction, DefaultValTeams, - DefaultValThreads, IsOffloadEntry, - OutlinedFn, OutlinedFnID); + OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction, + DefaultValTeams, DefaultValThreads, + IsOffloadEntry, OutlinedFn, OutlinedFnID); if (OutlinedFn != nullptr) CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM); @@ -10136,7 +10133,7 @@ void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S, // Is this a target region that should not be emitted as an entry point? If // so just signal we are done with this target region. - if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(EntryInfo)) + if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo)) return; switch (E.getDirectiveKind()) { @@ -10392,7 +10389,7 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, Linkage == llvm::GlobalValue::LinkOnceODRLinkage)) { // Do not create a "ref-variable" if the original is not also available // on the host. - if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) + if (!OMPBuilder.OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName)) return; std::string RefName = getName({VarName, "ref"}); if (!CGM.GetGlobalValue(RefName)) { @@ -10427,7 +10424,7 @@ void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD, Linkage = llvm::GlobalValue::WeakAnyLinkage; } - OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo( + OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo( VarName, Addr, VarSize, Flags, Linkage); } @@ -10562,9 +10559,8 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { // don't need to do anything. if (CGM.getLangOpts().OMPTargetTriples.empty() || CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice || - (OffloadEntriesInfoManager.empty() && - !HasEmittedDeclareTargetRegion && - !HasEmittedTargetRegion)) + (OMPBuilder.OffloadInfoManager.empty() && + !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion)) return nullptr; // Create and register the function that handles the requires directives. @@ -10585,9 +10581,8 @@ llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() { // passed to the runtime. This avoids the runtime from throwing an error // for mismatching requires clauses across compilation units that don't // contain at least 1 target region. 
- assert((HasEmittedTargetRegion || - HasEmittedDeclareTargetRegion || - !OffloadEntriesInfoManager.empty()) && + assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion || + !OMPBuilder.OffloadInfoManager.empty()) && "Target or declare target region expected."); if (HasRequiresUnifiedSharedMemory) Flags = OMP_REQ_UNIFIED_SHARED_MEMORY; diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h index e7c1a098c7689..c9678a16ce90b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.h +++ b/clang/lib/CodeGen/CGOpenMPRuntime.h @@ -508,9 +508,6 @@ class CGOpenMPRuntime { /// kmp_int64 st; // stride /// }; QualType KmpDimTy; - /// Entity that registers the offloading constants that were emitted so - /// far. - llvm::OffloadEntriesInfoManager OffloadEntriesInfoManager; bool ShouldMarkAsGlobal = true; /// List of the emitted declarations. diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index e8c5f04db49f4..4ac28ee17a50b 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -863,7 +863,6 @@ CGOpenMPRuntimeGPU::CGOpenMPRuntimeGPU(CodeGenModule &CGM) hasRequiresUnifiedSharedMemory(), CGM.getLangOpts().OpenMPOffloadMandatory); OMPBuilder.setConfig(Config); - OffloadEntriesInfoManager.setConfig(Config); if (!CGM.getLangOpts().OpenMPIsDevice) llvm_unreachable("OpenMP can only handle device code."); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index a13f8528fa6dd..acf91a2d35c41 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -27,6 +27,7 @@ namespace llvm { class CanonicalLoopInfo; struct TargetRegionEntryInfo; class OffloadEntriesInfoManager; +class OpenMPIRBuilder; /// Move the instruction after an InsertPoint to the beginning of another /// BasicBlock. @@ -160,6 +161,251 @@ class OpenMPIRBuilderConfig { void setSeparator(StringRef S) { Separator = S; } }; +/// Data structure to contain the information needed to uniquely identify +/// a target entry. +struct TargetRegionEntryInfo { + std::string ParentName; + unsigned DeviceID; + unsigned FileID; + unsigned Line; + unsigned Count; + + TargetRegionEntryInfo() + : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {} + TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, + unsigned FileID, unsigned Line, unsigned Count = 0) + : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), + Count(Count) {} + + static void getTargetRegionEntryFnName(SmallVectorImpl &Name, + StringRef ParentName, + unsigned DeviceID, unsigned FileID, + unsigned Line, unsigned Count); + + bool operator<(const TargetRegionEntryInfo RHS) const { + return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < + std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, + RHS.Count); + } +}; + +/// Class that manages information about offload code regions and data +class OffloadEntriesInfoManager { + /// Number of entries registered so far. + OpenMPIRBuilder *OMPBuilder; + unsigned OffloadingEntriesNum = 0; + +public: + /// Base class of the entries info. + class OffloadEntryInfo { + public: + /// Kind of a given entry. + enum OffloadingEntryInfoKinds : unsigned { + /// Entry is a target region. + OffloadingEntryInfoTargetRegion = 0, + /// Entry is a declare target variable. + OffloadingEntryInfoDeviceGlobalVar = 1, + /// Invalid entry info. 
+ OffloadingEntryInfoInvalid = ~0u + }; + + protected: + OffloadEntryInfo() = delete; + explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} + explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, + uint32_t Flags) + : Flags(Flags), Order(Order), Kind(Kind) {} + ~OffloadEntryInfo() = default; + + public: + bool isValid() const { return Order != ~0u; } + unsigned getOrder() const { return Order; } + OffloadingEntryInfoKinds getKind() const { return Kind; } + uint32_t getFlags() const { return Flags; } + void setFlags(uint32_t NewFlags) { Flags = NewFlags; } + Constant *getAddress() const { return cast_or_null(Addr); } + void setAddress(Constant *V) { + assert(!Addr.pointsToAliveValue() && "Address has been set before!"); + Addr = V; + } + static bool classof(const OffloadEntryInfo *Info) { return true; } + + private: + /// Address of the entity that has to be mapped for offloading. + WeakTrackingVH Addr; + + /// Flags associated with the device global. + uint32_t Flags = 0u; + + /// Order this entry was emitted. + unsigned Order = ~0u; + + OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; + }; + + /// Return true if a there are no entries defined. + bool empty() const; + /// Return number of entries defined so far. + unsigned size() const { return OffloadingEntriesNum; } + + OffloadEntriesInfoManager(OpenMPIRBuilder *builder) : OMPBuilder(builder) {} + + // + // Target region entries related. + // + + /// Kind of the target registry entry. + enum OMPTargetRegionEntryKind : uint32_t { + /// Mark the entry as target region. + OMPTargetRegionEntryTargetRegion = 0x0, + /// Mark the entry as a global constructor. + OMPTargetRegionEntryCtor = 0x02, + /// Mark the entry as a global destructor. + OMPTargetRegionEntryDtor = 0x04, + }; + + /// Target region entries info. + class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { + /// Address that can be used as the ID of the entry. + Constant *ID = nullptr; + + public: + OffloadEntryInfoTargetRegion() + : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} + explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, + Constant *ID, + OMPTargetRegionEntryKind Flags) + : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), + ID(ID) { + setAddress(Addr); + } + + Constant *getID() const { return ID; } + void setID(Constant *V) { + assert(!ID && "ID has been set before!"); + ID = V; + } + static bool classof(const OffloadEntryInfo *Info) { + return Info->getKind() == OffloadingEntryInfoTargetRegion; + } + }; + + /// Initialize target region entry. + /// This is ONLY needed for DEVICE compilation. + void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, + unsigned Order); + /// Register target region entry. + void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, + Constant *Addr, Constant *ID, + OMPTargetRegionEntryKind Flags); + /// Return true if a target region entry with the provided information + /// exists. + bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, + bool IgnoreAddressId = false) const; + + // Return the Name based on \a EntryInfo using the next available Count. + void getTargetRegionEntryFnName(SmallVectorImpl &Name, + const TargetRegionEntryInfo &EntryInfo); + + /// brief Applies action \a Action on all registered entries. 
+  typedef function_ref<void(const TargetRegionEntryInfo &EntryInfo,
+                            const OffloadEntryInfoTargetRegion &)>
+      OffloadTargetRegionEntryInfoActTy;
+  void
+  actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action);
+
+  //
+  // Device global variable entries related.
+  //
+
+  /// Kind of the global variable entry.
+  enum OMPTargetGlobalVarEntryKind : uint32_t {
+    /// Mark the entry as a declare target 'to' entry.
+    OMPTargetGlobalVarEntryTo = 0x0,
+    /// Mark the entry as a declare target 'link' entry.
+    OMPTargetGlobalVarEntryLink = 0x1,
+  };
+
+  /// Device global variable entries info.
+  class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo {
+    /// Type of the global variable.
+    int64_t VarSize;
+    GlobalValue::LinkageTypes Linkage;
+
+  public:
+    OffloadEntryInfoDeviceGlobalVar()
+        : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {}
+    explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order,
+                                             OMPTargetGlobalVarEntryKind Flags)
+        : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {}
+    explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr,
+                                             int64_t VarSize,
+                                             OMPTargetGlobalVarEntryKind Flags,
+                                             GlobalValue::LinkageTypes Linkage)
+        : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags),
+          VarSize(VarSize), Linkage(Linkage) {
+      setAddress(Addr);
+    }
+
+    int64_t getVarSize() const { return VarSize; }
+    void setVarSize(int64_t Size) { VarSize = Size; }
+    GlobalValue::LinkageTypes getLinkage() const { return Linkage; }
+    void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; }
+    static bool classof(const OffloadEntryInfo *Info) {
+      return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar;
+    }
+  };
+
+  /// Initialize device global variable entry.
+  /// This is ONLY used for DEVICE compilation.
+  void initializeDeviceGlobalVarEntryInfo(StringRef Name,
+                                          OMPTargetGlobalVarEntryKind Flags,
+                                          unsigned Order);
+
+  /// Register device global variable entry.
+  void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr,
+                                        int64_t VarSize,
+                                        OMPTargetGlobalVarEntryKind Flags,
+                                        GlobalValue::LinkageTypes Linkage);
+  /// Checks if the variable with the given name has been registered already.
+  bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const {
+    return OffloadEntriesDeviceGlobalVar.count(VarName) > 0;
+  }
+  /// Applies action \a Action on all registered entries.
+  typedef function_ref<void(StringRef,
+                            const OffloadEntryInfoDeviceGlobalVar &)>
+      OffloadDeviceGlobalVarEntryInfoActTy;
+  void actOnDeviceGlobalVarEntriesInfo(
+      const OffloadDeviceGlobalVarEntryInfoActTy &Action);
+
+private:
+  /// Return the count of entries at a particular source location.
+  unsigned
+  getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const;
+
+  /// Update the count of entries at a particular source location.
+  void
+  incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo);
+
+  static TargetRegionEntryInfo
+  getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) {
+    return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID,
+                                 EntryInfo.FileID, EntryInfo.Line, 0);
+  }
+
+  // Count of entries at a location.
+  std::map<TargetRegionEntryInfo, unsigned> OffloadEntriesTargetRegionCount;
+
+  // Storage for target region entries kind.
+  typedef std::map<TargetRegionEntryInfo, OffloadEntryInfoTargetRegion>
+      OffloadEntriesTargetRegionTy;
+  OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion;
+  /// Storage for device global variable entries kind. The storage is to be
+  /// indexed by mangled name.
+  typedef StringMap<OffloadEntryInfoDeviceGlobalVar>
+      OffloadEntriesDeviceGlobalVarTy;
+  OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar;
+};
+
 /// An interface to create LLVM-IR for OpenMP directives.
/// /// Each OpenMP directive has a corresponding public generator method. @@ -167,7 +413,8 @@ class OpenMPIRBuilder { public: /// Create a new OpenMPIRBuilder operating on the given module \p M. This will /// not have an effect on \p M (see initialize) - OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {} + OpenMPIRBuilder(Module &M) + : M(M), Builder(M.getContext()), OffloadInfoManager(this) {} ~OpenMPIRBuilder(); /// Initialize the internal state, this will put structures types and @@ -1063,6 +1310,9 @@ class OpenMPIRBuilder { /// Map to remember existing ident_t*. DenseMap, Constant *> IdentMap; + /// Info manager to keep track of target regions. + OffloadEntriesInfoManager OffloadInfoManager; + /// Helper that contains information about regions we need to outline /// during finalization. struct OutlineInfo { @@ -1231,7 +1481,6 @@ class OpenMPIRBuilder { // // We only generate metadata for function that contain target regions. void createOffloadEntriesAndInfoMetadata( - OffloadEntriesInfoManager &OffloadEntriesInfoManager, EmitMetadataErrorReportFunctionTy &ErrorReportFunction); public: @@ -1531,8 +1780,7 @@ class OpenMPIRBuilder { /// \param NumThreads Number default threads /// \param OutlinedFunction Pointer to the outlined function /// \param EntryFnIDName Name of the ID o be created - void emitTargetRegionFunction(OffloadEntriesInfoManager &InfoManager, - TargetRegionEntryInfo &EntryInfo, + void emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, int32_t NumTeams, int32_t NumThreads, bool IsOffloadEntry, Function *&OutlinedFn, @@ -1548,8 +1796,7 @@ class OpenMPIRBuilder { /// \param EntryFnIDName Name of the ID o be created /// \param NumTeams Number default teams /// \param NumThreads Number default threads - Constant *registerTargetRegionFunction(OffloadEntriesInfoManager &InfoManager, - TargetRegionEntryInfo &EntryInfo, + Constant *registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName, @@ -1918,10 +2165,7 @@ class OpenMPIRBuilder { /// /// \param M Module to load Metadata info from. Module passed maybe /// loaded from bitcode file, i.e, different from OpenMPIRBuilder::M module. - /// \param OffloadEntriesInfoManager Initialize Offload Entry information. - void - loadOffloadInfoMetadata(Module &M, - OffloadEntriesInfoManager &OffloadEntriesInfoManager); + void loadOffloadInfoMetadata(Module &M); /// Gets (if variable with the given name already exist) or creates /// internal global variable with the specified Name. The created variable has @@ -1933,253 +2177,6 @@ class OpenMPIRBuilder { unsigned AddressSpace = 0); }; -/// Data structure to contain the information needed to uniquely identify -/// a target entry. 
-struct TargetRegionEntryInfo { - std::string ParentName; - unsigned DeviceID; - unsigned FileID; - unsigned Line; - unsigned Count; - - TargetRegionEntryInfo() - : ParentName(""), DeviceID(0), FileID(0), Line(0), Count(0) {} - TargetRegionEntryInfo(StringRef ParentName, unsigned DeviceID, - unsigned FileID, unsigned Line, unsigned Count = 0) - : ParentName(ParentName), DeviceID(DeviceID), FileID(FileID), Line(Line), - Count(Count) {} - - static void getTargetRegionEntryFnName(SmallVectorImpl &Name, - StringRef ParentName, - unsigned DeviceID, unsigned FileID, - unsigned Line, unsigned Count); - - bool operator<(const TargetRegionEntryInfo RHS) const { - return std::make_tuple(ParentName, DeviceID, FileID, Line, Count) < - std::make_tuple(RHS.ParentName, RHS.DeviceID, RHS.FileID, RHS.Line, - RHS.Count); - } -}; - -/// Class that manages information about offload code regions and data -class OffloadEntriesInfoManager { - /// Number of entries registered so far. - OpenMPIRBuilderConfig Config; - unsigned OffloadingEntriesNum = 0; - -public: - void setConfig(OpenMPIRBuilderConfig C) { Config = C; } - - /// Base class of the entries info. - class OffloadEntryInfo { - public: - /// Kind of a given entry. - enum OffloadingEntryInfoKinds : unsigned { - /// Entry is a target region. - OffloadingEntryInfoTargetRegion = 0, - /// Entry is a declare target variable. - OffloadingEntryInfoDeviceGlobalVar = 1, - /// Invalid entry info. - OffloadingEntryInfoInvalid = ~0u - }; - - protected: - OffloadEntryInfo() = delete; - explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind) : Kind(Kind) {} - explicit OffloadEntryInfo(OffloadingEntryInfoKinds Kind, unsigned Order, - uint32_t Flags) - : Flags(Flags), Order(Order), Kind(Kind) {} - ~OffloadEntryInfo() = default; - - public: - bool isValid() const { return Order != ~0u; } - unsigned getOrder() const { return Order; } - OffloadingEntryInfoKinds getKind() const { return Kind; } - uint32_t getFlags() const { return Flags; } - void setFlags(uint32_t NewFlags) { Flags = NewFlags; } - Constant *getAddress() const { return cast_or_null(Addr); } - void setAddress(Constant *V) { - assert(!Addr.pointsToAliveValue() && "Address has been set before!"); - Addr = V; - } - static bool classof(const OffloadEntryInfo *Info) { return true; } - - private: - /// Address of the entity that has to be mapped for offloading. - WeakTrackingVH Addr; - - /// Flags associated with the device global. - uint32_t Flags = 0u; - - /// Order this entry was emitted. - unsigned Order = ~0u; - - OffloadingEntryInfoKinds Kind = OffloadingEntryInfoInvalid; - }; - - /// Return true if a there are no entries defined. - bool empty() const; - /// Return number of entries defined so far. - unsigned size() const { return OffloadingEntriesNum; } - - OffloadEntriesInfoManager() : Config() {} - - // - // Target region entries related. - // - - /// Kind of the target registry entry. - enum OMPTargetRegionEntryKind : uint32_t { - /// Mark the entry as target region. - OMPTargetRegionEntryTargetRegion = 0x0, - /// Mark the entry as a global constructor. - OMPTargetRegionEntryCtor = 0x02, - /// Mark the entry as a global destructor. - OMPTargetRegionEntryDtor = 0x04, - }; - - /// Target region entries info. - class OffloadEntryInfoTargetRegion final : public OffloadEntryInfo { - /// Address that can be used as the ID of the entry. 
- Constant *ID = nullptr; - - public: - OffloadEntryInfoTargetRegion() - : OffloadEntryInfo(OffloadingEntryInfoTargetRegion) {} - explicit OffloadEntryInfoTargetRegion(unsigned Order, Constant *Addr, - Constant *ID, - OMPTargetRegionEntryKind Flags) - : OffloadEntryInfo(OffloadingEntryInfoTargetRegion, Order, Flags), - ID(ID) { - setAddress(Addr); - } - - Constant *getID() const { return ID; } - void setID(Constant *V) { - assert(!ID && "ID has been set before!"); - ID = V; - } - static bool classof(const OffloadEntryInfo *Info) { - return Info->getKind() == OffloadingEntryInfoTargetRegion; - } - }; - - /// Initialize target region entry. - /// This is ONLY needed for DEVICE compilation. - void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, - unsigned Order); - /// Register target region entry. - void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, - Constant *Addr, Constant *ID, - OMPTargetRegionEntryKind Flags); - /// Return true if a target region entry with the provided information - /// exists. - bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, - bool IgnoreAddressId = false) const; - - // Return the Name based on \a EntryInfo using the next available Count. - void getTargetRegionEntryFnName(SmallVectorImpl &Name, - const TargetRegionEntryInfo &EntryInfo); - - /// brief Applies action \a Action on all registered entries. - typedef function_ref - OffloadTargetRegionEntryInfoActTy; - void - actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action); - - // - // Device global variable entries related. - // - - /// Kind of the global variable entry.. - enum OMPTargetGlobalVarEntryKind : uint32_t { - /// Mark the entry as a to declare target. - OMPTargetGlobalVarEntryTo = 0x0, - /// Mark the entry as a to declare target link. - OMPTargetGlobalVarEntryLink = 0x1, - }; - - /// Device global variable entries info. - class OffloadEntryInfoDeviceGlobalVar final : public OffloadEntryInfo { - /// Type of the global variable. - int64_t VarSize; - GlobalValue::LinkageTypes Linkage; - - public: - OffloadEntryInfoDeviceGlobalVar() - : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar) {} - explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, - OMPTargetGlobalVarEntryKind Flags) - : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags) {} - explicit OffloadEntryInfoDeviceGlobalVar(unsigned Order, Constant *Addr, - int64_t VarSize, - OMPTargetGlobalVarEntryKind Flags, - GlobalValue::LinkageTypes Linkage) - : OffloadEntryInfo(OffloadingEntryInfoDeviceGlobalVar, Order, Flags), - VarSize(VarSize), Linkage(Linkage) { - setAddress(Addr); - } - - int64_t getVarSize() const { return VarSize; } - void setVarSize(int64_t Size) { VarSize = Size; } - GlobalValue::LinkageTypes getLinkage() const { return Linkage; } - void setLinkage(GlobalValue::LinkageTypes LT) { Linkage = LT; } - static bool classof(const OffloadEntryInfo *Info) { - return Info->getKind() == OffloadingEntryInfoDeviceGlobalVar; - } - }; - - /// Initialize device global variable entry. - /// This is ONLY used for DEVICE compilation. - void initializeDeviceGlobalVarEntryInfo(StringRef Name, - OMPTargetGlobalVarEntryKind Flags, - unsigned Order); - - /// Register device global variable entry. - void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, - int64_t VarSize, - OMPTargetGlobalVarEntryKind Flags, - GlobalValue::LinkageTypes Linkage); - /// Checks if the variable with the given name has been registered already. 
- bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const { - return OffloadEntriesDeviceGlobalVar.count(VarName) > 0; - } - /// Applies action \a Action on all registered entries. - typedef function_ref - OffloadDeviceGlobalVarEntryInfoActTy; - void actOnDeviceGlobalVarEntriesInfo( - const OffloadDeviceGlobalVarEntryInfoActTy &Action); - -private: - /// Return the count of entries at a particular source location. - unsigned - getTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo) const; - - /// Update the count of entries at a particular source location. - void - incrementTargetRegionEntryInfoCount(const TargetRegionEntryInfo &EntryInfo); - - static TargetRegionEntryInfo - getTargetRegionEntryCountKey(const TargetRegionEntryInfo &EntryInfo) { - return TargetRegionEntryInfo(EntryInfo.ParentName, EntryInfo.DeviceID, - EntryInfo.FileID, EntryInfo.Line, 0); - } - - // Count of entries at a location. - std::map OffloadEntriesTargetRegionCount; - - // Storage for target region entries kind. - typedef std::map - OffloadEntriesTargetRegionTy; - OffloadEntriesTargetRegionTy OffloadEntriesTargetRegion; - /// Storage for device global variable entries kind. The storage is to be - /// indexed by mangled name. - typedef StringMap - OffloadEntriesDeviceGlobalVarTy; - OffloadEntriesDeviceGlobalVarTy OffloadEntriesDeviceGlobalVar; -}; - /// Class to represented the control flow structure of an OpenMP canonical loop. /// /// The control-flow structure is standardized for easy consumption by diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 81e2904bbd019..12c7c42ac5fe5 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3999,13 +3999,13 @@ Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn, } void OpenMPIRBuilder::emitTargetRegionFunction( - OffloadEntriesInfoManager &InfoManager, TargetRegionEntryInfo &EntryInfo, + TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, int32_t NumTeams, int32_t NumThreads, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID) { SmallString<64> EntryFnName; - InfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo); + OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo); OutlinedFn = Config.isEmbedded() || !Config.openMPOffloadMandatory() ? 
GenerateFunctionCallback(EntryFnName) @@ -4023,19 +4023,18 @@ void OpenMPIRBuilder::emitTargetRegionFunction( : createPlatformSpecificName({EntryFnName, "region_id"}); OutlinedFnID = registerTargetRegionFunction( - InfoManager, EntryInfo, OutlinedFn, EntryFnName, EntryFnIDName, NumTeams, - NumThreads); + EntryInfo, OutlinedFn, EntryFnName, EntryFnIDName, NumTeams, NumThreads); } Constant *OpenMPIRBuilder::registerTargetRegionFunction( - OffloadEntriesInfoManager &InfoManager, TargetRegionEntryInfo &EntryInfo, - Function *OutlinedFn, StringRef EntryFnName, StringRef EntryFnIDName, - int32_t NumTeams, int32_t NumThreads) { + TargetRegionEntryInfo &EntryInfo, Function *OutlinedFn, + StringRef EntryFnName, StringRef EntryFnIDName, int32_t NumTeams, + int32_t NumThreads) { if (OutlinedFn) setOutlinedTargetRegionFunctionAttributes(OutlinedFn, NumTeams, NumThreads); auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName); auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName); - InfoManager.registerTargetRegionEntryInfo( + OffloadInfoManager.registerTargetRegionEntryInfo( EntryInfo, EntryAddr, OutlinedFnID, OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion); return OutlinedFnID; @@ -4897,18 +4896,17 @@ void OpenMPIRBuilder::createOffloadEntry(Constant *ID, Constant *Addr, // We only generate metadata for function that contain target regions. void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( - OffloadEntriesInfoManager &OffloadEntriesInfoManager, EmitMetadataErrorReportFunctionTy &ErrorFn) { // If there are no entries, we don't need to do anything. - if (OffloadEntriesInfoManager.empty()) + if (OffloadInfoManager.empty()) return; LLVMContext &C = M.getContext(); SmallVector, 16> - OrderedEntries(OffloadEntriesInfoManager.size()); + OrderedEntries(OffloadInfoManager.size()); // Auxiliary methods to create metadata values and strings. auto &&GetMDInt = [this](unsigned V) { @@ -4947,8 +4945,7 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( MD->addOperand(MDNode::get(C, Ops)); }; - OffloadEntriesInfoManager.actOnTargetRegionEntriesInfo( - TargetRegionMetadataEmitter); + OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter); // Create function that emits metadata for each device global variable entry; auto &&DeviceGlobalVarMetadataEmitter = @@ -4973,7 +4970,7 @@ void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata( MD->addOperand(MDNode::get(C, Ops)); }; - OffloadEntriesInfoManager.actOnDeviceGlobalVarEntriesInfo( + OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo( DeviceGlobalVarMetadataEmitter); for (const auto &E : OrderedEntries) { @@ -5061,8 +5058,7 @@ void OffloadEntriesInfoManager::getTargetRegionEntryFnName( /// Loads all the offload entries information from the host IR /// metadata. -void OpenMPIRBuilder::loadOffloadInfoMetadata( - Module &M, OffloadEntriesInfoManager &OffloadEntriesInfoManager) { +void OpenMPIRBuilder::loadOffloadInfoMetadata(Module &M) { // If we are in target mode, load the metadata from the host IR. This code has // to match the metadata creation in createOffloadEntriesAndInfoMetadata(). 
@@ -5092,13 +5088,13 @@ void OpenMPIRBuilder::loadOffloadInfoMetadata(
                                        /*FileID=*/GetMDInt(2),
                                        /*Line=*/GetMDInt(4),
                                        /*Count=*/GetMDInt(5));
-      OffloadEntriesInfoManager.initializeTargetRegionEntryInfo(
-          EntryInfo, /*Order=*/GetMDInt(6));
+      OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
+                                                         /*Order=*/GetMDInt(6));
       break;
     }
     case OffloadEntriesInfoManager::OffloadEntryInfo::
         OffloadingEntryInfoDeviceGlobalVar:
-      OffloadEntriesInfoManager.initializeDeviceGlobalVarEntryInfo(
+      OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
           /*MangledName=*/GetMDString(1),
           static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind>(
               /*Flags=*/GetMDInt(2)),
@@ -5147,7 +5143,7 @@ void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
   // If we are emitting code for a target, the entry is already initialized,
   // only has to be registered.
-  if (Config.isEmbedded()) {
+  if (OMPBuilder->Config.isEmbedded()) {
     // This could happen if the device compilation is invoked standalone.
     if (!hasTargetRegionEntryInfo(EntryInfo)) {
       return;
     }
@@ -5202,7 +5198,7 @@ void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
 void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
     StringRef VarName, Constant *Addr, int64_t VarSize,
     OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage) {
-  if (Config.isEmbedded()) {
+  if (OMPBuilder->Config.isEmbedded()) {
     // This could happen if the device compilation is invoked standalone.
     if (!hasDeviceGlobalVarEntryInfo(VarName))
       return;
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 05a1d7a58b84d..aee8ed26a6fa6 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -5730,8 +5730,9 @@ TEST_F(OpenMPIRBuilderTest, EmitOffloadingArraysArguments) {
 }

 TEST_F(OpenMPIRBuilderTest, OffloadEntriesInfoManager) {
-  OffloadEntriesInfoManager InfoManager;
-  InfoManager.setConfig(OpenMPIRBuilderConfig(true, false, false, false));
+  OpenMPIRBuilder OMPBuilder(*M);
+  OMPBuilder.setConfig(OpenMPIRBuilderConfig(true, false, false, false));
+  OffloadEntriesInfoManager &InfoManager = OMPBuilder.OffloadInfoManager;
   TargetRegionEntryInfo EntryInfo("parent", 1, 2, 4, 0);
   InfoManager.initializeTargetRegionEntryInfo(EntryInfo, 0);
   EXPECT_TRUE(InfoManager.hasTargetRegionEntryInfo(EntryInfo));

From c640a146c4caa3cca559e308e2e7ecc78c45140d Mon Sep 17 00:00:00 2001
From: Felipe de Azevedo Piovezan
Date: Thu, 23 Mar 2023 08:18:53 -0400
Subject: [PATCH 442/691] [lldb] Explicitly set libcxx paths when
 USE_SYSTEM_STDLIB is provided

For tests marked as "USE_SYSTEM_STDLIB", the expectation is that the
system's standard library should be used. However, the implementation
of this flag is such that we simply don't pass _any_ libcxx-related
flags to Clang; in turn, Clang will use its defaults.

For a Clang/Libcxx pair compiled together, Clang defaults to:

1. The headers of the sibling libcxx.
2. The libraries of the system.

This mismatch is actually a bug in the driver; once fixed, however, (2)
would point to the sibling libcxx as well, which is _not_ what test
authors intended with the USE_SYSTEM_STDLIB flag. As such, this patch
explicitly sets a path to the system's libraries.

This change is made only on Apple platforms so that we can first verify
that it works in this case.
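For illustration, a test would opt in roughly like this (a sketch only,
assuming the usual Makefile.rules conventions; the source file name is
hypothetical):

    # Hypothetical lldb API test Makefile that requests the system's C++
    # standard library instead of the just-built in-tree libcxx.
    CXX_SOURCES := main.cpp
    USE_SYSTEM_STDLIB := 1
    include Makefile.rules

On Darwin this now expands to -nostdlib++ -nostdinc++ plus the SDK's
c++/v1 headers for compilation, and links -lc++ from $(SDKROOT)/usr/lib
with a matching rpath.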
Differential Revision: https://reviews.llvm.org/D146714 --- .../packages/Python/lldbsuite/test/make/Makefile.rules | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules index 25c4d88763326..4c225ed360be5 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules @@ -428,6 +428,16 @@ ifeq (1,$(USE_LIBCPP)) endif endif +ifeq (1, $(USE_SYSTEM_STDLIB)) + ifeq "$(OS)" "Darwin" + ifeq "$(SDKROOT)" "" + $(error "SDKROOT must be set on Darwin to use the system libcxx") + endif + CXXFLAGS += -nostdlib++ -nostdinc++ -cxx-isystem $(SDKROOT)/usr/include/c++/v1 + LDFLAGS += -L$(SDKROOT)/usr/lib -Wl,-rpath,$(SDKROOT)/usr/lib -lc++ + endif +endif + # If no explicit request was made, but we have paths to a custom libcxx, use # them. ifeq ($(or $(USE_LIBSTDCPP), $(USE_LIBCPP), $(USE_SYSTEM_STDLIB)),) From 2cfd06ba672f4e3097b6c2c576bdb876d37c71d1 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 23 Mar 2023 08:47:44 -0700 Subject: [PATCH 443/691] [BoundsChecking] Don't crash on scalable vector sizes --- .../Instrumentation/BoundsChecking.cpp | 4 +- .../Instrumentation/BoundsChecking/simple.ll | 74 ++++++++++++++++--- 2 files changed, 64 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp index 8b1d39ad412fa..04ffbf6636e1a 100644 --- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -56,7 +56,7 @@ static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal, const DataLayout &DL, TargetLibraryInfo &TLI, ObjectSizeOffsetEvaluator &ObjSizeEval, BuilderTy &IRB, ScalarEvolution &SE) { - uint64_t NeededSize = DL.getTypeStoreSize(InstVal->getType()); + TypeSize NeededSize = DL.getTypeStoreSize(InstVal->getType()); LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize) << " bytes\n"); @@ -72,7 +72,7 @@ static Value *getBoundsCheckCond(Value *Ptr, Value *InstVal, ConstantInt *SizeCI = dyn_cast(Size); Type *IntTy = DL.getIntPtrType(Ptr->getType()); - Value *NeededSizeVal = ConstantInt::get(IntTy, NeededSize); + Value *NeededSizeVal = IRB.CreateTypeSize(IntTy, NeededSize); auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size)); auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset)); diff --git a/llvm/test/Instrumentation/BoundsChecking/simple.ll b/llvm/test/Instrumentation/BoundsChecking/simple.ll index 57858618d17b3..e329b90d0cde4 100644 --- a/llvm/test/Instrumentation/BoundsChecking/simple.ll +++ b/llvm/test/Instrumentation/BoundsChecking/simple.ll @@ -33,7 +33,7 @@ define void @f2() nounwind { ; CHECK-NEXT: store i32 3, ptr [[IDX]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6:[0-9]+]] ; CHECK-NEXT: unreachable ; %1 = tail call ptr @malloc(i64 32) @@ -57,7 +57,7 @@ define void @f3(i64 %x) nounwind { ; CHECK-NEXT: store i32 3, ptr [[IDX]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = tail call ptr @calloc(i64 4, i64 %x) @@ -93,7 +93,7 @@ define void @f4(i64 %x) nounwind { ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[IDX]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void 
@llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = tail call ptr @realloc(ptr null, i64 %x) nounwind @@ -115,7 +115,7 @@ define void @f5(i64 %x) nounwind { ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[IDX]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %idx = getelementptr inbounds [8 x i8], ptr @.str, i64 0, i64 %x @@ -137,7 +137,7 @@ define void @f5_as1(i64 %x) nounwind { ; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr addrspace(1) [[IDX]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %idx = getelementptr inbounds [8 x i8], ptr addrspace(1) @.str_as1, i64 0, i64 %x @@ -169,7 +169,7 @@ define void @f7(i64 %x) nounwind { ; CHECK-NEXT: [[TMP8:%.*]] = load i128, ptr [[TMP2]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = alloca i128, i64 %x @@ -222,7 +222,7 @@ define void @f10(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: [[TMP12:%.*]] = load i128, ptr [[TMP6]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = alloca i128, i64 %x @@ -240,7 +240,7 @@ define void @f11(ptr byval(i128) %x) nounwind { ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP1]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = getelementptr inbounds i8, ptr %x, i64 16 @@ -256,7 +256,7 @@ define void @f11_as1(ptr addrspace(1) byval(i128) %x) nounwind { ; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr addrspace(1) [[TMP1]], align 4 ; CHECK-NEXT: ret void ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = getelementptr inbounds i8, ptr addrspace(1) %x, i16 16 @@ -282,7 +282,7 @@ define i64 @f12(i64 %x, i64 %y) nounwind { ; CHECK-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP4]], align 8 ; CHECK-NEXT: ret i64 [[TMP12]] ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; %1 = tail call ptr @calloc(i64 1, i64 %x) @@ -354,7 +354,7 @@ define i8 @f14(i1 %i) { ; CHECK-NEXT: [[RET:%.*]] = load i8, ptr [[P]], align 1 ; CHECK-NEXT: ret i8 [[RET]] ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; entry: @@ -396,7 +396,7 @@ define i8 @f15(i1 %i) { ; CHECK-NEXT: [[RET:%.*]] = load i8, ptr [[ALLOC]], align 1 ; CHECK-NEXT: ret i8 [[RET]] ; CHECK: trap: -; CHECK-NEXT: call void @llvm.trap() #[[ATTR5]] +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] ; CHECK-NEXT: unreachable ; entry: @@ -414,3 +414,53 @@ bb2: %ret = load i8, ptr %alloc ret i8 %ret } + +define <4 x i32> @load_vector(i64 %y) nounwind { +; CHECK-LABEL: @load_vector( +; CHECK-NEXT: [[TMP1:%.*]] = tail call ptr @calloc(i64 1, i64 256) +; CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[Y:%.*]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 0, [[DOTIDX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 [[Y]] +; CHECK-NEXT: [[TMP4:%.*]] = sub 
i64 256, [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 256, [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i64 [[TMP4]], 16 +; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]] +; CHECK-NEXT: br i1 [[TMP7]], label [[TRAP:%.*]], label [[TMP8:%.*]] +; CHECK: 8: +; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr [[TMP3]], align 8 +; CHECK-NEXT: ret <4 x i32> [[TMP9]] +; CHECK: trap: +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] +; CHECK-NEXT: unreachable +; + %1 = tail call ptr @calloc(i64 1, i64 256) + %2 = getelementptr inbounds i64, ptr %1, i64 %y + %3 = load <4 x i32>, ptr %2, align 8 + ret <4 x i32> %3 +} + +define @load_scalable_vector(i64 %y) nounwind { +; CHECK-LABEL: @load_scalable_vector( +; CHECK-NEXT: [[TMP1:%.*]] = tail call ptr @calloc(i64 1, i64 256) +; CHECK-NEXT: [[DOTIDX:%.*]] = mul i64 [[Y:%.*]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 0, [[DOTIDX]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i64 [[Y]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 256, [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i64 256, [[TMP2]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP6]], [[TMP5]] +; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]] +; CHECK-NEXT: br i1 [[TMP9]], label [[TRAP:%.*]], label [[TMP10:%.*]] +; CHECK: 10: +; CHECK-NEXT: [[TMP11:%.*]] = load , ptr [[TMP3]], align 8 +; CHECK-NEXT: ret [[TMP11]] +; CHECK: trap: +; CHECK-NEXT: call void @llvm.trap() #[[ATTR6]] +; CHECK-NEXT: unreachable +; + %1 = tail call ptr @calloc(i64 1, i64 256) + %2 = getelementptr inbounds i64, ptr %1, i64 %y + %3 = load , ptr %2, align 8 + ret %3 +} From 16b6826bdd6e5cc02844c10bd1f9af388a6ffed8 Mon Sep 17 00:00:00 2001 From: Ding Xiang Fei Date: Thu, 23 Mar 2023 17:04:21 +0100 Subject: [PATCH 444/691] [MergeFuncs] Add tests for D144682 (NFC) I forgot to git add this test when committing the change. --- .../MergeFunc/mergefunc-preserve-nonnull.ll | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll diff --git a/llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll new file mode 100644 index 0000000000000..12bb0e8b38425 --- /dev/null +++ b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=mergefunc -S < %s | FileCheck %s + +; This test makes sure that the mergefunc pass does not merge functions +; that have different nonnull assertions. 
+ +%1 = type ptr + +define void @f1(ptr %0, ptr %1) { +; CHECK-LABEL: @f1( +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1:%.*]], align 8, !nonnull !0 +; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP0:%.*]], align 8 +; CHECK-NEXT: ret void +; + %3 = load ptr, ptr %1, align 8, !nonnull !0 + store ptr %3, ptr %0, align 8 + ret void +} + +define void @f2(ptr %0, ptr %1) { +; CHECK-LABEL: @f2( +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1:%.*]], align 8 +; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP0:%.*]], align 8 +; CHECK-NEXT: ret void +; + %3 = load ptr, ptr %1, align 8 + store ptr %3, ptr %0, align 8 + ret void +} + +define void @f3(ptr %0, ptr %1) { +; CHECK-LABEL: @f3( +; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP1:%.*]], align 8, !noundef !0 +; CHECK-NEXT: store ptr [[TMP3]], ptr [[TMP0:%.*]], align 8 +; CHECK-NEXT: ret void +; + %3 = load ptr, ptr %1, align 8, !noundef !0 + store ptr %3, ptr %0, align 8 + ret void +} + +define void @f4(ptr %0, ptr %1) { +; CHECK-LABEL: @f4( +; CHECK-NEXT: tail call void @f3(ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) +; CHECK-NEXT: ret void +; + %3 = load ptr, ptr %1, align 8, !noundef !0, !dbg !1 + store ptr %3, ptr %0, align 8 + ret void +} + +!0 = !{} +!1 = !{} From ec294d2f8f1839e11c13ee32279cd28c1f46f66f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Mar 2023 16:10:32 +0000 Subject: [PATCH 445/691] [X86] LowerVectorAllZero - lower to CMP(MOVMSK(NOT(X)),0) instead of CMP(MOVMSK(X),65535) In most cases the NOT will still be scalarized, but it allows us to perform the CMP(X,0) combines inside combineCMP() --- llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +- llvm/test/CodeGen/X86/pr45378.ll | 4 +- llvm/test/CodeGen/X86/ptest.ll | 18 ++++---- llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll | 41 +++++++++---------- 4 files changed, 33 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2d371566381c8..3a4173e443798 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -24224,9 +24224,10 @@ static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC, V = DAG.getBitcast(MVT::v16i8, MaskBits(V)); V = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8, V, getZeroVector(MVT::v16i8, Subtarget, DAG, DL)); + V = DAG.getNOT(DL, V, MVT::v16i8); V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V); return DAG.getNode(X86ISD::CMP, DL, MVT::i32, V, - DAG.getConstant(0xFFFF, DL, MVT::i32)); + DAG.getConstant(0, DL, MVT::i32)); } // Check whether an OR'd reduction tree is PTEST-able, or if we can fallback to diff --git a/llvm/test/CodeGen/X86/pr45378.ll b/llvm/test/CodeGen/X86/pr45378.ll index fecfa95f8b838..aa870b7afbd38 100644 --- a/llvm/test/CodeGen/X86/pr45378.ll +++ b/llvm/test/CodeGen/X86/pr45378.ll @@ -15,7 +15,7 @@ define i1 @parseHeaders(ptr %ptr) nounwind { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -45,7 +45,7 @@ define i1 @parseHeaders2_scalar_or(ptr %ptr) nounwind { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/ptest.ll b/llvm/test/CodeGen/X86/ptest.ll index bedcfebc5f6e7..5983d502af3dd 100644 --- 
a/llvm/test/CodeGen/X86/ptest.ll +++ b/llvm/test/CodeGen/X86/ptest.ll @@ -10,7 +10,7 @@ define i32 @veccond128(<4 x i32> %input) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: je .LBB0_2 ; SSE2-NEXT: # %bb.1: # %if-true-block ; SSE2-NEXT: xorl %eax, %eax @@ -57,7 +57,7 @@ define i32 @veccond256(<8 x i32> %input) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: je .LBB1_2 ; SSE2-NEXT: # %bb.1: # %if-true-block ; SSE2-NEXT: xorl %eax, %eax @@ -109,7 +109,7 @@ define i32 @veccond512(<16 x i32> %input) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: je .LBB2_2 ; SSE2-NEXT: # %bb.1: # %if-true-block ; SSE2-NEXT: xorl %eax, %eax @@ -176,7 +176,7 @@ define i32 @vectest128(<4 x i32> %input) { ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx ; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -207,7 +207,7 @@ define i32 @vectest256(<8 x i32> %input) { ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx ; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -242,7 +242,7 @@ define i32 @vectest512(<16 x i32> %input) { ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx ; SSE2-NEXT: xorl %eax, %eax -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -286,7 +286,7 @@ define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: cmovel %esi, %eax ; SSE2-NEXT: retq ; @@ -317,7 +317,7 @@ define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: cmovel %esi, %eax ; SSE2-NEXT: retq ; @@ -352,7 +352,7 @@ define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %ecx -; SSE2-NEXT: cmpl $65535, %ecx # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %ecx # imm = 0xFFFF ; SSE2-NEXT: cmovel %esi, %eax ; SSE2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll index a489a5e6099f0..761ad105f75dc 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll @@ -16,7 +16,7 @@ define i1 @test_v2i64(<2 x i64> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -43,7 +43,7 @@ define i1 
@test_v4i64(<4 x i64> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -74,7 +74,7 @@ define i1 @test_v8i64(<8 x i64> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -128,7 +128,7 @@ define i1 @test_v16i64(<16 x i64> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -207,7 +207,7 @@ define i1 @test_v4i32(<4 x i32> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -234,7 +234,7 @@ define i1 @test_v8i32(<8 x i32> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -265,7 +265,7 @@ define i1 @test_v16i32(<16 x i32> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -319,7 +319,7 @@ define i1 @test_v32i32(<32 x i32> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -417,7 +417,7 @@ define i1 @test_v8i16(<8 x i16> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -444,7 +444,7 @@ define i1 @test_v16i16(<16 x i16> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -475,7 +475,7 @@ define i1 @test_v32i16(<32 x i16> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -529,7 +529,7 @@ define i1 @test_v64i16(<64 x i16> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -646,7 +646,7 @@ define i1 @test_v16i8(<16 x i8> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -673,7 +673,7 @@ define i1 
@test_v32i8(<32 x i8> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -704,7 +704,7 @@ define i1 @test_v64i8(<64 x i8> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -758,7 +758,7 @@ define i1 @test_v128i8(<128 x i8> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -866,7 +866,7 @@ define i1 @mask_v8i32(<8 x i32> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -913,7 +913,7 @@ define i1 @trunc_v16i16(<16 x i16> %a0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: setne %al ; SSE2-NEXT: retq ; @@ -964,8 +964,7 @@ define i1 @mask_v128i8(<128 x i8> %a0) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: psllw $7, %xmm0 ; SSE2-NEXT: pmovmskb %xmm0, %eax -; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: testl %eax, %eax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; @@ -1026,7 +1025,7 @@ define zeroext i1 @PR44781(ptr %0) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 ; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: cmpl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; From dc2f2d2180f1d1a1835dc55478d3bcceea41a4b1 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Thu, 23 Mar 2023 09:15:57 -0700 Subject: [PATCH 446/691] [MemProf] Use stable_sort to avoid non-determinism Switch from std::sort to std::stable_sort when sorting callsites to avoid non-determinism when the comparisons are equal. This showed up in internal testing of fe27495be2040007c7b20844a9371b06156ab405. --- .../IPO/MemProfContextDisambiguation.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp index b2fcea1ec8694..762e4ce0c3e79 100644 --- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp +++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp @@ -1032,13 +1032,13 @@ void CallsiteContextGraph::updateStackNodes() { // latter is so that we can specially handle calls that have identical stack // id sequences (either due to cloning or artificially because of the MIB // context pruning). 
- std::sort(Calls.begin(), Calls.end(), - [](const CallContextInfo &A, const CallContextInfo &B) { - auto &IdsA = std::get<1>(A); - auto &IdsB = std::get<1>(B); - return IdsA.size() > IdsB.size() || - (IdsA.size() == IdsB.size() && IdsA < IdsB); - }); + std::stable_sort(Calls.begin(), Calls.end(), + [](const CallContextInfo &A, const CallContextInfo &B) { + auto &IdsA = std::get<1>(A); + auto &IdsB = std::get<1>(B); + return IdsA.size() > IdsB.size() || + (IdsA.size() == IdsB.size() && IdsA < IdsB); + }); // Find the node for the last stack id, which should be the same // across all calls recorded for this id, and is the id for this From 2bececb8bed1f8fcd8d54dba831ceb117717bfcc Mon Sep 17 00:00:00 2001 From: Viktoriia Bakalova Date: Tue, 28 Feb 2023 16:27:05 +0000 Subject: [PATCH 447/691] [clangd] Add provider info on symbol hover. Differential Revision: https://reviews.llvm.org/D144976 --- clang-tools-extra/clangd/Hover.cpp | 65 +++++++++++ clang-tools-extra/clangd/Hover.h | 3 + clang-tools-extra/clangd/IncludeCleaner.cpp | 77 +++++++------ clang-tools-extra/clangd/IncludeCleaner.h | 10 ++ clang-tools-extra/clangd/Preamble.cpp | 6 +- .../clangd/unittests/HoverTests.cpp | 104 +++++++++++++++++- .../include/clang-include-cleaner/Analysis.h | 10 ++ .../include-cleaner/lib/AnalysisInternal.h | 8 +- 8 files changed, 229 insertions(+), 54 deletions(-) diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp index c5436141adbf7..e240c22259f35 100644 --- a/clang-tools-extra/clangd/Hover.cpp +++ b/clang-tools-extra/clangd/Hover.cpp @@ -12,11 +12,16 @@ #include "CodeCompletionStrings.h" #include "Config.h" #include "FindTarget.h" +#include "IncludeCleaner.h" #include "ParsedAST.h" #include "Selection.h" #include "SourceCode.h" +#include "clang-include-cleaner/Analysis.h" +#include "clang-include-cleaner/Types.h" #include "index/SymbolCollector.h" +#include "support/Logger.h" #include "support/Markup.h" +#include "support/Trace.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTDiagnostic.h" #include "clang/AST/ASTTypeTraits.h" @@ -43,11 +48,13 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" #include "llvm/Support/Format.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/raw_ostream.h" #include #include +#include namespace clang { namespace clangd { @@ -1084,6 +1091,49 @@ const NamedDecl *pickDeclToUse(llvm::ArrayRef Candidates) { return Candidates.front(); } +void maybeAddSymbolProviders(ParsedAST &AST, HoverInfo &HI, + include_cleaner::Symbol Sym) { + trace::Span Tracer("Hover::maybeAddSymbolProviders"); + + const SourceManager &SM = AST.getSourceManager(); + llvm::SmallVector RankedProviders = + include_cleaner::headersForSymbol(Sym, SM, AST.getPragmaIncludes()); + if (RankedProviders.empty()) + return; + + std::string Result; + include_cleaner::Includes ConvertedIncludes = + convertIncludes(SM, AST.getIncludeStructure().MainFileIncludes); + for (const auto &P : RankedProviders) { + if (P.kind() == include_cleaner::Header::Physical && + P.physical() == SM.getFileEntryForID(SM.getMainFileID())) + // Main file ranked higher than any #include'd file + break; + + // Pick the best-ranked #include'd provider + auto Matches = ConvertedIncludes.match(P); + if (!Matches.empty()) { + Result = Matches[0]->quote(); + break; + } + } + + if (!Result.empty()) { + HI.Provider = std::move(Result); + return; + } + + // Pick the best-ranked non-#include'd provider + const auto 
&H = RankedProviders.front(); + if (H.kind() == include_cleaner::Header::Physical && + H.physical() == SM.getFileEntryForID(SM.getMainFileID())) + // Do not show main file as provider, otherwise we'll show provider info + // on local variables, etc. + return; + + HI.Provider = spellHeader(AST, SM.getFileEntryForID(SM.getMainFileID()), H); +} + } // namespace std::optional getHover(ParsedAST &AST, Position Pos, @@ -1131,6 +1181,12 @@ std::optional getHover(ParsedAST &AST, Position Pos, HighlightRange = Tok.range(SM).toCharRange(SM); if (auto M = locateMacroAt(Tok, AST.getPreprocessor())) { HI = getHoverContents(*M, Tok, AST); + if (auto DefLoc = M->Info->getDefinitionLoc(); DefLoc.isValid()) { + include_cleaner::Macro IncludeCleanerMacro{ + AST.getPreprocessor().getIdentifierInfo(Tok.text(SM)), DefLoc}; + maybeAddSymbolProviders(AST, *HI, + include_cleaner::Symbol{IncludeCleanerMacro}); + } break; } } else if (Tok.kind() == tok::kw_auto || Tok.kind() == tok::kw_decltype) { @@ -1168,6 +1224,7 @@ std::optional getHover(ParsedAST &AST, Position Pos, if (!HI->Value) HI->Value = printExprValue(N, AST.getASTContext()); maybeAddCalleeArgInfo(N, *HI, PP); + maybeAddSymbolProviders(AST, *HI, include_cleaner::Symbol{*DeclToUse}); } else if (const Expr *E = N->ASTNode.get()) { HI = getHoverContents(N, E, AST, PP, Index); } else if (const Attr *A = N->ASTNode.get()) { @@ -1217,6 +1274,14 @@ markup::Document HoverInfo::present() const { assert(!Name.empty() && "hover triggered on a nameless symbol"); Header.appendCode(Name); + if (!Provider.empty()) { + markup::Paragraph &DI = Output.addParagraph(); + DI.appendText("provided by"); + DI.appendSpace(); + DI.appendCode(Provider); + Output.addRuler(); + } + // Put a linebreak after header to increase readability. Output.addRuler(); // Print Types on their own lines to reduce chances of getting line-wrapped by diff --git a/clang-tools-extra/clangd/Hover.h b/clang-tools-extra/clangd/Hover.h index e63ff95b400b3..7ade177f89cc1 100644 --- a/clang-tools-extra/clangd/Hover.h +++ b/clang-tools-extra/clangd/Hover.h @@ -14,6 +14,7 @@ #include "support/Markup.h" #include "clang/Index/IndexSymbol.h" #include +#include namespace clang { namespace clangd { @@ -67,6 +68,8 @@ struct HoverInfo { std::string LocalScope; /// Name of the symbol, does not contain any "::". std::string Name; + /// Header providing the symbol (best match). Contains ""<>. 
+ std::string Provider; std::optional SymRange; index::SymbolKind Kind = index::SymbolKind::Unknown; std::string Documentation; diff --git a/clang-tools-extra/clangd/IncludeCleaner.cpp b/clang-tools-extra/clangd/IncludeCleaner.cpp index ee470bd8b963f..ab7c05eb834c0 100644 --- a/clang-tools-extra/clangd/IncludeCleaner.cpp +++ b/clang-tools-extra/clangd/IncludeCleaner.cpp @@ -136,45 +136,6 @@ static bool mayConsiderUnused(const Inclusion &Inc, ParsedAST &AST, return true; } -include_cleaner::Includes -convertIncludes(const SourceManager &SM, - const llvm::ArrayRef MainFileIncludes) { - include_cleaner::Includes Includes; - for (const Inclusion &Inc : MainFileIncludes) { - include_cleaner::Include TransformedInc; - llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written); - TransformedInc.Spelled = WrittenRef.trim("\"<>"); - TransformedInc.HashLocation = - SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset); - TransformedInc.Line = Inc.HashLine + 1; - TransformedInc.Angled = WrittenRef.starts_with("<"); - auto FE = SM.getFileManager().getFile(Inc.Resolved); - if (!FE) { - elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}", - Inc.Resolved, FE.getError().message()); - continue; - } - TransformedInc.Resolved = *FE; - Includes.add(std::move(TransformedInc)); - } - return Includes; -} - -std::string spellHeader(ParsedAST &AST, const FileEntry *MainFile, - include_cleaner::Header Provider) { - if (Provider.kind() == include_cleaner::Header::Physical) { - if (auto CanonicalPath = - getCanonicalPath(Provider.physical(), AST.getSourceManager())) { - std::string SpelledHeader = - llvm::cantFail(URI::includeSpelling(URI::create(*CanonicalPath))); - if (!SpelledHeader.empty()) - return SpelledHeader; - } - } - return include_cleaner::spellHeader( - Provider, AST.getPreprocessor().getHeaderSearchInfo(), MainFile); -} - std::vector collectMacroReferences(ParsedAST &AST) { const auto &SM = AST.getSourceManager(); @@ -327,6 +288,44 @@ std::vector generateUnusedIncludeDiagnostics( } } // namespace +include_cleaner::Includes +convertIncludes(const SourceManager &SM, + const llvm::ArrayRef Includes) { + include_cleaner::Includes ConvertedIncludes; + for (const Inclusion &Inc : Includes) { + include_cleaner::Include TransformedInc; + llvm::StringRef WrittenRef = llvm::StringRef(Inc.Written); + TransformedInc.Spelled = WrittenRef.trim("\"<>"); + TransformedInc.HashLocation = + SM.getComposedLoc(SM.getMainFileID(), Inc.HashOffset); + TransformedInc.Line = Inc.HashLine + 1; + TransformedInc.Angled = WrittenRef.starts_with("<"); + auto FE = SM.getFileManager().getFile(Inc.Resolved); + if (!FE) { + elog("IncludeCleaner: Failed to get an entry for resolved path {0}: {1}", + Inc.Resolved, FE.getError().message()); + continue; + } + TransformedInc.Resolved = *FE; + ConvertedIncludes.add(std::move(TransformedInc)); + } + return ConvertedIncludes; +} + +std::string spellHeader(ParsedAST &AST, const FileEntry *MainFile, + include_cleaner::Header Provider) { + if (Provider.kind() == include_cleaner::Header::Physical) { + if (auto CanonicalPath = + getCanonicalPath(Provider.physical(), AST.getSourceManager())) { + std::string SpelledHeader = + llvm::cantFail(URI::includeSpelling(URI::create(*CanonicalPath))); + if (!SpelledHeader.empty()) + return SpelledHeader; + } + } + return include_cleaner::spellHeader( + Provider, AST.getPreprocessor().getHeaderSearchInfo(), MainFile); +} std::vector getUnused(ParsedAST &AST, diff --git a/clang-tools-extra/clangd/IncludeCleaner.h 
b/clang-tools-extra/clangd/IncludeCleaner.h index d7edca035c965..1a5f07869d569 100644 --- a/clang-tools-extra/clangd/IncludeCleaner.h +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -68,6 +68,16 @@ std::vector issueIncludeCleanerDiagnostics(ParsedAST &AST, /// FIXME: remove this hack once the implementation is good enough. void setIncludeCleanerAnalyzesStdlib(bool B); +/// Converts the clangd include representation to include-cleaner +/// include representation. +include_cleaner::Includes +convertIncludes(const SourceManager &SM, + const llvm::ArrayRef Includes); + +/// Determines the header spelling of an include-cleaner header +/// representation. The spelling contains the ""<> characters. +std::string spellHeader(ParsedAST &AST, const FileEntry *MainFile, + include_cleaner::Header Provider); } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/Preamble.cpp b/clang-tools-extra/clangd/Preamble.cpp index 061c67d65f7d8..08662697a4a5c 100644 --- a/clang-tools-extra/clangd/Preamble.cpp +++ b/clang-tools-extra/clangd/Preamble.cpp @@ -135,11 +135,7 @@ class CppFilePreambleCallbacks : public PreambleCallbacks { SourceMgr = &CI.getSourceManager(); PP = &CI.getPreprocessor(); Includes.collect(CI); - if (Config::current().Diagnostics.UnusedIncludes == - Config::IncludesPolicy::Strict || - Config::current().Diagnostics.MissingIncludes == - Config::IncludesPolicy::Strict) - Pragmas.record(CI); + Pragmas.record(CI); if (BeforeExecuteCallback) BeforeExecuteCallback(CI); } diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp index 211fd1311c98f..6ee9384204036 100644 --- a/clang-tools-extra/clangd/unittests/HoverTests.cpp +++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp @@ -14,11 +14,12 @@ #include "TestTU.h" #include "index/MemIndex.h" #include "clang/AST/Attr.h" +#include "clang/Format/Format.h" #include "clang/Index/IndexSymbol.h" #include "llvm/ADT/StringRef.h" -#include "gmock/gmock.h" #include "gtest/gtest.h" +#include #include #include @@ -28,6 +29,10 @@ namespace { using PassMode = HoverInfo::PassType::PassMode; +std::string guard(llvm::StringRef Code) { + return "#pragma once\n" + Code.str(); +} + TEST(Hover, Structured) { struct { const char *const Code; @@ -2882,6 +2887,99 @@ TEST(Hover, All) { } } +TEST(Hover, Providers) { + struct { + const char *Code; + const std::function ExpectedBuilder; + } Cases[] = {{R"cpp( + struct Foo {}; + Foo F = Fo^o{}; + )cpp", + [](HoverInfo &HI) { HI.Provider = ""; }}, + {R"cpp( + #include "foo.h" + Foo F = Fo^o{}; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, + {R"cpp( + #include "all.h" + Foo F = Fo^o{}; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, + {R"cpp( + #define FOO 5 + int F = ^FOO; + )cpp", + [](HoverInfo &HI) { HI.Provider = ""; }}, + {R"cpp( + #include "foo.h" + int F = ^FOO; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, + {R"cpp( + #include "all.h" + int F = ^FOO; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, + {R"cpp( + #include "foo.h" + Foo A; + Foo B; + Foo C = A ^+ B; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, + // Hover selects the underlying decl of the using decl + {R"cpp( + #include "foo.h" + namespace ns { + using ::Foo; + } + ns::F^oo d; + )cpp", + [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}}; + + for (const auto &Case : Cases) { + Annotations Code{Case.Code}; + SCOPED_TRACE(Code.code()); + + TestTU TU; + TU.Filename = "foo.cpp"; + TU.Code = 
+    TU.AdditionalFiles["foo.h"] = guard(R"cpp(
+      #define FOO 1
+      class Foo {};
+      Foo& operator+(const Foo, const Foo);
+    )cpp");
+    TU.AdditionalFiles["all.h"] = guard("#include \"foo.h\"");
+
+    auto AST = TU.build();
+    auto H = getHover(AST, Code.point(), format::getLLVMStyle(), nullptr);
+    ASSERT_TRUE(H);
+    HoverInfo Expected;
+    Case.ExpectedBuilder(Expected);
+    SCOPED_TRACE(H->present().asMarkdown());
+    EXPECT_EQ(H->Provider, Expected.Provider);
+  }
+}
+
+TEST(Hover, ParseProviderInfo) {
+  HoverInfo HIFoo;
+  HIFoo.Name = "foo";
+  HIFoo.Provider = "\"foo.h\"";
+
+  HoverInfo HIFooBar;
+  HIFooBar.Name = "foo";
+  HIFooBar.Provider = "<bar.h>";
+  struct Case {
+    HoverInfo HI;
+    llvm::StringRef ExpectedMarkdown;
+  } Cases[] = {{HIFoo, "### `foo` \nprovided by `\"foo.h\"`"},
+               {HIFooBar, "### `foo` \nprovided by `<bar.h>`"}};
+
+  for (const auto &Case : Cases)
+    EXPECT_EQ(Case.HI.present().asMarkdown(), Case.ExpectedMarkdown);
+}
+
 TEST(Hover, DocsFromIndex) {
   Annotations T(R"cpp(
   template <typename T> class X {};
@@ -3359,8 +3457,8 @@ TEST(Hover, ParseDocumentation) {
   }
 }
 
-// This is a separate test as headings don't create any differences in plaintext
-// mode.
+// This is a separate test as headings don't create any differences in
+// plaintext mode.
 TEST(Hover, PresentHeadings) {
   HoverInfo HI;
   HI.Kind = index::SymbolKind::Variable;
diff --git a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
index cd11700548075..66916a52046cb 100644
--- a/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
+++ b/clang-tools-extra/include-cleaner/include/clang-include-cleaner/Analysis.h
@@ -16,11 +16,13 @@
 #include "clang/Format/Format.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/MemoryBufferRef.h"
 #include <string>
 
 namespace clang {
 class SourceLocation;
+class SourceManager;
 class Decl;
 class FileEntry;
 class HeaderSearch;
@@ -75,6 +77,14 @@ std::string fixIncludes(const AnalysisResults &Results, llvm::StringRef Code,
 std::string spellHeader(const Header &H, HeaderSearch &HS,
                         const FileEntry *Main);
+
+/// Gets all the providers for a symbol by traversing each location.
+/// Returned headers are sorted by relevance, first element is the most
+/// likely provider for the symbol.
+llvm::SmallVector<Header>
+headersForSymbol(const Symbol &S,
+                 const SourceManager &SM,
+                 const PragmaIncludes *PI);
+
 } // namespace include_cleaner
 } // namespace clang
 
diff --git a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
index acf462919344b..6bfed91b584b3 100644
--- a/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
+++ b/clang-tools-extra/include-cleaner/lib/AnalysisInternal.h
@@ -22,6 +22,7 @@
 #define CLANG_INCLUDE_CLEANER_ANALYSISINTERNAL_H
 
 #include "TypesInternal.h"
+#include "clang-include-cleaner/Analysis.h"
 #include "clang-include-cleaner/Record.h"
 #include "clang-include-cleaner/Types.h"
 #include "llvm/ADT/STLFunctionalExtras.h"
@@ -58,13 +59,6 @@ llvm::SmallVector<Hinted<Header>> findHeaders(const SymbolLocation &Loc,
 /// A set of locations that provides the declaration.
 std::vector<Hinted<SymbolLocation>> locateSymbol(const Symbol &S);
 
-/// Gets all the providers for a symbol by traversing each location.
-/// Returned headers are sorted by relevance, first element is the most
-/// likely provider for the symbol.
-llvm::SmallVector<Header>
headersForSymbol(const Symbol &S, - const SourceManager &SM, - const PragmaIncludes *PI); - /// Write an HTML summary of the analysis to the given stream. void writeHTMLReport(FileID File, const Includes &, llvm::ArrayRef Roots, From 9c8bdbcbc502fac7d7d8da5c848cec448daf26ae Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 23 Mar 2023 09:05:34 -0500 Subject: [PATCH 448/691] [libc] Implement memory fences on NVPTX Memory fences are not handled by the NVPTX backend. We need to replace them with a memory barrier intrinsic function. This doesn't include the ordering, but should perform the necessary functionality, albeit slower. Reviewed By: tianshilei1992 Differential Revision: https://reviews.llvm.org/D146725 --- libc/src/__support/CPP/atomic.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libc/src/__support/CPP/atomic.h b/libc/src/__support/CPP/atomic.h index b0e90e32dadd2..5514062525cce 100644 --- a/libc/src/__support/CPP/atomic.h +++ b/libc/src/__support/CPP/atomic.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_SUPPORT_CPP_ATOMIC_H #include "src/__support/macros/attributes.h" +#include "src/__support/macros/properties/architectures.h" #include "type_traits.h" @@ -96,7 +97,14 @@ template struct Atomic { // Issue a thread fence with the given memory ordering. LIBC_INLINE void atomic_thread_fence(MemoryOrder mem_ord) { +// The NVPTX backend currently does not support atomic thread fences so we use a +// full system fence instead. +#ifdef LIBC_TARGET_ARCH_IS_NVPTX + (void)mem_ord; + __nvvm_membar_sys(); +#else __atomic_thread_fence(int(mem_ord)); +#endif } } // namespace cpp From 5525087e4c7bdedea3935fd08ee99ac3ba53a5b2 Mon Sep 17 00:00:00 2001 From: Archibald Elliott Date: Thu, 23 Mar 2023 15:56:07 +0000 Subject: [PATCH 449/691] [NFC][AArch64] Sort Hints in armv8.3a-signed-pointer.s test --- .../test/MC/AArch64/armv8.3a-signed-pointer.s | 108 +++++++++--------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s b/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s index e13b1bf3c98d8..e95c9309a3d4d 100644 --- a/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s +++ b/llvm/test/MC/AArch64/armv8.3a-signed-pointer.s @@ -96,84 +96,84 @@ // ALL-EMPTY: // ALL-EMPTY: - hint #25 - paciasp -// CHECK-NEXT: paciasp // encoding: [0x3f,0x23,0x03,0xd5] -// CHECK-NEXT: paciasp // encoding: [0x3f,0x23,0x03,0xd5] -// NO83-NEXT: hint #25 // encoding: [0x3f,0x23,0x03,0xd5] -// NO83-NEXT: hint #25 // encoding: [0x3f,0x23,0x03,0xd5] - hint #29 - autiasp -// CHECK-NEXT: autiasp // encoding: [0xbf,0x23,0x03,0xd5] -// CHECK-NEXT: autiasp // encoding: [0xbf,0x23,0x03,0xd5] -// NO83-NEXT: hint #29 // encoding: [0xbf,0x23,0x03,0xd5] -// NO83-NEXT: hint #29 // encoding: [0xbf,0x23,0x03,0xd5] - hint #24 - paciaz -// CHECK-NEXT: paciaz // encoding: [0x1f,0x23,0x03,0xd5] -// CHECK-NEXT: paciaz // encoding: [0x1f,0x23,0x03,0xd5] -// NO83-NEXT: hint #24 // encoding: [0x1f,0x23,0x03,0xd5] -// NO83-NEXT: hint #24 // encoding: [0x1f,0x23,0x03,0xd5] - hint #28 - autiaz -// CHECK-NEXT: autiaz // encoding: [0x9f,0x23,0x03,0xd5] -// CHECK-NEXT: autiaz // encoding: [0x9f,0x23,0x03,0xd5] -// NO83-NEXT: hint #28 // encoding: [0x9f,0x23,0x03,0xd5] -// NO83-NEXT: hint #28 // encoding: [0x9f,0x23,0x03,0xd5] + hint #7 + xpaclri +// CHECK-NEXT: xpaclri // encoding: [0xff,0x20,0x03,0xd5] +// CHECK-NEXT: xpaclri // encoding: [0xff,0x20,0x03,0xd5] +// NO83-NEXT: hint #7 // encoding: [0xff,0x20,0x03,0xd5] +// NO83-NEXT: hint #7 // encoding: [0xff,0x20,0x03,0xd5] hint #8 pacia1716 
// CHECK-NEXT: pacia1716 // encoding: [0x1f,0x21,0x03,0xd5] // CHECK-NEXT: pacia1716 // encoding: [0x1f,0x21,0x03,0xd5] // NO83-NEXT: hint #8 // encoding: [0x1f,0x21,0x03,0xd5] // NO83-NEXT: hint #8 // encoding: [0x1f,0x21,0x03,0xd5] + hint #10 + pacib1716 +// CHECK-NEXT: pacib1716 // encoding: [0x5f,0x21,0x03,0xd5] +// CHECK-NEXT: pacib1716 // encoding: [0x5f,0x21,0x03,0xd5] +// NO83-NEXT: hint #10 // encoding: [0x5f,0x21,0x03,0xd5] +// NO83-NEXT: hint #10 // encoding: [0x5f,0x21,0x03,0xd5] hint #12 autia1716 // CHECK-NEXT: autia1716 // encoding: [0x9f,0x21,0x03,0xd5] // CHECK-NEXT: autia1716 // encoding: [0x9f,0x21,0x03,0xd5] // NO83-NEXT: hint #12 // encoding: [0x9f,0x21,0x03,0xd5] // NO83-NEXT: hint #12 // encoding: [0x9f,0x21,0x03,0xd5] - hint #27 - pacibsp -// CHECK-NEXT: pacibsp // encoding: [0x7f,0x23,0x03,0xd5] -// CHECK-NEXT: pacibsp // encoding: [0x7f,0x23,0x03,0xd5] -// NO83-NEXT: hint #27 // encoding: [0x7f,0x23,0x03,0xd5] -// NO83-NEXT: hint #27 // encoding: [0x7f,0x23,0x03,0xd5] - hint #31 - autibsp -// CHECK-NEXT: autibsp // encoding: [0xff,0x23,0x03,0xd5] -// CHECK-NEXT: autibsp // encoding: [0xff,0x23,0x03,0xd5] -// NO83-NEXT: hint #31 // encoding: [0xff,0x23,0x03,0xd5] -// NO83-NEXT: hint #31 // encoding: [0xff,0x23,0x03,0xd5] + hint #14 + autib1716 +// CHECK-NEXT: autib1716 // encoding: [0xdf,0x21,0x03,0xd5] +// CHECK-NEXT: autib1716 // encoding: [0xdf,0x21,0x03,0xd5] +// NO83-NEXT: hint #14 // encoding: [0xdf,0x21,0x03,0xd5] +// NO83-NEXT: hint #14 // encoding: [0xdf,0x21,0x03,0xd5] + hint #24 + paciaz +// CHECK-NEXT: paciaz // encoding: [0x1f,0x23,0x03,0xd5] +// CHECK-NEXT: paciaz // encoding: [0x1f,0x23,0x03,0xd5] +// NO83-NEXT: hint #24 // encoding: [0x1f,0x23,0x03,0xd5] +// NO83-NEXT: hint #24 // encoding: [0x1f,0x23,0x03,0xd5] + hint #25 + paciasp +// CHECK-NEXT: paciasp // encoding: [0x3f,0x23,0x03,0xd5] +// CHECK-NEXT: paciasp // encoding: [0x3f,0x23,0x03,0xd5] +// NO83-NEXT: hint #25 // encoding: [0x3f,0x23,0x03,0xd5] +// NO83-NEXT: hint #25 // encoding: [0x3f,0x23,0x03,0xd5] hint #26 pacibz // CHECK-NEXT: pacibz // encoding: [0x5f,0x23,0x03,0xd5] // CHECK-NEXT: pacibz // encoding: [0x5f,0x23,0x03,0xd5] // NO83-NEXT: hint #26 // encoding: [0x5f,0x23,0x03,0xd5] // NO83-NEXT: hint #26 // encoding: [0x5f,0x23,0x03,0xd5] + hint #27 + pacibsp +// CHECK-NEXT: pacibsp // encoding: [0x7f,0x23,0x03,0xd5] +// CHECK-NEXT: pacibsp // encoding: [0x7f,0x23,0x03,0xd5] +// NO83-NEXT: hint #27 // encoding: [0x7f,0x23,0x03,0xd5] +// NO83-NEXT: hint #27 // encoding: [0x7f,0x23,0x03,0xd5] + hint #28 + autiaz +// CHECK-NEXT: autiaz // encoding: [0x9f,0x23,0x03,0xd5] +// CHECK-NEXT: autiaz // encoding: [0x9f,0x23,0x03,0xd5] +// NO83-NEXT: hint #28 // encoding: [0x9f,0x23,0x03,0xd5] +// NO83-NEXT: hint #28 // encoding: [0x9f,0x23,0x03,0xd5] + hint #29 + autiasp +// CHECK-NEXT: autiasp // encoding: [0xbf,0x23,0x03,0xd5] +// CHECK-NEXT: autiasp // encoding: [0xbf,0x23,0x03,0xd5] +// NO83-NEXT: hint #29 // encoding: [0xbf,0x23,0x03,0xd5] +// NO83-NEXT: hint #29 // encoding: [0xbf,0x23,0x03,0xd5] hint #30 autibz // CHECK-NEXT: autibz // encoding: [0xdf,0x23,0x03,0xd5] // CHECK-NEXT: autibz // encoding: [0xdf,0x23,0x03,0xd5] // NO83-NEXT: hint #30 // encoding: [0xdf,0x23,0x03,0xd5] // NO83-NEXT: hint #30 // encoding: [0xdf,0x23,0x03,0xd5] - hint #10 - pacib1716 -// CHECK-NEXT: pacib1716 // encoding: [0x5f,0x21,0x03,0xd5] -// CHECK-NEXT: pacib1716 // encoding: [0x5f,0x21,0x03,0xd5] -// NO83-NEXT: hint #10 // encoding: [0x5f,0x21,0x03,0xd5] -// NO83-NEXT: hint #10 // encoding: 
[0x5f,0x21,0x03,0xd5] - hint #14 - autib1716 -// CHECK-NEXT: autib1716 // encoding: [0xdf,0x21,0x03,0xd5] -// CHECK-NEXT: autib1716 // encoding: [0xdf,0x21,0x03,0xd5] -// NO83-NEXT: hint #14 // encoding: [0xdf,0x21,0x03,0xd5] -// NO83-NEXT: hint #14 // encoding: [0xdf,0x21,0x03,0xd5] - hint #7 - xpaclri -// CHECK-NEXT: xpaclri // encoding: [0xff,0x20,0x03,0xd5] -// CHECK-NEXT: xpaclri // encoding: [0xff,0x20,0x03,0xd5] -// NO83-NEXT: hint #7 // encoding: [0xff,0x20,0x03,0xd5] -// NO83-NEXT: hint #7 // encoding: [0xff,0x20,0x03,0xd5] + hint #31 + autibsp +// CHECK-NEXT: autibsp // encoding: [0xff,0x23,0x03,0xd5] +// CHECK-NEXT: autibsp // encoding: [0xff,0x23,0x03,0xd5] +// NO83-NEXT: hint #31 // encoding: [0xff,0x23,0x03,0xd5] +// NO83-NEXT: hint #31 // encoding: [0xff,0x23,0x03,0xd5] // ALL-EMPTY: pacia x0, x1 From e33f8ac9d8b3bd8b376d2306c3988381309b68eb Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 23 Mar 2023 11:27:20 -0500 Subject: [PATCH 450/691] [libc] Fix inline assembly for nvptx quick_exit Summary: The `exit` function in NVPTX has no intrinsic, but the assembly requires a semicolon in the ptx, otherwise it will fail. --- libc/src/__support/OSUtil/gpu/quick_exit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/__support/OSUtil/gpu/quick_exit.cpp b/libc/src/__support/OSUtil/gpu/quick_exit.cpp index 56f0427c8d81b..3fab438a357a5 100644 --- a/libc/src/__support/OSUtil/gpu/quick_exit.cpp +++ b/libc/src/__support/OSUtil/gpu/quick_exit.cpp @@ -27,7 +27,7 @@ void quick_exit(int status) { [](rpc::Buffer *) { /* void */ }); #if defined(LIBC_TARGET_ARCH_IS_NVPTX) - asm("exit" ::: "memory"); + asm("exit;" ::: "memory"); #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU) // This will terminate the entire wavefront, may not be valid with divergent // work items. From 2f5fdbfab8c63047bd4ebef154258868065168b3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 23 Mar 2023 17:32:23 +0100 Subject: [PATCH 451/691] [MergeFunc] Don't assume constant metadata operands We should not call mdconst::extract, unless we know that the metadata in question is ConstantAsMetadata. For now we consider all other metadata as equal. The noalias test shows that this is not correct, but at least it doesn't crash anymore. 
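To illustrate the API contract at issue, here is a minimal standalone sketch
(not code from this patch; the helper name tryGetConstantOperand is invented):
mdconst::extract<ConstantInt> asserts that an operand wraps a constant, so the
probing has to go through dyn_cast, which simply returns null for scope lists,
strings, and other non-constant metadata kinds.

    // Sketch: safely probe a metadata operand for a wrapped ConstantInt.
    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Metadata.h"
    #include <optional>

    static std::optional<llvm::APInt>
    tryGetConstantOperand(const llvm::Metadata *M) {
      // dyn_cast_or_null tolerates null and wrong dynamic types; the
      // mdconst::extract path instead asserts on anything that is not
      // ConstantAsMetadata (e.g. the node operands behind !noalias).
      if (const auto *CM = llvm::dyn_cast_or_null<llvm::ConstantAsMetadata>(M))
        if (const auto *CI = llvm::dyn_cast<llvm::ConstantInt>(CM->getValue()))
          return CI->getValue();
      return std::nullopt; // Non-constant metadata: caller decides the order.
    }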
---
 .../Transforms/Utils/FunctionComparator.h     |  3 +-
 .../Transforms/Utils/FunctionComparator.cpp   | 44 ++++++++++---------
 .../MergeFunc/mergefunc-preserve-nonnull.ll   | 38 +++++++++++++---
 3 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
index 400b9faa94c1b..78761fc78fee8 100644
--- a/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
+++ b/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -332,7 +332,8 @@ class FunctionComparator {
   int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const;
   int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
   int cmpAttrs(const AttributeList L, const AttributeList R) const;
-  int cmpMetadata(const MDNode *L, const MDNode *R) const;
+  int cmpMDNode(const MDNode *L, const MDNode *R) const;
+  int cmpMetadata(const Metadata *L, const Metadata *R) const;
   int cmpInstMetadata(Instruction const *L, Instruction const *R) const;
   int cmpOperandBundlesSchema(const CallBase &LCS, const CallBase &RCS) const;
 
diff --git a/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
index af8bc8126160e..7fb6a7415a6fe 100644
--- a/llvm/lib/Transforms/Utils/FunctionComparator.cpp
+++ b/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -157,7 +157,25 @@ int FunctionComparator::cmpAttrs(const AttributeList L,
   return 0;
 }
 
-int FunctionComparator::cmpMetadata(const MDNode *L, const MDNode *R) const {
+int FunctionComparator::cmpMetadata(const Metadata *L,
+                                    const Metadata *R) const {
+  // TODO: the following routine coerce the metadata contents into constants
+  // before comparison.
+  // It ignores any other cases, so that the metadata nodes are considered
+  // equal even though this is not correct.
+  // We should structurally compare the metadata nodes to be perfect here.
+  auto *CL = dyn_cast<ConstantAsMetadata>(L);
+  auto *CR = dyn_cast<ConstantAsMetadata>(R);
+  if (CL == CR)
+    return 0;
+  if (!CL)
+    return -1;
+  if (!CR)
+    return 1;
+  return cmpConstants(CL->getValue(), CR->getValue());
+}
+
+int FunctionComparator::cmpMDNode(const MDNode *L, const MDNode *R) const {
   if (L == R)
     return 0;
   if (!L)
@@ -172,23 +190,9 @@ int FunctionComparator::cmpMetadata(const MDNode *L, const MDNode *R) const {
   // function semantically.
   if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
     return Res;
-  for (size_t I = 0; I < L->getNumOperands(); ++I) {
-    // TODO: the following routine coerce the metadata contents into numbers
-    // before comparison.
-    // It ignores any other cases, so that the metadata nodes are considered
-    // equal even though this is not correct.
-    // We should structurally compare the metadata nodes to be perfect here.
-    ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
-    ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
-    if (LLow == RLow)
-      continue;
-    if (!LLow)
-      return -1;
-    if (!RLow)
-      return 1;
-    if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+  for (size_t I = 0; I < L->getNumOperands(); ++I)
+    if (int Res = cmpMetadata(L->getOperand(I), R->getOperand(I)))
       return Res;
-  }
   return 0;
 }
 
@@ -209,7 +213,7 @@ int FunctionComparator::cmpInstMetadata(Instruction const *L,
     auto const [KeyR, MR] = MDR[I];
     if (int Res = cmpNumbers(KeyL, KeyR))
       return Res;
-    if (int Res = cmpMetadata(ML, MR))
+    if (int Res = cmpMDNode(ML, MR))
       return Res;
   }
   return 0;
@@ -645,8 +649,8 @@ int FunctionComparator::cmpOperations(const Instruction *L,
     if (int Res = cmpNumbers(CI->getTailCallKind(),
                              cast<CallInst>(R)->getTailCallKind()))
       return Res;
-    return cmpMetadata(L->getMetadata(LLVMContext::MD_range),
-                       R->getMetadata(LLVMContext::MD_range));
+    return cmpMDNode(L->getMetadata(LLVMContext::MD_range),
+                     R->getMetadata(LLVMContext::MD_range));
   }
   if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
     ArrayRef<unsigned> LIndices = IVI->getIndices();
diff --git a/llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll
index 12bb0e8b38425..3481d53b626fc 100644
--- a/llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll
+++ b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-nonnull.ll
@@ -28,8 +28,8 @@ define void @f2(ptr %0, ptr %1) {
   ret void
 }
 
-define void @f3(ptr %0, ptr %1) {
-; CHECK-LABEL: @f3(
+define void @noundef(ptr %0, ptr %1) {
+; CHECK-LABEL: @noundef(
 ; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP1:%.*]], align 8, !noundef !0
 ; CHECK-NEXT:    store ptr [[TMP3]], ptr [[TMP0:%.*]], align 8
 ; CHECK-NEXT:    ret void
@@ -39,9 +39,20 @@ define void @f2(ptr %0, ptr %1) {
   ret void
 }
 
-define void @f4(ptr %0, ptr %1) {
-; CHECK-LABEL: @f4(
-; CHECK-NEXT:    tail call void @f3(ptr [[TMP0:%.*]], ptr [[TMP1:%.*]])
+define void @noalias_1(ptr %0, ptr %1) {
+; CHECK-LABEL: @noalias_1(
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[TMP1:%.*]], align 8, !noalias !1
+; CHECK-NEXT:    store ptr [[TMP3]], ptr [[TMP0:%.*]], align 8, !alias.scope !1
+; CHECK-NEXT:    ret void
+;
+  %3 = load ptr, ptr %1, align 8, !noalias !4
+  store ptr %3, ptr %0, align 8, !alias.scope !4
+  ret void
+}
+
+define void @noundef_dbg(ptr %0, ptr %1) {
+; CHECK-LABEL: @noundef_dbg(
+; CHECK-NEXT:    tail call void @noundef(ptr [[TMP0:%.*]], ptr [[TMP1:%.*]])
 ; CHECK-NEXT:    ret void
 ;
   %3 = load ptr, ptr %1, align 8, !noundef !0, !dbg !1
@@ -49,5 +60,22 @@ define void @f2(ptr %0, ptr %1) {
   ret void
 }
 
+; FIXME: This is merged despite different noalias metadata.
+define void @noalias_2(ptr %0, ptr %1) {
+; CHECK-LABEL: @noalias_2(
+; CHECK-NEXT:    tail call void @noalias_1(ptr [[TMP0:%.*]], ptr [[TMP1:%.*]])
+; CHECK-NEXT:    ret void
+;
+  %3 = load ptr, ptr %1, align 8, !noalias !7
+  store ptr %3, ptr %0, align 8, !alias.scope !7
+  ret void
+}
+
 !0 = !{}
 !1 = !{}
+!2 = !{!2}
+!3 = !{!3, !2}
+!4 = !{!3}
+!5 = !{!5}
+!6 = !{!6, !5}
+!7 = !{!6}

From 4c5dee7773dcc5ec1b7485dfba5ce5baa2355b2f Mon Sep 17 00:00:00 2001
From: Renaud-K
Date: Tue, 21 Mar 2023 16:32:26 -0700
Subject: [PATCH 452/691] [flang] Lowering fir.dispatch in the polymorphic op
 pass

Differential revision: https://reviews.llvm.org/D146594
---
 .../flang/Semantics/runtime-type-info.h       |   8 +
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       | 152 +-------------
 .../Transforms/PolymorphicOpConversion.cpp    | 155 ++++++++++++++
 flang/lib/Semantics/runtime-type-info.cpp     |   9 +-
 flang/test/Fir/dispatch.f90                   | 192 ++++++++++--------
 flang/test/Lower/allocatable-polymorphic.f90  |   8 +-
 6 files changed, 287 insertions(+), 237 deletions(-)

diff --git a/flang/include/flang/Semantics/runtime-type-info.h b/flang/include/flang/Semantics/runtime-type-info.h
index 76560b98b1c20..e27091cf32de0 100644
--- a/flang/include/flang/Semantics/runtime-type-info.h
+++ b/flang/include/flang/Semantics/runtime-type-info.h
@@ -42,6 +42,14 @@ RuntimeDerivedTypeTables BuildRuntimeDerivedTypeTables(SemanticsContext &);
 /// to describe other derived types at runtime in flang descriptor.
 constexpr char typeInfoBuiltinModule[]{"__fortran_type_info"};
 
+/// Name of the bindings descriptor component in the DerivedType type of the
+/// __Fortran_type_info module
+constexpr char bindingDescCompName[]{"binding"};
+
+/// Name of the __builtin_c_funptr component in the Binding type of the
+/// __Fortran_type_info module
+constexpr char procCompName[]{"proc"};
+
 SymbolVector CollectBindings(const Scope &dtScope);
 
 } // namespace Fortran::semantics
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 6b4591789c545..8ea8fa7290372 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -104,10 +104,8 @@ template <typename FromOp>
 class FIROpConversion : public mlir::ConvertOpToLLVMPattern<FromOp> {
 public:
   explicit FIROpConversion(fir::LLVMTypeConverter &lowering,
-                           const fir::FIRToLLVMPassOptions &options,
-                           const fir::BindingTables &bindingTables)
-      : mlir::ConvertOpToLLVMPattern<FromOp>(lowering), options(options),
-        bindingTables(bindingTables) {}
+                           const fir::FIRToLLVMPassOptions &options)
+      : mlir::ConvertOpToLLVMPattern<FromOp>(lowering), options(options) {}
 
 protected:
   mlir::Type convertType(mlir::Type ty) const {
@@ -358,7 +356,6 @@ class FIROpConversion : public mlir::ConvertOpToLLVMPattern<FromOp> {
   }
 
   const fir::FIRToLLVMPassOptions &options;
-  const fir::BindingTables &bindingTables;
 };
 
 /// FIR conversion pattern template
@@ -970,131 +967,6 @@ struct ConvertOpConversion : public FIROpConversion<fir::ConvertOp> {
   }
 };
 
-/// Lower `fir.dispatch` operation. A virtual call to a method in a dispatch
-/// table.
-struct DispatchOpConversion : public FIROpConversion<fir::DispatchOp> {
-  using FIROpConversion::FIROpConversion;
-
-  mlir::LogicalResult
-  matchAndRewrite(fir::DispatchOp dispatch, OpAdaptor adaptor,
-                  mlir::ConversionPatternRewriter &rewriter) const override {
-    mlir::Location loc = dispatch.getLoc();
-
-    if (bindingTables.empty())
-      return emitError(loc) << "no binding tables found";
-
-    // Get derived type information.
- mlir::Type declaredType = - fir::getDerivedType(dispatch.getObject().getType().getEleTy()); - assert(declaredType.isa() && "expecting fir.type"); - auto recordType = declaredType.dyn_cast(); - - // Lookup for the binding table. - auto bindingsIter = bindingTables.find(recordType.getName()); - if (bindingsIter == bindingTables.end()) - return emitError(loc) - << "cannot find binding table for " << recordType.getName(); - - // Lookup for the binding. - const fir::BindingTable &bindingTable = bindingsIter->second; - auto bindingIter = bindingTable.find(dispatch.getMethod()); - if (bindingIter == bindingTable.end()) - return emitError(loc) - << "cannot find binding for " << dispatch.getMethod(); - unsigned bindingIdx = bindingIter->second; - - mlir::Value passedObject = dispatch.getObject(); - - auto module = dispatch.getOperation()->getParentOfType(); - mlir::Type typeDescTy; - std::string typeDescName = - fir::NameUniquer::getTypeDescriptorName(recordType.getName()); - if (auto global = module.lookupSymbol(typeDescName)) { - typeDescTy = convertType(global.getType()); - } else if (auto global = - module.lookupSymbol(typeDescName)) { - // The global may have already been translated to LLVM. - typeDescTy = global.getType(); - } - - unsigned typeDescFieldId = getTypeDescFieldId(passedObject.getType()); - - auto descPtr = adaptor.getOperands()[0] - .getType() - .dyn_cast(); - - // TODO: the following loads from the type descriptor related - // data structures must have proper TBAA access tags. - // These loads cannot alias with any real data accesses nor - // with any box accesses. Moreover, they can probably be marked - // as reading from constant memory (fourth operand of a TBAA - // tag may be set to true). These accesses probably deserve - // separate sub-root in the TBAA graph. - - // Load the descriptor. - auto desc = rewriter.create( - loc, descPtr.getElementType(), adaptor.getOperands()[0]); - - // Load the type descriptor. - auto typeDescPtr = - rewriter.create(loc, desc, typeDescFieldId); - auto typeDesc = - rewriter.create(loc, typeDescTy, typeDescPtr); - - // Load the bindings descriptor. - auto typeDescStructTy = typeDescTy.dyn_cast(); - auto bindingDescType = - typeDescStructTy.getBody()[0].dyn_cast(); - auto bindingDesc = - rewriter.create(loc, typeDesc, 0); - - // Load the correct binding. - auto bindingType = - bindingDescType.getBody()[0].dyn_cast(); - auto baseBindingPtr = rewriter.create( - loc, bindingDesc, kAddrPosInBox); - auto bindingPtr = rewriter.create( - loc, bindingType, baseBindingPtr, - llvm::ArrayRef{static_cast(bindingIdx)}); - auto binding = rewriter.create( - loc, bindingType.getElementType(), bindingPtr); - - // Get the function type. - llvm::SmallVector argTypes; - for (mlir::Value operand : adaptor.getOperands().drop_front()) - argTypes.push_back(operand.getType()); - mlir::Type resultType; - if (dispatch.getResults().empty()) - resultType = mlir::LLVM::LLVMVoidType::get(dispatch.getContext()); - else - resultType = convertType(dispatch.getResults()[0].getType()); - auto fctType = mlir::LLVM::LLVMFunctionType::get(resultType, argTypes, - /*isVarArg=*/false); - - // Get the function pointer. - auto builtinFuncPtr = - rewriter.create(loc, binding, 0); - auto funcAddr = - rewriter.create(loc, builtinFuncPtr, 0); - auto funcPtr = rewriter.create( - loc, mlir::LLVM::LLVMPointerType::get(fctType), funcAddr); - - // Indirect calls are done with the function pointer as the first operand. 
- llvm::SmallVector args; - args.push_back(funcPtr); - for (mlir::Value operand : adaptor.getOperands().drop_front()) - args.push_back(operand); - auto callOp = rewriter.replaceOpWithNewOp( - dispatch, - dispatch.getResults().empty() ? mlir::TypeRange{} - : fctType.getReturnType(), - "", args); - callOp.removeCalleeAttr(); // Indirect calls do not have callee attr. - - return mlir::success(); - } -}; - /// `fir.disptach_table` operation has no specific CodeGen. The operation is /// only used to carry information during FIR to FIR passes. struct DispatchTableOpConversion @@ -3656,9 +3528,8 @@ struct NegcOpConversion : public FIROpConversion { template struct MustBeDeadConversion : public FIROpConversion { explicit MustBeDeadConversion(fir::LLVMTypeConverter &lowering, - const fir::FIRToLLVMPassOptions &options, - const fir::BindingTables &bindingTables) - : FIROpConversion(lowering, options, bindingTables) {} + const fir::FIRToLLVMPassOptions &options) + : FIROpConversion(lowering, options) {} using OpAdaptor = typename FromOp::Adaptor; mlir::LogicalResult @@ -3768,9 +3639,6 @@ class FIRToLLVMLowering if (mlir::failed(runPipeline(mathConvertionPM, mod))) return signalPassFailure(); - fir::BindingTables bindingTables; - fir::buildBindingTables(bindingTables, mod); - auto *context = getModule().getContext(); fir::LLVMTypeConverter typeConverter{getModule(), options.applyTBAA || applyTBAA}; @@ -3783,11 +3651,11 @@ class FIRToLLVMLowering BoxProcHostOpConversion, BoxRankOpConversion, BoxTypeCodeOpConversion, BoxTypeDescOpConversion, CallOpConversion, CmpcOpConversion, ConstcOpConversion, ConvertOpConversion, CoordinateOpConversion, - DispatchOpConversion, DispatchTableOpConversion, DTEntryOpConversion, - DivcOpConversion, EmboxOpConversion, EmboxCharOpConversion, - EmboxProcOpConversion, ExtractValueOpConversion, FieldIndexOpConversion, - FirEndOpConversion, FreeMemOpConversion, GlobalLenOpConversion, - GlobalOpConversion, HasValueOpConversion, InsertOnRangeOpConversion, + DispatchTableOpConversion, DTEntryOpConversion, DivcOpConversion, + EmboxOpConversion, EmboxCharOpConversion, EmboxProcOpConversion, + ExtractValueOpConversion, FieldIndexOpConversion, FirEndOpConversion, + FreeMemOpConversion, GlobalLenOpConversion, GlobalOpConversion, + HasValueOpConversion, InsertOnRangeOpConversion, InsertValueOpConversion, IsPresentOpConversion, LenParamIndexOpConversion, LoadOpConversion, MulcOpConversion, NegcOpConversion, NoReassocOpConversion, SelectCaseOpConversion, @@ -3797,7 +3665,7 @@ class FIRToLLVMLowering SubcOpConversion, TypeDescOpConversion, UnboxCharOpConversion, UnboxProcOpConversion, UndefOpConversion, UnreachableOpConversion, XArrayCoorOpConversion, XEmboxOpConversion, XReboxOpConversion, - ZeroOpConversion>(typeConverter, options, bindingTables); + ZeroOpConversion>(typeConverter, options); mlir::populateFuncToLLVMConversionPatterns(typeConverter, pattern); mlir::populateOpenMPToLLVMConversionPatterns(typeConverter, pattern); mlir::arith::populateArithToLLVMConversionPatterns(typeConverter, pattern); diff --git a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp index f7ee2c19d45ac..2f8cdf7934436 100644 --- a/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp @@ -6,18 +6,25 @@ // //===----------------------------------------------------------------------===// +#include "flang/Lower/BuiltinModules.h" +#include "flang/Optimizer/Builder/Todo.h" 
#include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROps.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" +#include "flang/Optimizer/Dialect/FIRType.h" #include "flang/Optimizer/Dialect/Support/FIRContext.h" #include "flang/Optimizer/Dialect/Support/KindMapping.h" #include "flang/Optimizer/Support/InternalNames.h" #include "flang/Optimizer/Support/TypeCode.h" +#include "flang/Optimizer/Support/Utils.h" #include "flang/Optimizer/Transforms/Passes.h" #include "flang/Runtime/derived-api.h" +#include "flang/Semantics/runtime-type-info.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/IR/BuiltinOps.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" #include "llvm/ADT/SmallSet.h" @@ -72,6 +79,147 @@ class SelectTypeConv : public OpConversionPattern { std::mutex *moduleMutex; }; +/// Lower `fir.dispatch` operation. A virtual call to a method in a dispatch +/// table. +struct DispatchOpConv : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + DispatchOpConv(mlir::MLIRContext *ctx, const BindingTables &bindingTables) + : mlir::OpConversionPattern(ctx), + bindingTables(bindingTables) {} + + mlir::LogicalResult + matchAndRewrite(fir::DispatchOp dispatch, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const override { + mlir::Location loc = dispatch.getLoc(); + + if (bindingTables.empty()) + return emitError(loc) << "no binding tables found"; + + // Get derived type information. + mlir::Type declaredType = + fir::getDerivedType(dispatch.getObject().getType().getEleTy()); + assert(declaredType.isa() && "expecting fir.type"); + auto recordType = declaredType.dyn_cast(); + + // Lookup for the binding table. + auto bindingsIter = bindingTables.find(recordType.getName()); + if (bindingsIter == bindingTables.end()) + return emitError(loc) + << "cannot find binding table for " << recordType.getName(); + + // Lookup for the binding. 
+    const BindingTable &bindingTable = bindingsIter->second;
+    auto bindingIter = bindingTable.find(dispatch.getMethod());
+    if (bindingIter == bindingTable.end())
+      return emitError(loc)
+             << "cannot find binding for " << dispatch.getMethod();
+    unsigned bindingIdx = bindingIter->second;
+
+    mlir::Value passedObject = dispatch.getObject();
+
+    auto module = dispatch.getOperation()->getParentOfType<mlir::ModuleOp>();
+    Type typeDescTy;
+    std::string typeDescName =
+        NameUniquer::getTypeDescriptorName(recordType.getName());
+    if (auto global = module.lookupSymbol<fir::GlobalOp>(typeDescName)) {
+      typeDescTy = global.getType();
+    }
+
+    // clang-format off
+    // Before:
+    //   fir.dispatch "proc1"(%11 :
+    //   !fir.class<!fir.heap<!fir.type<_QMdispatch1Tp1{a:i32,b:i32}>>>)
+
+    // After:
+    //   %12 = fir.box_tdesc %11 : (!fir.class<!fir.heap<!fir.type<_QMdispatch1Tp1{a:i32,b:i32}>>>) -> !fir.tdesc<none>
+    //   %13 = fir.convert %12 : (!fir.tdesc<none>) -> !fir.ref<!fir.type<_QM__fortran_type_infoTderivedtype>>
+    //   %14 = fir.field_index binding, !fir.type<_QM__fortran_type_infoTderivedtype>
+    //   %15 = fir.coordinate_of %13, %14 : (!fir.ref<!fir.type<_QM__fortran_type_infoTderivedtype>>, !fir.field) -> !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding>>>>>
+    //   %bindings = fir.load %15 : !fir.ref<!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding>>>>>
+    //   %16 = fir.box_addr %bindings : (!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding>>>>) -> !fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding>>>
+    //   %17 = fir.coordinate_of %16, %c0 : (!fir.ptr<!fir.array<?x!fir.type<_QM__fortran_type_infoTbinding>>>, index) -> !fir.ref<!fir.type<_QM__fortran_type_infoTbinding>>
+    //   %18 = fir.field_index proc, !fir.type<_QM__fortran_type_infoTbinding>
+    //   %19 = fir.coordinate_of %17, %18 : (!fir.ref<!fir.type<_QM__fortran_type_infoTbinding>>, !fir.field) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_funptr>>
+    //   %20 = fir.field_index __address, !fir.type<_QM__fortran_builtinsT__builtin_c_funptr>
+    //   %21 = fir.coordinate_of %19, %20 : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_c_funptr>>, !fir.field) -> !fir.ref<i64>
+    //   %22 = fir.load %21 : !fir.ref<i64>
+    //   %23 = fir.convert %22 : (i64) -> (() -> ())
+    //   fir.call %23() : () -> ()
+    // clang-format on
+
+    // Load the descriptor.
+    mlir::Type fieldTy = fir::FieldType::get(rewriter.getContext());
+    mlir::Type tdescType =
+        fir::TypeDescType::get(mlir::NoneType::get(rewriter.getContext()));
+    mlir::Value boxDesc =
+        rewriter.create<fir::BoxTypeDescOp>(loc, tdescType, passedObject);
+    boxDesc = rewriter.create<fir::ConvertOp>(
+        loc, fir::ReferenceType::get(typeDescTy), boxDesc);
+
+    // Load the bindings descriptor.
+    auto bindingsCompName = Fortran::semantics::bindingDescCompName;
+    fir::RecordType typeDescRecTy = typeDescTy.cast<fir::RecordType>();
+    mlir::Value field = rewriter.create<fir::FieldIndexOp>(
+        loc, fieldTy, bindingsCompName, typeDescRecTy, mlir::ValueRange{});
+    mlir::Type coorTy =
+        fir::ReferenceType::get(typeDescRecTy.getType(bindingsCompName));
+    mlir::Value bindingBoxAddr =
+        rewriter.create<fir::CoordinateOp>(loc, coorTy, boxDesc, field);
+    mlir::Value bindingBox = rewriter.create<fir::LoadOp>(loc, bindingBoxAddr);
+
+    // Load the correct binding.
+    mlir::Value bindings = rewriter.create<fir::BoxAddrOp>(loc, bindingBox);
+    fir::RecordType bindingTy =
+        fir::unwrapIfDerived(bindingBox.getType().cast<fir::BoxType>());
+    mlir::Type bindingAddrTy = fir::ReferenceType::get(bindingTy);
+    mlir::Value bindingIdxVal = rewriter.create<mlir::arith::ConstantOp>(
+        loc, rewriter.getIndexType(), rewriter.getIndexAttr(bindingIdx));
+    mlir::Value bindingAddr = rewriter.create<fir::CoordinateOp>(
+        loc, bindingAddrTy, bindings, bindingIdxVal);
+
+    // Get the function pointer.
+    auto procCompName = Fortran::semantics::procCompName;
+    mlir::Value procField = rewriter.create<fir::FieldIndexOp>(
+        loc, fieldTy, procCompName, bindingTy, mlir::ValueRange{});
+    fir::RecordType procTy =
+        bindingTy.getType(procCompName).cast<fir::RecordType>();
+    mlir::Type procRefTy = fir::ReferenceType::get(procTy);
+    mlir::Value procRef = rewriter.create<fir::CoordinateOp>(
+        loc, procRefTy, bindingAddr, procField);
+
+    auto addressFieldName = Fortran::lower::builtin::cptrFieldName;
+    mlir::Value addressField = rewriter.create<fir::FieldIndexOp>(
+        loc, fieldTy, addressFieldName, procTy, mlir::ValueRange{});
+    mlir::Type addressTy = procTy.getType(addressFieldName);
+    mlir::Type addressRefTy = fir::ReferenceType::get(addressTy);
+    mlir::Value addressRef = rewriter.create<fir::CoordinateOp>(
+        loc, addressRefTy, procRef, addressField);
+    mlir::Value address = rewriter.create<fir::LoadOp>(loc, addressRef);
+
+    // Get the function type.
+    llvm::SmallVector<mlir::Type> argTypes;
+    for (mlir::Value operand : dispatch.getArgs())
+      argTypes.push_back(operand.getType());
+    llvm::SmallVector<mlir::Type> resTypes;
+    if (!dispatch.getResults().empty())
+      resTypes.push_back(dispatch.getResults()[0].getType());
+
+    mlir::Type funTy =
+        mlir::FunctionType::get(rewriter.getContext(), argTypes, resTypes);
+    mlir::Value funcPtr = rewriter.create<fir::ConvertOp>(loc, funTy, address);
+
+    // Make the call.
+    llvm::SmallVector<mlir::Value> args{funcPtr};
+    args.append(dispatch.getArgs().begin(), dispatch.getArgs().end());
+    rewriter.replaceOpWithNewOp<fir::CallOp>(dispatch, resTypes, nullptr,
+                                             args);
+    return mlir::success();
+  }
+
+private:
+  BindingTables bindingTables;
+};
+
 /// Convert FIR structured control flow ops to CFG ops.
 class PolymorphicOpConversion
     : public fir::impl::PolymorphicOpConversionBase<PolymorphicOpConversion> {
@@ -83,14 +231,21 @@ class PolymorphicOpConversion
   void runOnOperation() override {
     auto *context = &getContext();
+    auto mod = getOperation()->getParentOfType<mlir::ModuleOp>();
     mlir::RewritePatternSet patterns(context);
+
+    BindingTables bindingTables;
+    buildBindingTables(bindingTables, mod);
+
     patterns.insert<SelectTypeConv>(context, moduleMutex);
+    patterns.insert<DispatchOpConv>(context, bindingTables);
     mlir::ConversionTarget target(*context);
     target.addLegalDialect();
 
     // apply the patterns
     target.addIllegalOp<fir::SelectTypeOp>();
+    target.addIllegalOp<fir::DispatchOp>();
     target.markUnknownOpDynamicallyLegal([](Operation *) { return true; });
     if (mlir::failed(mlir::applyPartialConversion(getOperation(), target,
                                                   std::move(patterns)))) {
diff --git a/flang/lib/Semantics/runtime-type-info.cpp b/flang/lib/Semantics/runtime-type-info.cpp
index 29f63524b5c07..5e57c70c42fbb 100644
--- a/flang/lib/Semantics/runtime-type-info.cpp
+++ b/flang/lib/Semantics/runtime-type-info.cpp
@@ -151,7 +151,8 @@ RuntimeTableBuilder::RuntimeTableBuilder(
     : context_{c}, tables_{t}, derivedTypeSchema_{GetSchema("derivedtype")},
       componentSchema_{GetSchema("component")}, procPtrSchema_{GetSchema(
                                                     "procptrcomponent")},
-      valueSchema_{GetSchema("value")}, bindingSchema_{GetSchema("binding")},
+      valueSchema_{GetSchema("value")}, bindingSchema_{GetSchema(
+                                            bindingDescCompName)},
       specialSchema_{GetSchema("specialbinding")}, deferredEnum_{GetEnumValue(
                                                        "deferred")},
       explicitEnum_{GetEnumValue("explicit")}, lenParameterEnum_{GetEnumValue(
@@ -562,7 +563,7 @@ const Symbol *RuntimeTableBuilder::DescribeType(Scope &dtScope) {
   if (!isAbstractType) {
     std::vector<evaluate::StructureConstructor> bindings{
         DescribeBindings(dtScope, scope)};
-    AddValue(dtValues, derivedTypeSchema_, "binding"s,
+    AddValue(dtValues, derivedTypeSchema_, bindingDescCompName,
         SaveDerivedPointerTarget(scope, SaveObjectName(".v."s + distinctName),
             std::move(bindings),
             evaluate::ConstantSubscripts{
@@ -982,7 +983,7 @@
RuntimeTableBuilder::DescribeBindings(const Scope &dtScope, Scope &scope) { std::vector result; for (const SymbolRef &ref : CollectBindings(dtScope)) { evaluate::StructureConstructorValues values; - AddValue(values, bindingSchema_, "proc"s, + AddValue(values, bindingSchema_, procCompName, SomeExpr{evaluate::ProcedureDesignator{ ref.get().get().symbol()}}); AddValue(values, bindingSchema_, "name"s, @@ -1152,7 +1153,7 @@ void RuntimeTableBuilder::DescribeSpecialProc( values, specialSchema_, "which"s, SomeExpr{std::move(which.value())}); AddValue(values, specialSchema_, "isargdescriptorset"s, IntExpr<1>(isArgDescriptorSet)); - AddValue(values, specialSchema_, "proc"s, + AddValue(values, specialSchema_, procCompName, SomeExpr{evaluate::ProcedureDesignator{specific}}); // index might already be present in the case of an override specials.emplace(*index, diff --git a/flang/test/Fir/dispatch.f90 b/flang/test/Fir/dispatch.f90 index dcb52bed7d967..933c769d3e169 100644 --- a/flang/test/Fir/dispatch.f90 +++ b/flang/test/Fir/dispatch.f90 @@ -1,4 +1,4 @@ -! RUN: bbc -polymorphic-type -emit-fir %s -o - | tco | FileCheck %s +! RUN: bbc -polymorphic-type -emit-fir %s -o - | fir-opt --fir-polymorphic-op | FileCheck %s ! RUN: bbc -polymorphic-type -emit-fir %s -o - | FileCheck %s --check-prefix=BT ! Tests codegen of fir.dispatch operation. This test is intentionally run from @@ -182,105 +182,123 @@ program test_type_to_class end -! CHECK-LABEL: define void @_QMdispatch1Pdisplay_class( -! CHECK-SAME: ptr %[[CLASS:.*]]) +! CHECK-LABEL: func.func @_QMdispatch1Pdisplay_class( +! CHECK-SAME: %[[ARG:.*]]: [[CLASS:!fir.class<.*>>]] -! CHECK-DAG: %[[INT32:.*]] = alloca i32, i64 1 -! CHECK-DAG: %[[REAL:.*]] = alloca float, i64 1 -! CHECK-DAG: %[[I:.*]] = alloca i32, i64 1 +! CHECK-DAG: %[[INT32:.*]] = fir.alloca i32 +! CHECK-DAG: %[[REAL:.*]] = fir.alloca f32 +! CHECK-DAG: %[[I:.*]] = fir.alloca i32 ! Check dynamic dispatch equal to `call p%display2()` with binding index = 2. -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 2 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]](ptr %[[CLASS]]) +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! 
CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c2 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) +! CHECK: fir.call %[[FUNC_PTR]](%[[ARG]]) : ([[CLASS]]) -> () ! Check dynamic dispatch equal to `call p%display1()` with binding index = 1. -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 1 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]](ptr %[[CLASS]]) +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c1 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) +! 
CHECK: fir.call %[[FUNC_PTR]](%[[ARG]]) : ([[CLASS]]) -> () ! Check dynamic dispatch equal to `call p%aproc()` with binding index = 0. -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 0 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]](ptr %[[CLASS]]) +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c0 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> ()) +! CHECK: fir.call %[[FUNC_PTR]](%[[ARG]]) : ([[CLASS]]) -> () ! Check dynamic dispatch of a function with result. -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 3 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! 
CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: %[[RET:.*]] = call i32 %[[FUNC_PTR]](ptr %[[CLASS]]) -! CHECK: store i32 %[[RET]], ptr %[[I]] +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c3 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]]) -> i32) +! CHECK: %[[RES:.*]] = fir.call %[[FUNC_PTR]](%[[ARG]]) : ([[CLASS]]) -> i32 ! Check dynamic dispatch of call with passed-object and additional argument -! CHECK: store float 2.500000e+00, ptr %[[REAL]] -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 6 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]](ptr %[[CLASS]], ptr %[[REAL]]) +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! 
CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c6 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (([[CLASS]], !fir.ref) -> ()) +! CHECK: fir.call %[[FUNC_PTR]](%[[ARG]], %[[REAL]]) : ([[CLASS]], !fir.ref) -> () ! Check dynamic dispatch of a call with NOPASS -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 4 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]]() - -! CHECK: store i32 1, ptr %[[INT32]] -! CHECK: %[[LOADED_CLASS:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %[[CLASS]] -! CHECK: %[[TYPEDESCPTR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOADED_CLASS]], 7 -! CHECK: %[[LOADED_TYPEDESC:.*]] = load %_QM__fortran_type_infoTderivedtype, ptr %[[TYPEDESCPTR]] -! CHECK: %[[DT:.*]] = extractvalue %_QM__fortran_type_infoTderivedtype %[[LOADED_TYPEDESC]], 0 -! CHECK: %[[BINDING_BASE_ADDR:.*]] = extractvalue { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] } %[[DT]], 0 -! CHECK: %[[BINDING_PTR:.*]] = getelementptr %_QM__fortran_type_infoTbinding, ptr %[[BINDING_BASE_ADDR]], i32 5 -! CHECK: %[[LOADED_BINDING:.*]] = load %_QM__fortran_type_infoTbinding, ptr %[[BINDING_PTR]] -! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = extractvalue %_QM__fortran_type_infoTbinding %[[LOADED_BINDING]], 0 -! CHECK: %[[FUNC_ADDR:.*]] = extractvalue %_QM__fortran_builtinsT__builtin_c_funptr %[[BUILTIN_FUNC_PTR]], 0 -! CHECK: %[[FUNC_PTR:.*]] = inttoptr i64 %[[FUNC_ADDR]] to ptr -! CHECK: call void %[[FUNC_PTR]](ptr %[[INT32]], ptr %[[CLASS]]) +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! 
CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c4 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> (() -> ()) +! CHECK: fir.call %[[FUNC_PTR]]() : () -> () + +! CHECK: %[[BOXDESC:.*]] = fir.box_tdesc %[[ARG]] : ([[CLASS]]) -> !fir.tdesc +! CHECK: %[[TYPEDESCPTR:.*]] = fir.convert %[[BOXDESC]] : (!fir.tdesc) -> !fir.ref<[[TYPEINFO:!fir.type<_QM__fortran_type_infoTderivedtype{.*}>]]> +! CHECK: %[[BINDING_FIELD:.*]] = fir.field_index binding, [[TYPEINFO]] +! CHECK: %[[BINDING_BOX_ADDR:.*]] = fir.coordinate_of %[[TYPEDESCPTR]], %[[BINDING_FIELD]] : (!fir.ref<[[TYPEINFO]]>, !fir.field) -> !fir.ref<[[BINDING_BOX_TYPE:.*]]> +! CHECK: %[[BINDING_BOX:.*]] = fir.load %[[BINDING_BOX_ADDR]] : !fir.ref<[[BINDING_BOX_TYPE]]> +! CHECK: %[[BINDING_BASE_ADDR:.*]] = fir.box_addr %[[BINDING_BOX]] : ([[BINDING_BOX_TYPE]]) -> !fir.ptr<[[BINDINGSINFO:.*]]> +! CHECK: %[[BINDING_PTR:.*]] = fir.coordinate_of %[[BINDING_BASE_ADDR]], %c5 : (!fir.ptr<[[BINDINGSINFO]]>, index) -> !fir.ref<[[BINDINGINFO:.*]]> +! CHECK: %[[PROC_FIELD:.*]] = fir.field_index proc, [[BINDINGINFO]] +! CHECK: %[[BUILTIN_FUNC_PTR:.*]] = fir.coordinate_of %[[BINDING_PTR]], %[[PROC_FIELD]] : ({{.*}}) -> !fir.ref<[[BUILTIN_FUNC_TYPE:.*]]> +! CHECK: %[[ADDRESS_FIELD:.*]] = fir.field_index __address, [[BUILTIN_FUNC_TYPE]] +! CHECK: %[[FUNC_ADDR_PTR:.*]] = fir.coordinate_of %[[BUILTIN_FUNC_PTR]], %[[ADDRESS_FIELD]] +! CHECK: %[[FUNC_ADDR:.*]] = fir.load %[[FUNC_ADDR_PTR]] : !fir.ref +! CHECK: %[[FUNC_PTR:.*]] = fir.convert %[[FUNC_ADDR]] : (i64) -> ((!fir.ref, [[CLASS]]) -> ()) +! CHECK: fir.call %[[FUNC_PTR]](%[[INT32]], %[[ARG]]) : (!fir.ref, [[CLASS]]) -> () ! CHECK-LABEL: _QMdispatch1Pno_pass_array ! CHECK-LABEL: _QMdispatch1Pno_pass_array_allocatable diff --git a/flang/test/Lower/allocatable-polymorphic.f90 b/flang/test/Lower/allocatable-polymorphic.f90 index b129c7c8cdbe1..c3c01a39b8606 100644 --- a/flang/test/Lower/allocatable-polymorphic.f90 +++ b/flang/test/Lower/allocatable-polymorphic.f90 @@ -586,9 +586,9 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[C1_LOAD]], ptr %{{.*}} ! LLVM: %[[GEP_TDESC_C1:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 ! LLVM: %[[TDESC_C1:.*]] = load ptr, ptr %[[GEP_TDESC_C1]] -! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.}}, i32 0, i32 1 +! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 ! LLVM: %[[ELEM_SIZE:.*]] = load i64, ptr %[[ELEM_SIZE_GEP]] -! 
LLVM: %[[TYPE_CODE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.}}, i32 0, i32 4 +! LLVM: %[[TYPE_CODE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 4 ! LLVM: %[[TYPE_CODE:.*]] = load i32, ptr %[[TYPE_CODE_GEP]] ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1 ! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[TYPE_CODE]] to i8 @@ -600,9 +600,9 @@ program test_alloc ! LLVM: store { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } %[[LOAD_C2]], ptr %{{.*}} ! LLVM: %[[GEP_TDESC_C2:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 7 ! LLVM: %[[TDESC_C2:.*]] = load ptr, ptr %[[GEP_TDESC_C2]] -! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.}}, i32 0, i32 1 +! LLVM: %[[ELEM_SIZE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 1 ! LLVM: %[[ELEM_SIZE:.*]] = load i64, ptr %[[ELEM_SIZE_GEP]] -! LLVM: %[[TYPE_CODE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.}}, i32 0, i32 4 +! LLVM: %[[TYPE_CODE_GEP:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, ptr %{{.*}}, i32 0, i32 4 ! LLVM: %[[TYPE_CODE:.*]] = load i32, ptr %[[TYPE_CODE_GEP]] ! LLVM: %{{.*}} = insertvalue { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] } undef, i64 %[[ELEM_SIZE]], 1 ! LLVM: %[[TRUNC_TYPE_CODE:.*]] = trunc i32 %[[TYPE_CODE]] to i8 From 7739be7c6b6d017bf6b4445c5010e59314655995 Mon Sep 17 00:00:00 2001 From: Jeff Byrnes Date: Wed, 15 Mar 2023 12:11:20 -0700 Subject: [PATCH 453/691] [ArgPromotion] Remove dead code produced by removing dead arguments ArgPromotion currently produces phantom / dead loads. A good example of this is store-into-inself.ll. First, ArgPromo finds the promotable argument %p in @l. Then it inserts a load of %p in the caller, and passes instead the loaded value / transforms the function body. PromoteMem2Reg is able to optimize out the entire function body, resulting in an unused argument. In a subsequent ArgPromotion pass, it removes the dead argument, resulting in a dead load in the caller. These dead loads may reduce effectiveness of other transformations (e.g. SimplifyCFG, MergedLoadStoreMotion). This patch removes loads and geps that are made dead in the caller after removal of dead args. Differential Revision: https://reviews.llvm.org/D146327 --- llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 8 ++++++ .../propagate-remove-dead-args.ll | 11 ++------ .../ArgumentPromotion/store-into-inself.ll | 1 - .../dce-after-argument-promotion-loads.ll | 25 +++++++------------ 4 files changed, 19 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp index dd1a3b78a378c..3b1a174f5cc63 100644 --- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -67,6 +67,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include @@ -220,6 +221,8 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM, // pass in the loaded pointers. 
  SmallVector<Value *, 16> Args;
   const DataLayout &DL = F->getParent()->getDataLayout();
+  SmallVector<WeakTrackingVH, 16> DeadArgs;
+
   while (!F->use_empty()) {
     CallBase &CB = cast<CallBase>(*F->user_back());
     assert(CB.getCalledFunction() == F);
@@ -255,6 +258,9 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
         Args.push_back(LI);
         ArgAttrVec.push_back(AttributeSet());
       }
+    } else {
+      assert(ArgsToPromote.count(&*I) && I->use_empty());
+      DeadArgs.emplace_back(AI->get());
     }
   }
@@ -297,6 +303,8 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
     CB.eraseFromParent();
   }
+  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);
+
   // Since we have now created the new function, splice the body of the old
   // function right into the new function, leaving the old rotting hulk of the
   // function empty.
diff --git a/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll
index cc1f7fb26a479..4176a8a7bc5c8 100644
--- a/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll
@@ -18,18 +18,12 @@ entry:
 define internal void @parent(ptr %this, ptr %p1, ptr %p2) {
 ; CHECK-LABEL: define internal void @parent
-; CHECK-SAME: (ptr [[THIS:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SRC_ELEMENT_OP_0:%.*]] = getelementptr ptr, ptr [[THIS]], i64 0
-; CHECK-NEXT:    [[LOAD0:%.*]] = load ptr, ptr [[SRC_ELEMENT_OP_0]], align 8
 ; CHECK-NEXT:    [[P2_VAL2:%.*]] = load half, ptr [[P2]], align 2
 ; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_VAL2]])
-; CHECK-NEXT:    [[SRC_ELEMENT_OP_1:%.*]] = getelementptr ptr, ptr [[THIS]], i64 1
-; CHECK-NEXT:    [[LOAD1:%.*]] = load ptr, ptr [[SRC_ELEMENT_OP_1]], align 8
 ; CHECK-NEXT:    [[P2_VAL1:%.*]] = load half, ptr [[P2]], align 2
 ; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_VAL1]])
-; CHECK-NEXT:    [[SRC_ELEMENT_OP_2:%.*]] = getelementptr ptr, ptr [[THIS]], i64 2
-; CHECK-NEXT:    [[LOAD2:%.*]] = load ptr, ptr [[SRC_ELEMENT_OP_2]], align 8
 ; CHECK-NEXT:    [[P2_VAL:%.*]] = load half, ptr [[P2]], align 2
 ; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_VAL]])
 ; CHECK-NEXT:    ret void
@@ -50,10 +44,9 @@ entry:
 define void @grandparent() {
 ; CHECK-LABEL: define void @grandparent() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[F:%.*]] = alloca [[PTR_STRUCT:%.*]], align 8
 ; CHECK-NEXT:    [[XPTR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[YPTR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @parent(ptr [[F]], ptr [[XPTR]], ptr [[YPTR]])
+; CHECK-NEXT:    call void @parent(ptr [[XPTR]], ptr [[YPTR]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll
index 7d7099003dc77..be94af6a0bd03 100644
--- a/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/store-into-inself.ll
@@ -83,7 +83,6 @@ define i32 @main() nounwind {
 ; CHECK-NEXT:    call void @g(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]]
 ; CHECK-NEXT:    call void @h(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]]
 ; CHECK-NEXT:    call void @k(ptr byval(ptr) align 4 [[S]]) #[[ATTR0]]
-; CHECK-NEXT:    [[S_VAL:%.*]] = load ptr, ptr [[S]], align 8
 ; CHECK-NEXT:    call void @l() #[[ATTR0]]
 ; CHECK-NEXT:    ret i32 0
 ;
diff --git a/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll
index 
2bdd42b3dd8ca..2fe8f39e423a5 100644 --- a/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll +++ b/llvm/test/Transforms/PhaseOrdering/dce-after-argument-promotion-loads.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -O3 -S < %s | FileCheck %s ; Arg promotion eliminates the struct argument, and eliminates dead arguments, but introduces and leaves dead loads of the eliminated dead arg in callers @@ -13,24 +13,17 @@ entry: } define ptr @parent(ptr align 8 dereferenceable(72) %f, i16 %val1, i16 %val2, i32 %val3) align 2 { -; CHECK-LABEL: define {{[^@]+}}@parent +; CHECK-LABEL: define nonnull ptr @parent ; CHECK-SAME: (ptr readonly returned align 8 dereferenceable(72) [[F:%.*]], i16 [[VAL1:%.*]], i16 [[VAL2:%.*]], i32 [[VAL3:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] align 2 { ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[F]], i64 64 +; CHECK-NEXT: [[F_VAL:%.*]] = load ptr, ptr [[TMP0]], align 8 ; CHECK-NEXT: [[CMP_NOT_NOT_I:%.*]] = icmp eq i32 [[VAL3]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], ptr [[F]], i64 0, i32 8 -; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -; CHECK-NEXT: br i1 [[CMP_NOT_NOT_I]], label [[IF_THEN_I:%.*]], label [[IF_ELSE_I:%.*]] -; CHECK: if.then.i: -; CHECK-NEXT: store i16 [[VAL1]], ptr [[TMP1]], align 2 -; CHECK-NEXT: [[ADD_PTR_I_I_I_I_I:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 -; CHECK-NEXT: br label [[BADCHILD_EXIT:%.*]] -; CHECK: if.else.i: -; CHECK-NEXT: [[ADD_PTR_I_I_I_I7_I:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 16 -; CHECK-NEXT: store i16 [[VAL1]], ptr [[ADD_PTR_I_I_I_I7_I]], align 2 -; CHECK-NEXT: br label [[BADCHILD_EXIT]] -; CHECK: badChild.exit: -; CHECK-NEXT: [[DOTSINK_I:%.*]] = phi ptr [ [[TMP1]], [[IF_ELSE_I]] ], [ [[ADD_PTR_I_I_I_I_I]], [[IF_THEN_I]] ] -; CHECK-NEXT: store i16 [[VAL2]], ptr [[DOTSINK_I]], align 2 +; CHECK-NEXT: [[SPEC_SELECT_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[VAL1]], i16 [[VAL2]] +; CHECK-NEXT: [[SPEC_SELECT2_I:%.*]] = select i1 [[CMP_NOT_NOT_I]], i16 [[VAL2]], i16 [[VAL1]] +; CHECK-NEXT: store i16 [[SPEC_SELECT_I]], ptr [[F_VAL]], align 2 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[F_VAL]], i64 16 +; CHECK-NEXT: store i16 [[SPEC_SELECT2_I]], ptr [[TMP1]], align 2 ; CHECK-NEXT: ret ptr [[F]] ; entry: From de939c6cd80c1e88913f1ef12be11aea501eb89b Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Mon, 13 Mar 2023 16:43:05 -0700 Subject: [PATCH 454/691] [libc] enable printf using system FILE The printf and fprintf implementations use our internal implementation to improve performance when it's available, but this patch enables using the public FILE API for overlay mode. 
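For illustration, the mechanism is to template the printf writer on the FILE
type and specialize the write primitive for each type. Below is a minimal,
self-contained sketch of that pattern; `InternalFile` here is a hypothetical
stand-in for the internal `__llvm_libc::File`, and the locking and error
reporting done by the real FileWriter in this patch are omitted:

    #include <cstddef>
    #include <cstdio>

    // Hypothetical stand-in for llvm-libc's internal File type.
    struct InternalFile {
      std::size_t write_unlocked(const char *s, std::size_t len) {
        return ::fwrite(s, 1, len, stdout); // demo only
      }
    };

    template <typename file_t> struct FileWriter {
      file_t *file;
      int write(const char *s, std::size_t len);
    };

    // Full build: dispatch to the internal implementation.
    template <>
    int FileWriter<InternalFile>::write(const char *s, std::size_t len) {
      return static_cast<int>(file->write_unlocked(s, len));
    }

    // Overlay mode (LIBC_COPT_PRINTF_USE_SYSTEM_FILE): public FILE API.
    template <>
    int FileWriter<::FILE>::write(const char *s, std::size_t len) {
      return static_cast<int>(::fwrite(s, 1, len, file));
    }

    int main() {
      InternalFile internal;
      FileWriter<InternalFile> a{&internal};
      FileWriter<::FILE> b{stdout};
      a.write("via internal File\n", 18);
      b.write("via system FILE\n", 16);
    }

vfprintf_internal becomes a function template over the same file type, so
printf and fprintf simply pick the instantiation matching the build mode.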
Reviewed By: sivachandra, lntue Differential Revision: https://reviews.llvm.org/D146001 --- libc/config/linux/x86_64/entrypoints.txt | 4 +- libc/src/stdio/CMakeLists.txt | 35 +++++--- libc/src/stdio/fprintf.cpp | 10 ++- libc/src/stdio/printf.cpp | 12 ++- libc/src/stdio/printf_core/CMakeLists.txt | 16 ++-- libc/src/stdio/printf_core/file_writer.cpp | 54 ------------- libc/src/stdio/printf_core/file_writer.h | 79 ++++++++++++++++--- .../stdio/printf_core/vfprintf_internal.cpp | 32 -------- .../src/stdio/printf_core/vfprintf_internal.h | 18 ++++- libc/test/src/stdio/CMakeLists.txt | 17 +++- libc/test/src/stdio/fprintf_test.cpp | 32 ++++++-- 11 files changed, 174 insertions(+), 135 deletions(-) delete mode 100644 libc/src/stdio/printf_core/file_writer.cpp delete mode 100644 libc/src/stdio/printf_core/vfprintf_internal.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index b3017338f8260..5c0b3103f5615 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -111,6 +111,8 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.stdio.remove libc.src.stdio.sprintf libc.src.stdio.snprintf + libc.src.stdio.fprintf + libc.src.stdio.printf # sys/mman.h entrypoints libc.src.sys.mman.madvise @@ -412,10 +414,8 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.funlockfile libc.src.stdio.fwrite libc.src.stdio.fwrite_unlocked - libc.src.stdio.fprintf libc.src.stdio.getc libc.src.stdio.getc_unlocked - libc.src.stdio.printf libc.src.stdio.sscanf libc.src.stdio.scanf libc.src.stdio.fscanf diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 5f8d17953f633..7ccbf9aa28c4c 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -480,29 +480,42 @@ add_entrypoint_object( libc.src.stdio.printf_core.writer ) +list(APPEND printf_deps + libc.src.__support.arg_list + libc.src.stdio.printf_core.vfprintf_internal +) +if(LLVM_LIBC_FULL_BUILD) + list(APPEND printf_deps + libc.src.__support.File.file + libc.src.__support.File.platform_file + ) +else() + set(printf_copts "-DLIBC_COPT_PRINTF_USE_SYSTEM_FILE") +endif() + add_entrypoint_object( - fprintf + printf SRCS - fprintf.cpp + printf.cpp HDRS - fprintf.h + printf.h DEPENDS - libc.src.__support.arg_list - libc.src.stdio.printf_core.vfprintf_internal + ${printf_deps} + COMPILE_OPTIONS + ${printf_copts} ) - add_entrypoint_object( - printf + fprintf SRCS - printf.cpp + fprintf.cpp HDRS - printf.h + fprintf.h DEPENDS - libc.src.__support.File.file - libc.src.__support.File.platform_file libc.src.__support.arg_list libc.src.stdio.printf_core.vfprintf_internal + COMPILE_OPTIONS + ${printf_copts} ) add_entrypoint_object( diff --git a/libc/src/stdio/fprintf.cpp b/libc/src/stdio/fprintf.cpp index 796d5b5c47095..da8fabf5ab542 100644 --- a/libc/src/stdio/fprintf.cpp +++ b/libc/src/stdio/fprintf.cpp @@ -13,9 +13,16 @@ #include "src/stdio/printf_core/vfprintf_internal.h" #include +#include namespace __llvm_libc { +#ifndef LIBC_COPT_PRINTF_USE_SYSTEM_FILE +using FileT = __llvm_libc::File; +#else // defined(LIBC_COPT_PRINTF_USE_SYSTEM_FILE) +using FileT = ::FILE; +#endif // LIBC_COPT_PRINTF_USE_SYSTEM_FILE + LLVM_LIBC_FUNCTION(int, fprintf, (::FILE *__restrict stream, const char *__restrict format, ...)) { @@ -25,7 +32,8 @@ LLVM_LIBC_FUNCTION(int, fprintf, // and pointer semantics, as well as handling // destruction automatically. 
va_end(vlist); - int ret_val = printf_core::vfprintf_internal(stream, format, args); + int ret_val = printf_core::vfprintf_internal( + reinterpret_cast(stream), format, args); return ret_val; } diff --git a/libc/src/stdio/printf.cpp b/libc/src/stdio/printf.cpp index 8fd8b9cc57fad..ca6f61ed63033 100644 --- a/libc/src/stdio/printf.cpp +++ b/libc/src/stdio/printf.cpp @@ -8,11 +8,18 @@ #include "src/stdio/printf.h" -#include "src/__support/File/file.h" #include "src/__support/arg_list.h" #include "src/stdio/printf_core/vfprintf_internal.h" #include +#include + +#ifndef LIBC_COPT_PRINTF_USE_SYSTEM_FILE +#include "src/__support/File/file.h" +#define PRINTF_STDOUT __llvm_libc::stdout +#else // LIBC_COPT_PRINTF_USE_SYSTEM_FILE +#define PRINTF_STDOUT ::stdout +#endif // LIBC_COPT_PRINTF_USE_SYSTEM_FILE namespace __llvm_libc { @@ -23,8 +30,7 @@ LLVM_LIBC_FUNCTION(int, printf, (const char *__restrict format, ...)) { // and pointer semantics, as well as handling // destruction automatically. va_end(vlist); - int ret_val = printf_core::vfprintf_internal( - reinterpret_cast<::FILE *>(__llvm_libc::stdout), format, args); + int ret_val = printf_core::vfprintf_internal(PRINTF_STDOUT, format, args); return ret_val; } diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt index 31db8ad3c524c..109399772b53d 100644 --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -116,35 +116,31 @@ add_object_library( libc.src.__support.arg_list ) -if(NOT (TARGET libc.src.__support.File.file)) - # Not all platforms have a file implementation. If file is unvailable, - # then we must skip all file based printf sections. +if(NOT (TARGET libc.src.__support.File.file) AND LLVM_LIBC_FULL_BUILD) + # Not all platforms have a file implementation. If file is unvailable, and a + # full build is requested, then we must skip all file based printf sections. return() endif() -add_object_library( +add_header_library( file_writer - SRCS - file_writer.cpp HDRS file_writer.h DEPENDS + libc.include.stdio libc.src.__support.File.file libc.src.__support.CPP.string_view libc.src.string.memory_utils.memset_implementation .core_structs ) -add_object_library( +add_header_library( vfprintf_internal - SRCS - vfprintf_internal.cpp HDRS vfprintf_internal.h DEPENDS libc.include.stdio libc.src.__support.File.file - libc.src.__support.File.platform_file libc.src.__support.arg_list libc.src.stdio.printf_core.printf_main libc.src.stdio.printf_core.file_writer diff --git a/libc/src/stdio/printf_core/file_writer.cpp b/libc/src/stdio/printf_core/file_writer.cpp deleted file mode 100644 index 0e07e1c1eb8a7..0000000000000 --- a/libc/src/stdio/printf_core/file_writer.cpp +++ /dev/null @@ -1,54 +0,0 @@ -//===-- FILE Writer implementation for printf -------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/stdio/printf_core/file_writer.h" -#include "src/__support/CPP/string_view.h" -#include "src/__support/File/file.h" -#include "src/stdio/printf_core/core_structs.h" -#include - -namespace __llvm_libc { -namespace printf_core { - -int FileWriter::write(const char *__restrict to_write, size_t len) { - auto result = file->write_unlocked(to_write, len); - int written = result.value; - if (written != static_cast(len) || result.has_error()) - written = FILE_WRITE_ERROR; - if (file->error_unlocked()) - written = FILE_STATUS_ERROR; - return written; -} - -int FileWriter::write_str(void *raw_pointer, cpp::string_view new_string) { - FileWriter *file_writer = reinterpret_cast(raw_pointer); - return file_writer->write(new_string.data(), new_string.size()); -} - -int FileWriter::write_chars(void *raw_pointer, char new_char, size_t len) { - FileWriter *file_writer = reinterpret_cast(raw_pointer); - constexpr size_t BUFF_SIZE = 8; - char buff[BUFF_SIZE] = {new_char}; - int result; - while (len > BUFF_SIZE) { - result = file_writer->write(buff, BUFF_SIZE); - if (result < 0) - return result; - len -= BUFF_SIZE; - } - return file_writer->write(buff, len); -} - -// TODO(michaelrj): Move this to putc_unlocked once that is available. -int FileWriter::write_char(void *raw_pointer, char new_char) { - FileWriter *file_writer = reinterpret_cast(raw_pointer); - return file_writer->write(&new_char, 1); -} - -} // namespace printf_core -} // namespace __llvm_libc diff --git a/libc/src/stdio/printf_core/file_writer.h b/libc/src/stdio/printf_core/file_writer.h index 6ba1428a160e2..0fd6d115ddd8b 100644 --- a/libc/src/stdio/printf_core/file_writer.h +++ b/libc/src/stdio/printf_core/file_writer.h @@ -11,6 +11,8 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/File/file.h" +#include "src/__support/macros/attributes.h" // For LIBC_INLINE +#include "src/stdio/printf_core/core_structs.h" #include #include @@ -18,26 +20,81 @@ namespace __llvm_libc { namespace printf_core { -class FileWriter { - __llvm_libc::File *file; +template class FileWriter { + file_t *file; public: - FileWriter(::FILE *init_file) { - file = reinterpret_cast<__llvm_libc::File *>(init_file); - file->lock(); - } + LIBC_INLINE FileWriter(file_t *init_file); - ~FileWriter() { file->unlock(); } + LIBC_INLINE ~FileWriter(); - int write(const char *__restrict to_write, size_t len); + LIBC_INLINE int write(const char *__restrict to_write, size_t len); // These write functions take a FileWriter as a void* in raw_pointer, and // call the appropriate write function on it. 
- static int write_str(void *raw_pointer, cpp::string_view new_string); - static int write_chars(void *raw_pointer, char new_char, size_t len); - static int write_char(void *raw_pointer, char new_char); + static int write_str(void *raw_pointer, cpp::string_view new_string) { + FileWriter *file_writer = reinterpret_cast(raw_pointer); + return file_writer->write(new_string.data(), new_string.size()); + } + static int write_chars(void *raw_pointer, char new_char, size_t len) { + FileWriter *file_writer = reinterpret_cast(raw_pointer); + constexpr size_t BUFF_SIZE = 8; + char buff[BUFF_SIZE] = {new_char}; + int result; + while (len > BUFF_SIZE) { + result = file_writer->write(buff, BUFF_SIZE); + if (result < 0) + return result; + len -= BUFF_SIZE; + } + return file_writer->write(buff, len); + } + static int write_char(void *raw_pointer, char new_char) { + FileWriter *file_writer = reinterpret_cast(raw_pointer); + return file_writer->write(&new_char, 1); + } }; +// The interface for using our internal file implementation. +template <> +LIBC_INLINE +FileWriter<__llvm_libc::File>::FileWriter(__llvm_libc::File *init_file) { + file = init_file; + file->lock(); +} +template <> LIBC_INLINE FileWriter<__llvm_libc::File>::~FileWriter() { + file->unlock(); +} +template <> +LIBC_INLINE int +FileWriter<__llvm_libc::File>::write(const char *__restrict to_write, + size_t len) { + auto result = file->write_unlocked(to_write, len); + size_t written = result.value; + if (written != len || result.has_error()) + written = FILE_WRITE_ERROR; + if (file->error_unlocked()) + written = FILE_STATUS_ERROR; + return written; +} + +// The interface for using the system's file implementation. +template <> LIBC_INLINE FileWriter<::FILE>::FileWriter(::FILE *init_file) { + file = init_file; + ::flockfile(file); +} +template <> LIBC_INLINE FileWriter<::FILE>::~FileWriter() { + ::funlockfile(file); +} +template <> +LIBC_INLINE int FileWriter<::FILE>::write(const char *__restrict to_write, + size_t len) { + size_t written = ::fwrite_unlocked(to_write, 1, len, file); + if (written != len || ::ferror_unlocked(file)) + written = FILE_WRITE_ERROR; + return written; +} + } // namespace printf_core } // namespace __llvm_libc diff --git a/libc/src/stdio/printf_core/vfprintf_internal.cpp b/libc/src/stdio/printf_core/vfprintf_internal.cpp deleted file mode 100644 index b25d545e54a11..0000000000000 --- a/libc/src/stdio/printf_core/vfprintf_internal.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//===-- Internal implementation of vfprintf ---------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "src/stdio/printf_core/vfprintf_internal.h" - -#include "src/__support/arg_list.h" -#include "src/stdio/printf_core/file_writer.h" -#include "src/stdio/printf_core/printf_main.h" -#include "src/stdio/printf_core/writer.h" - -#include - -namespace __llvm_libc { -namespace printf_core { - -int vfprintf_internal(::FILE *__restrict stream, const char *__restrict format, - internal::ArgList &args) { - FileWriter file_writer(stream); - printf_core::Writer writer(reinterpret_cast(&file_writer), - printf_core::FileWriter::write_str, - printf_core::FileWriter::write_chars, - printf_core::FileWriter::write_char); - return printf_core::printf_main(&writer, format, args); -} - -} // namespace printf_core -} // namespace __llvm_libc diff --git a/libc/src/stdio/printf_core/vfprintf_internal.h b/libc/src/stdio/printf_core/vfprintf_internal.h index b837ebba182b4..762018f0b04c4 100644 --- a/libc/src/stdio/printf_core/vfprintf_internal.h +++ b/libc/src/stdio/printf_core/vfprintf_internal.h @@ -9,15 +9,29 @@ #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_VFPRINTF_INTERNAL_H #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_VFPRINTF_INTERNAL_H +#include "src/__support/File/file.h" #include "src/__support/arg_list.h" +#include "src/__support/macros/attributes.h" // For LIBC_INLINE +#include "src/stdio/printf_core/file_writer.h" +#include "src/stdio/printf_core/printf_main.h" +#include "src/stdio/printf_core/writer.h" #include namespace __llvm_libc { namespace printf_core { -int vfprintf_internal(::FILE *__restrict stream, const char *__restrict format, - internal::ArgList &args); +template +LIBC_INLINE int vfprintf_internal(file_t *__restrict stream, + const char *__restrict format, + internal::ArgList &args) { + FileWriter file_writer(stream); + Writer writer(reinterpret_cast(&file_writer), + FileWriter::write_str, FileWriter::write_chars, + FileWriter::write_char); + return printf_main(&writer, format, args); +} + } // namespace printf_core } // namespace __llvm_libc diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index 8747f18f9045b..a4b5a9be892f1 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -134,6 +134,8 @@ add_libc_unittest( libc.src.stdio.snprintf ) +# In fullbuild mode, fprintf's tests use the internal FILE for other functions. +if(LLVM_LIBC_FULL_BUILD) add_libc_unittest( fprintf_test SUITE @@ -147,7 +149,20 @@ add_libc_unittest( libc.src.stdio.fopen libc.src.stdio.fread ) - +else() +# Else in overlay mode they use the system's FILE. 
+add_libc_unittest( + fprintf_test + SUITE + libc_stdio_unittests + SRCS + fprintf_test.cpp + DEPENDS + libc.src.stdio.fprintf + COMPILE_OPTIONS + -DLIBC_COPT_PRINTF_USE_SYSTEM_FILE +) +endif() add_libc_unittest( printf_test diff --git a/libc/test/src/stdio/fprintf_test.cpp b/libc/test/src/stdio/fprintf_test.cpp index 286c516fbcf96..20b3c0faed6f7 100644 --- a/libc/test/src/stdio/fprintf_test.cpp +++ b/libc/test/src/stdio/fprintf_test.cpp @@ -6,10 +6,12 @@ // //===----------------------------------------------------------------------===// +#ifndef LIBC_COPT_PRINTF_USE_SYSTEM_FILE #include "src/stdio/fclose.h" #include "src/stdio/ferror.h" #include "src/stdio/fopen.h" #include "src/stdio/fread.h" +#endif // LIBC_COPT_PRINTF_USE_SYSTEM_FILE #include "src/stdio/fprintf.h" @@ -17,9 +19,23 @@ #include +namespace printf_test { +#ifndef LIBC_COPT_PRINTF_USE_SYSTEM_FILE +using __llvm_libc::fclose; +using __llvm_libc::ferror; +using __llvm_libc::fopen; +using __llvm_libc::fread; +#else // defined(LIBC_COPT_PRINTF_USE_SYSTEM_FILE) +using ::fclose; +using ::ferror; +using ::fopen; +using ::fread; +#endif // LIBC_COPT_PRINTF_USE_SYSTEM_FILE +} // namespace printf_test + TEST(LlvmLibcFPrintfTest, WriteToFile) { constexpr char FILENAME[] = "testdata/fprintf_output.test"; - ::FILE *file = __llvm_libc::fopen(FILENAME, "w"); + ::FILE *file = printf_test::fopen(FILENAME, "w"); ASSERT_FALSE(file == nullptr); int written; @@ -37,31 +53,31 @@ TEST(LlvmLibcFPrintfTest, WriteToFile) { written = __llvm_libc::fprintf(file, format_more, short_numbers); EXPECT_EQ(written, 14); - ASSERT_EQ(0, __llvm_libc::fclose(file)); + ASSERT_EQ(0, printf_test::fclose(file)); - file = __llvm_libc::fopen(FILENAME, "r"); + file = printf_test::fopen(FILENAME, "r"); ASSERT_FALSE(file == nullptr); char data[50]; - ASSERT_EQ(__llvm_libc::fread(data, 1, sizeof(simple) - 1, file), + ASSERT_EQ(printf_test::fread(data, 1, sizeof(simple) - 1, file), sizeof(simple) - 1); data[sizeof(simple) - 1] = '\0'; ASSERT_STREQ(data, simple); - ASSERT_EQ(__llvm_libc::fread(data, 1, sizeof(numbers) - 1, file), + ASSERT_EQ(printf_test::fread(data, 1, sizeof(numbers) - 1, file), sizeof(numbers) - 1); data[sizeof(numbers) - 1] = '\0'; ASSERT_STREQ(data, numbers); - ASSERT_EQ(__llvm_libc::fread( + ASSERT_EQ(printf_test::fread( data, 1, sizeof(format_more) + sizeof(short_numbers) - 4, file), sizeof(format_more) + sizeof(short_numbers) - 4); data[sizeof(format_more) + sizeof(short_numbers) - 4] = '\0'; ASSERT_STREQ(data, "1234 and more\n"); - ASSERT_EQ(__llvm_libc::ferror(file), 0); + ASSERT_EQ(printf_test::ferror(file), 0); written = __llvm_libc::fprintf(file, "Writing to a read only file should fail."); EXPECT_LT(written, 0); - ASSERT_EQ(__llvm_libc::fclose(file), 0); + ASSERT_EQ(printf_test::fclose(file), 0); } From 7d11a592c5adc286bf1845c20b20965d5e999039 Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Thu, 23 Mar 2023 17:07:19 +0000 Subject: [PATCH 455/691] [libc] Fix some math conversion warnings Differential Revision: https://reviews.llvm.org/D146738 --- libc/src/__support/FPUtil/ManipulationFunctions.h | 2 +- .../__support/FPUtil/NearestIntegerOperations.h | 6 +++--- libc/src/math/generic/acosf.cpp | 8 ++++---- libc/src/math/generic/acoshf.cpp | 3 ++- libc/src/math/generic/asinf.cpp | 4 ++-- libc/src/math/generic/asinhf.cpp | 14 ++++++++------ libc/src/math/generic/atanf.cpp | 4 ++-- libc/src/math/generic/atanhf.cpp | 9 +++++---- libc/src/math/generic/cosf.cpp | 4 ++-- libc/src/math/generic/coshf.cpp | 2 +- libc/src/math/generic/exp10f.cpp | 4 ++-- 
 libc/src/math/generic/exp2f.cpp | 2 +-
 libc/src/math/generic/log10.cpp | 2 +-
 libc/src/math/generic/log1pf.cpp | 2 +-
 libc/src/math/generic/sincosf.cpp | 8 ++++----
 libc/src/math/generic/sinf.cpp | 6 +++---
 libc/src/math/generic/sinhf.cpp | 7 ++++---
 libc/src/math/generic/tanf.cpp | 7 ++++---
 libc/src/math/generic/tanhf.cpp | 8 ++++----
 19 files changed, 54 insertions(+), 48 deletions(-)

diff --git a/libc/src/__support/FPUtil/ManipulationFunctions.h b/libc/src/__support/FPUtil/ManipulationFunctions.h
index 27d91c433ac5b..14055ab74dce0 100644
--- a/libc/src/__support/FPUtil/ManipulationFunctions.h
+++ b/libc/src/__support/FPUtil/ManipulationFunctions.h
@@ -112,7 +112,7 @@ LIBC_INLINE T logb(T x) {
   }

   NormalFloat<T> normal(bits);
-  return normal.exponent;
+  return static_cast<T>(normal.exponent);
 }

 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
diff --git a/libc/src/__support/FPUtil/NearestIntegerOperations.h b/libc/src/__support/FPUtil/NearestIntegerOperations.h
index 8265ea1cbb3e5..06aa9484c3f70 100644
--- a/libc/src/__support/FPUtil/NearestIntegerOperations.h
+++ b/libc/src/__support/FPUtil/NearestIntegerOperations.h
@@ -261,9 +261,9 @@ LIBC_INLINE I rounded_float_to_signed_integer(F x) {
   }

   // For all other cases, if `x` can fit in the integer type `I`,
-  // we just return `x`. Implicit conversion will convert the
-  // floating point value to the exact integer value.
-  return x;
+  // we just return `x`. static_cast will convert the floating
+  // point value to the exact integer value.
+  return static_cast<I>(x);
 }

 } // namespace internal
diff --git a/libc/src/math/generic/acosf.cpp b/libc/src/math/generic/acosf.cpp
index 5835dfa617056..41152e06ff1f5 100644
--- a/libc/src/math/generic/acosf.cpp
+++ b/libc/src/math/generic/acosf.cpp
@@ -56,8 +56,8 @@ LLVM_LIBC_FUNCTION(float, acosf, (float x)) {
       return r.value();

     double xd = static_cast<double>(x);
-    return fputil::multiply_add(-0x1.5555555555555p-3 * xd, xd * xd,
-                                M_MATH_PI_2 - xd);
+    return static_cast<float>(fputil::multiply_add(
+        -0x1.5555555555555p-3 * xd, xd * xd, M_MATH_PI_2 - xd));
   }

   // For |x| <= 0.5, we approximate acosf(x) by:
@@ -70,7 +70,7 @@ LLVM_LIBC_FUNCTION(float, acosf, (float x)) {
   double xsq = xd * xd;
   double x3 = xd * xsq;
   double r = asin_eval(xsq);
-  return fputil::multiply_add(-x3, r, M_MATH_PI_2 - xd);
+  return static_cast<float>(fputil::multiply_add(-x3, r, M_MATH_PI_2 - xd));
 }

 // |x| > 1, return NaNs.
@@ -111,7 +111,7 @@ LLVM_LIBC_FUNCTION(float, acosf, (float x)) {
   double r3 = asin_eval(u);
   double r = fputil::multiply_add(cv * u, r3, cv);

-  return x_sign ? M_MATH_PI - r : r;
+  return static_cast<float>(x_sign ? 
M_MATH_PI - r : r); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/acoshf.cpp b/libc/src/math/generic/acoshf.cpp index ac225fe5a808f..f8e5a90a4d056 100644 --- a/libc/src/math/generic/acoshf.cpp +++ b/libc/src/math/generic/acoshf.cpp @@ -68,7 +68,8 @@ LLVM_LIBC_FUNCTION(float, acoshf, (float x)) { double x_d = static_cast(x); // acosh(x) = log(x + sqrt(x^2 - 1)) - return log_eval(x_d + fputil::sqrt(fputil::multiply_add(x_d, x_d, -1.0))); + return static_cast( + log_eval(x_d + fputil::sqrt(fputil::multiply_add(x_d, x_d, -1.0)))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/asinf.cpp b/libc/src/math/generic/asinf.cpp index c24697cb14727..9b724d3296c84 100644 --- a/libc/src/math/generic/asinf.cpp +++ b/libc/src/math/generic/asinf.cpp @@ -99,7 +99,7 @@ LLVM_LIBC_FUNCTION(float, asinf, (float x)) { double xsq = xd * xd; double x3 = xd * xsq; double r = asin_eval(xsq); - return fputil::multiply_add(x3, r, xd); + return static_cast(fputil::multiply_add(x3, r, xd)); } // |x| > 1, return NaNs. @@ -149,7 +149,7 @@ LLVM_LIBC_FUNCTION(float, asinf, (float x)) { double c3 = c1 * u; double r = asin_eval(u); - return fputil::multiply_add(c3, r, c2); + return static_cast(fputil::multiply_add(c3, r, c2)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/asinhf.cpp b/libc/src/math/generic/asinhf.cpp index 7063387313ab8..91ecf45667bfc 100644 --- a/libc/src/math/generic/asinhf.cpp +++ b/libc/src/math/generic/asinhf.cpp @@ -27,8 +27,9 @@ LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { if (LIBC_UNLIKELY(x_abs <= 0x3e80'0000U)) { // |x| <= 2^-26 if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return LIBC_UNLIKELY(x_abs == 0) ? x - : (x - 0x1.5555555555555p-3 * x * x * x); + return static_cast(LIBC_UNLIKELY(x_abs == 0) + ? x + : (x - 0x1.5555555555555p-3 * x * x * x)); } double x_d = x; @@ -40,7 +41,7 @@ LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { x_sq, 0.0, -0x1.555555555551ep-3, 0x1.3333333325495p-4, -0x1.6db6db5a7622bp-5, 0x1.f1c70f82928c6p-6, -0x1.6e893934266b7p-6, 0x1.1c0b41d3fbe78p-6, -0x1.c0f47810b3c4fp-7, 0x1.2c8602690143dp-7); - return fputil::multiply_add(x_d, p, x_d); + return static_cast(fputil::multiply_add(x_d, p, x_d)); } const double SIGN[2] = {1.0, -1.0}; @@ -97,9 +98,10 @@ LLVM_LIBC_FUNCTION(float, asinhf, (float x)) { } // asinh(x) = log(x + sqrt(x^2 + 1)) - return x_sign * - log_eval(fputil::multiply_add( - x_d, x_sign, fputil::sqrt(fputil::multiply_add(x_d, x_d, 1.0)))); + return static_cast( + x_sign * + log_eval(fputil::multiply_add( + x_d, x_sign, fputil::sqrt(fputil::multiply_add(x_d, x_d, 1.0))))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/atanf.cpp b/libc/src/math/generic/atanf.cpp index ff5d4507afa3d..ed7847adb15a1 100644 --- a/libc/src/math/generic/atanf.cpp +++ b/libc/src/math/generic/atanf.cpp @@ -22,7 +22,7 @@ LLVM_LIBC_FUNCTION(float, atanf, (float x)) { if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) { if (xbits.is_inf()) - return opt_barrier(sign ? -M_MATH_PI_2 : M_MATH_PI_2); + return static_cast(opt_barrier(sign ? 
-M_MATH_PI_2 : M_MATH_PI_2)); else return x; } @@ -52,7 +52,7 @@ LLVM_LIBC_FUNCTION(float, atanf, (float x)) { } } - return atan_eval(x); + return static_cast(atan_eval(x)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/atanhf.cpp b/libc/src/math/generic/atanhf.cpp index b0c92fa8de87d..0a4512f7622da 100644 --- a/libc/src/math/generic/atanhf.cpp +++ b/libc/src/math/generic/atanhf.cpp @@ -40,8 +40,9 @@ LLVM_LIBC_FUNCTION(float, atanhf, (float x)) { if (LIBC_UNLIKELY(x_abs <= 0x3dcc'0000U)) { // |x| <= 2^-26 if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return LIBC_UNLIKELY(x_abs == 0) ? x - : (x + 0x1.5555555555555p-2 * x * x * x); + return static_cast(LIBC_UNLIKELY(x_abs == 0) + ? x + : (x + 0x1.5555555555555p-2 * x * x * x)); } double xdbl = x; @@ -50,10 +51,10 @@ LLVM_LIBC_FUNCTION(float, atanhf, (float x)) { double pe = fputil::polyeval(x2, 0.0, 0x1.5555555555555p-2, 0x1.999999999999ap-3, 0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, 0x1.745d1745d1746p-4); - return fputil::multiply_add(xdbl, pe, xdbl); + return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); } double xdbl = x; - return 0.5 * log_eval((xdbl + 1.0) / (xdbl - 1.0)); + return static_cast(0.5 * log_eval((xdbl + 1.0) / (xdbl - 1.0))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/cosf.cpp b/libc/src/math/generic/cosf.cpp index ef94804bda60d..2e4ca3c4133ff 100644 --- a/libc/src/math/generic/cosf.cpp +++ b/libc/src/math/generic/cosf.cpp @@ -129,8 +129,8 @@ LLVM_LIBC_FUNCTION(float, cosf, (float x)) { sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y); - return fputil::multiply_add(sin_y, -sin_k, - fputil::multiply_add(cosm1_y, cos_k, cos_k)); + return static_cast(fputil::multiply_add( + sin_y, -sin_k, fputil::multiply_add(cosm1_y, cos_k, cos_k))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/coshf.cpp b/libc/src/math/generic/coshf.cpp index 1ce1bc300d46b..1cf789a10a8ca 100644 --- a/libc/src/math/generic/coshf.cpp +++ b/libc/src/math/generic/coshf.cpp @@ -47,7 +47,7 @@ LLVM_LIBC_FUNCTION(float, coshf, (float x)) { // but not too small inputs, such as |x| < 2^-2, or |x| < 2^-3. // cosh(x) = (e^x + e^(-x)) / 2. - return exp_pm_eval(x); + return static_cast(exp_pm_eval(x)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/exp10f.cpp b/libc/src/math/generic/exp10f.cpp index 06509a97fe032..9d07f2c5261ee 100644 --- a/libc/src/math/generic/exp10f.cpp +++ b/libc/src/math/generic/exp10f.cpp @@ -72,7 +72,7 @@ LLVM_LIBC_FUNCTION(float, exp10f, (float x)) { return fputil::multiply_add(x, 0x1.26bb1cp+1f, 1.0f); } - return Exp10Base::powb_lo(x); + return static_cast(Exp10Base::powb_lo(x)); } // Exceptional value. 
@@ -129,7 +129,7 @@ LLVM_LIBC_FUNCTION(float, exp10f, (float x)) { // 10^x = 2^(mid + hi) * 10^lo // ~ mh * (c0 + p * lo^2) // = (mh * c0) + p * (mh * lo^2) - return multiply_add(p, lo2 * rr.mh, c0 * rr.mh); + return static_cast(multiply_add(p, lo2 * rr.mh, c0 * rr.mh)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/exp2f.cpp b/libc/src/math/generic/exp2f.cpp index 3c319d288e1c9..15f35d0da82f5 100644 --- a/libc/src/math/generic/exp2f.cpp +++ b/libc/src/math/generic/exp2f.cpp @@ -128,7 +128,7 @@ LLVM_LIBC_FUNCTION(float, exp2f, (float x)) { // = 2^(hi + mid) * 2^lo // ~ mh * (1 + lo * P(lo)) // = mh + (mh*lo) * P(lo) - return fputil::multiply_add(p, dx_sq * mh, c1 * mh); + return static_cast(fputil::multiply_add(p, dx_sq * mh, c1 * mh)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/log10.cpp b/libc/src/math/generic/log10.cpp index b2dd29f5f74d1..47569b4758a4b 100644 --- a/libc/src/math/generic/log10.cpp +++ b/libc/src/math/generic/log10.cpp @@ -978,7 +978,7 @@ LLVM_LIBC_FUNCTION(double, log10, (double x)) { // |R * x_m - 1| < C uint64_t x_u = xbits.uintval(); int shifted = x_u >> 45; - size_t index = shifted & 0x7F; + int index = shifted & 0x7F; double r = R[index]; x_e += (x_u >> 52) & 0x7FF; diff --git a/libc/src/math/generic/log1pf.cpp b/libc/src/math/generic/log1pf.cpp index bf6a91a5fc466..a7ca54887d59a 100644 --- a/libc/src/math/generic/log1pf.cpp +++ b/libc/src/math/generic/log1pf.cpp @@ -150,7 +150,7 @@ LLVM_LIBC_FUNCTION(float, log1pf, (float x)) { double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]); double r = fputil::polyeval(xsq, xd, c0, c1, c2, COEFFS[6]); - return r; + return static_cast(r); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/sincosf.cpp b/libc/src/math/generic/sincosf.cpp index 8448945a71d5d..277126bdc89db 100644 --- a/libc/src/math/generic/sincosf.cpp +++ b/libc/src/math/generic/sincosf.cpp @@ -193,10 +193,10 @@ LLVM_LIBC_FUNCTION(void, sincosf, (float x, float *sinp, float *cosp)) { sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y); - *sinp = fputil::multiply_add(sin_y, cos_k, - fputil::multiply_add(cosm1_y, sin_k, sin_k)); - *cosp = fputil::multiply_add(sin_y, -sin_k, - fputil::multiply_add(cosm1_y, cos_k, cos_k)); + *sinp = static_cast(fputil::multiply_add( + sin_y, cos_k, fputil::multiply_add(cosm1_y, sin_k, sin_k))); + *cosp = static_cast(fputil::multiply_add( + sin_y, -sin_k, fputil::multiply_add(cosm1_y, cos_k, cos_k))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/sinf.cpp b/libc/src/math/generic/sinf.cpp index 1641c44e9fc00..697c438c2c67b 100644 --- a/libc/src/math/generic/sinf.cpp +++ b/libc/src/math/generic/sinf.cpp @@ -121,7 +121,7 @@ LLVM_LIBC_FUNCTION(float, sinf, (float x)) { double result = fputil::polyeval(xsq, 1.0, -0x1.55555555554c6p-3, 0x1.1111111085e65p-7, -0x1.a019f70fb4d4fp-13, 0x1.718d179815e74p-19); - return xd * result; + return static_cast(xd * result); } if (LIBC_UNLIKELY(x_abs == 0x4619'9998U)) { // x = 0x1.33333p13 @@ -150,8 +150,8 @@ LLVM_LIBC_FUNCTION(float, sinf, (float x)) { sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y); - return fputil::multiply_add(sin_y, cos_k, - fputil::multiply_add(cosm1_y, sin_k, sin_k)); + return static_cast(fputil::multiply_add( + sin_y, cos_k, fputil::multiply_add(cosm1_y, sin_k, sin_k))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/sinhf.cpp b/libc/src/math/generic/sinhf.cpp index cc3811deeb9de..92edd4ea6a98f 100644 --- a/libc/src/math/generic/sinhf.cpp +++ b/libc/src/math/generic/sinhf.cpp @@ 
-21,7 +21,8 @@ LLVM_LIBC_FUNCTION(float, sinhf, (float x)) { // |x| <= 2^-26 if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x); + return static_cast( + LIBC_UNLIKELY(x_abs == 0) ? x : (x + 0.25 * x * x * x)); } // When |x| >= 90, or x is inf or nan @@ -65,11 +66,11 @@ LLVM_LIBC_FUNCTION(float, sinhf, (float x)) { // Therefore, output of Sollya = x * pe; double pe = fputil::polyeval(x2, 0.0, 0x1.5555555556583p-3, 0x1.111110d239f1fp-7, 0x1.a02b5a284013cp-13); - return fputil::multiply_add(xdbl, pe, xdbl); + return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); } // sinh(x) = (e^x - e^(-x)) / 2. - return exp_pm_eval(x); + return static_cast(exp_pm_eval(x)); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/tanf.cpp b/libc/src/math/generic/tanf.cpp index 217664f8b2acb..681f16177fde5 100644 --- a/libc/src/math/generic/tanf.cpp +++ b/libc/src/math/generic/tanf.cpp @@ -90,7 +90,7 @@ LLVM_LIBC_FUNCTION(float, tanf, (float x)) { double result = fputil::polyeval(xsq, 1.0, 0x1.555555553d022p-2, 0x1.111111ce442c1p-3, 0x1.ba180a6bbdecdp-5, 0x1.69c0a88a0b71fp-6); - return xd * result; + return static_cast(xd * result); } // Check for exceptional values @@ -134,8 +134,9 @@ LLVM_LIBC_FUNCTION(float, tanf, (float x)) { // tan(x) = sin(x) / cos(x) // = (sin_y * cos_k + cos_y * sin_k) / (cos_y * cos_k - sin_y * sin_k) using fputil::multiply_add; - return multiply_add(sin_y, cos_k, multiply_add(cosm1_y, sin_k, sin_k)) / - multiply_add(sin_y, -sin_k, multiply_add(cosm1_y, cos_k, cos_k)); + return static_cast( + multiply_add(sin_y, cos_k, multiply_add(cosm1_y, sin_k, sin_k)) / + multiply_add(sin_y, -sin_k, multiply_add(cosm1_y, cos_k, cos_k))); } } // namespace __llvm_libc diff --git a/libc/src/math/generic/tanhf.cpp b/libc/src/math/generic/tanhf.cpp index e67eadf3ce4b7..3b8506f809c3b 100644 --- a/libc/src/math/generic/tanhf.cpp +++ b/libc/src/math/generic/tanhf.cpp @@ -22,8 +22,8 @@ LLVM_LIBC_FUNCTION(float, tanhf, (float x)) { // |x| <= 2^-26 if (LIBC_UNLIKELY(x_abs <= 0x3280'0000U)) { - return LIBC_UNLIKELY(x_abs == 0) ? x - : (x - 0x1.5555555555555p-2 * x * x * x); + return static_cast( + LIBC_UNLIKELY(x_abs == 0) ? 
x : (x - 0x1.5555555555555p-2 * x * x * x)); } // When |x| >= 15, or x is inf or nan @@ -48,7 +48,7 @@ LLVM_LIBC_FUNCTION(float, tanhf, (float x)) { double pe = fputil::polyeval(x2, 0.0, -0x1.5555555555555p-2, 0x1.1111111111111p-3, -0x1.ba1ba1ba1ba1cp-5, 0x1.664f4882c10fap-6, -0x1.226e355e6c23dp-7); - return fputil::multiply_add(xdbl, pe, xdbl); + return static_cast(fputil::multiply_add(xdbl, pe, xdbl)); } if (LIBC_UNLIKELY(xbits.bits == 0x4058'e0a3U)) { @@ -65,7 +65,7 @@ LLVM_LIBC_FUNCTION(float, tanhf, (float x)) { fputil::multiply_add(ep.mh, r, 1.0); #else double exp_x = ep.mh * r; - return (exp_x - 1.0) / (exp_x + 1.0); + return static_cast((exp_x - 1.0) / (exp_x + 1.0)); #endif // LIBC_TARGET_CPU_HAS_FMA } From 5f883cdbfbe216ec184194114676075f3633e08b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Mar 2023 10:19:10 -0700 Subject: [PATCH 456/691] [docs] Document -fomit-frame-pointer Close #61322 Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D146603 --- clang/include/clang/Driver/Options.td | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index b50dfd6f35510..821e86c0260f3 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2638,7 +2638,11 @@ defm objc_avoid_heapify_local_blocks : BoolFOption<"objc-avoid-heapify-local-blo NegFlag, BothFlags<[CC1Option, NoDriverOption], " to avoid heapifying local blocks">>; -def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group; +def fomit_frame_pointer : Flag<["-"], "fomit-frame-pointer">, Group, + HelpText<"Omit the frame pointer from functions that don't need it. " + "Some stack unwinding cases, such as profilers and sanitizers, may prefer specifying -fno-omit-frame-pointer. " + "On many targets, -O1 and higher omit the frame pointer by default. 
" + "-m[no-]omit-leaf-frame-pointer takes precedence for leaf functions">; def fopenmp : Flag<["-"], "fopenmp">, Group, Flags<[CC1Option, NoArgumentUnused, FlangOption, FC1Option]>, HelpText<"Parse OpenMP pragmas and generate parallel code.">; def fno_openmp : Flag<["-"], "fno-openmp">, Group, Flags<[NoArgumentUnused]>; From 40e5d212cffd2b87f688dd441cd7c7f4084d407d Mon Sep 17 00:00:00 2001 From: Viktoriia Bakalova Date: Thu, 23 Mar 2023 17:27:10 +0000 Subject: [PATCH 457/691] [clangd] Fix indentation in HoverTests.cpp --- .../clangd/unittests/HoverTests.cpp | 66 +++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp index 6ee9384204036..728f5444014dc 100644 --- a/clang-tools-extra/clangd/unittests/HoverTests.cpp +++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp @@ -2892,50 +2892,50 @@ TEST(Hover, Providers) { const char *Code; const std::function ExpectedBuilder; } Cases[] = {{R"cpp( - struct Foo {}; - Foo F = Fo^o{}; - )cpp", + struct Foo {}; + Foo F = Fo^o{}; + )cpp", [](HoverInfo &HI) { HI.Provider = ""; }}, {R"cpp( - #include "foo.h" - Foo F = Fo^o{}; - )cpp", + #include "foo.h" + Foo F = Fo^o{}; + )cpp", [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, {R"cpp( - #include "all.h" - Foo F = Fo^o{}; - )cpp", + #include "all.h" + Foo F = Fo^o{}; + )cpp", [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, {R"cpp( - #define FOO 5 - int F = ^FOO; - )cpp", + #define FOO 5 + int F = ^FOO; + )cpp", [](HoverInfo &HI) { HI.Provider = ""; }}, {R"cpp( - #include "foo.h" - int F = ^FOO; - )cpp", + #include "foo.h" + int F = ^FOO; + )cpp", [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, {R"cpp( - #include "all.h" - int F = ^FOO; - )cpp", + #include "all.h" + int F = ^FOO; + )cpp", [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, {R"cpp( - #include "foo.h" - Foo A; - Foo B; - Foo C = A ^+ B; - )cpp", + #include "foo.h" + Foo A; + Foo B; + Foo C = A ^+ B; + )cpp", [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}, // Hover selects the underlying decl of the using decl {R"cpp( - #include "foo.h" - namespace ns { - using ::Foo; - } - ns::F^oo d; - )cpp", + #include "foo.h" + namespace ns { + using ::Foo; + } + ns::F^oo d; + )cpp", [](HoverInfo &HI) { HI.Provider = "\"foo.h\""; }}}; for (const auto &Case : Cases) { @@ -2946,10 +2946,10 @@ TEST(Hover, Providers) { TU.Filename = "foo.cpp"; TU.Code = Code.code(); TU.AdditionalFiles["foo.h"] = guard(R"cpp( - #define FOO 1 - class Foo {}; - Foo& operator+(const Foo, const Foo); - )cpp"); + #define FOO 1 + class Foo {}; + Foo& operator+(const Foo, const Foo); + )cpp"); TU.AdditionalFiles["all.h"] = guard("#include \"foo.h\""); auto AST = TU.build(); From 40aaa272f145e633b29d5e70a4590cc425801f7e Mon Sep 17 00:00:00 2001 From: Hristo Hristov Date: Wed, 22 Mar 2023 23:24:22 +0200 Subject: [PATCH 458/691] [libc++][ranges] P2711R1 Making multi-param constructors of views explicit Implemented [[ https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2022/p2711r1.html | P2711R1 ]] for existing views. 
(`join_with_view` is not yet implemented) Reviewed By: #libc, philnik Differential Revision: https://reviews.llvm.org/D144822 --- libcxx/docs/Status/Cxx2b.rst | 1 + libcxx/docs/Status/Cxx2bPapers.csv | 2 +- libcxx/include/__config | 6 +++ libcxx/include/__ranges/drop_view.h | 2 +- libcxx/include/__ranges/drop_while_view.h | 2 +- libcxx/include/__ranges/filter_view.h | 6 +-- libcxx/include/__ranges/iota_view.h | 8 ++-- libcxx/include/__ranges/lazy_split_view.h | 4 +- libcxx/include/__ranges/split_view.h | 5 ++- libcxx/include/__ranges/take_view.h | 3 +- libcxx/include/__ranges/take_while_view.h | 2 +- libcxx/include/__ranges/transform_view.h | 2 +- .../range.drop.while/ctor.view.pass.cpp | 21 ++++++++- .../range.drop/ctor.view.pass.cpp | 17 +++++++- .../ranges/range.adaptors/range.drop/types.h | 11 +++++ .../range.filter/ctor.view_pred.pass.cpp | 35 ++++++++------- .../range.lazy.split/ctor.range.pass.cpp | 20 ++++++++- .../range.lazy.split/ctor.view.pass.cpp | 28 +++++++++--- .../range.split/ctor.range.pass.cpp | 21 ++++++++- .../range.split/ctor.view.pass.cpp | 25 +++++++++-- .../range.take.while/ctor.view.pass.cpp | 21 ++++++++- .../range.take/ctor.view_count.pass.cpp | 21 +++++++-- .../ranges/range.adaptors/range.take/types.h | 11 +++++ .../ctor.view_function.pass.cpp | 32 ++++++++------ .../range.iota.view/ctor.first.last.pass.cpp | 43 ++++++++++++++++++- .../range.iota.view/ctor.value.bound.pass.cpp | 41 +++++++++++++++++- 26 files changed, 321 insertions(+), 69 deletions(-) diff --git a/libcxx/docs/Status/Cxx2b.rst b/libcxx/docs/Status/Cxx2b.rst index 471b992fdc03b..3fbbb10f3e30b 100644 --- a/libcxx/docs/Status/Cxx2b.rst +++ b/libcxx/docs/Status/Cxx2b.rst @@ -43,6 +43,7 @@ Paper Status .. [#note-P1413R3] P1413R3: ``std::aligned_storage_t`` and ``std::aligned_union_t`` are marked deprecated, but clang doesn't issue a diagnostic for deprecated using template declarations. .. [#note-P2520R0] P2520R0: Libc++ implemented this paper as a DR in C++20 as well. + .. [#note-P2711R1] P2711R1: ``join_with_view`` hasn't been done yet since this type isn't implemented yet. .. 
_issues-status-cxx2b: diff --git a/libcxx/docs/Status/Cxx2bPapers.csv b/libcxx/docs/Status/Cxx2bPapers.csv index e51ee27deb3c1..900130cfdd506 100644 --- a/libcxx/docs/Status/Cxx2bPapers.csv +++ b/libcxx/docs/Status/Cxx2bPapers.csv @@ -108,7 +108,7 @@ "`P0290R4 `__","LWG", "``apply()`` for ``synchronized_value``","February 2023","","","|concurrency TS|" "`P2770R0 `__","LWG", "Stashing stashing ``iterators`` for proper flattening","February 2023","","","|ranges|" "`P2164R9 `__","LWG", "``views::enumerate``","February 2023","","","|ranges|" -"`P2711R1 `__","LWG", "Making multi-param constructors of ``views`` ``explicit``","February 2023","","","|ranges|" +"`P2711R1 `__","LWG", "Making multi-param constructors of ``views`` ``explicit``","February 2023","|Partial| [#note-P2711R1]_","","|ranges|" "`P2609R3 `__","LWG", "Relaxing Ranges Just A Smidge","February 2023","","","|ranges|" "`P2713R1 `__","LWG", "Escaping improvements in ``std::format``","February 2023","","","|format|" "`P2675R1 `__","LWG", "``format``'s width estimation is too approximate and not forward compatible","February 2023","","","|format|" diff --git a/libcxx/include/__config b/libcxx/include/__config index 3d3664eb83ae7..b9076073ab250 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -847,6 +847,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD # define _LIBCPP_EXPLICIT_SINCE_CXX14 explicit # endif +# if _LIBCPP_STD_VER >= 23 +# define _LIBCPP_EXPLICIT_SINCE_CXX23 explicit +# else +# define _LIBCPP_EXPLICIT_SINCE_CXX23 +# endif + # if _LIBCPP_STD_VER >= 14 # define _LIBCPP_CONSTEXPR_SINCE_CXX14 constexpr # else diff --git a/libcxx/include/__ranges/drop_view.h b/libcxx/include/__ranges/drop_view.h index b97505b578552..87668c341cb67 100644 --- a/libcxx/include/__ranges/drop_view.h +++ b/libcxx/include/__ranges/drop_view.h @@ -74,7 +74,7 @@ namespace ranges { drop_view() requires default_initializable<_View> = default; _LIBCPP_HIDE_FROM_ABI - constexpr drop_view(_View __base, range_difference_t<_View> __count) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 drop_view(_View __base, range_difference_t<_View> __count) : __count_(__count) , __base_(std::move(__base)) { diff --git a/libcxx/include/__ranges/drop_while_view.h b/libcxx/include/__ranges/drop_while_view.h index 7c28992f18742..518feae4e2a98 100644 --- a/libcxx/include/__ranges/drop_while_view.h +++ b/libcxx/include/__ranges/drop_while_view.h @@ -51,7 +51,7 @@ class drop_while_view : public view_interface> { requires default_initializable<_View> && default_initializable<_Pred> = default; - _LIBCPP_HIDE_FROM_ABI constexpr drop_while_view(_View __base, _Pred __pred) + _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 drop_while_view(_View __base, _Pred __pred) : __base_(std::move(__base)), __pred_(std::in_place, std::move(__pred)) {} _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& diff --git a/libcxx/include/__ranges/filter_view.h b/libcxx/include/__ranges/filter_view.h index bf1481b7f9156..28d08c8a67e3b 100644 --- a/libcxx/include/__ranges/filter_view.h +++ b/libcxx/include/__ranges/filter_view.h @@ -64,10 +64,8 @@ namespace ranges { _LIBCPP_HIDE_FROM_ABI filter_view() requires default_initializable<_View> && default_initializable<_Pred> = default; - _LIBCPP_HIDE_FROM_ABI - constexpr filter_view(_View __base, _Pred __pred) - : __base_(std::move(__base)), __pred_(in_place, std::move(__pred)) - { } + _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 filter_view(_View __base, _Pred __pred) + : __base_(std::move(__base)), 
__pred_(in_place, std::move(__pred)) {} template _LIBCPP_HIDE_FROM_ABI diff --git a/libcxx/include/__ranges/iota_view.h b/libcxx/include/__ranges/iota_view.h index 67cf0b73ecd65..57139426724bd 100644 --- a/libcxx/include/__ranges/iota_view.h +++ b/libcxx/include/__ranges/iota_view.h @@ -314,7 +314,7 @@ namespace ranges { constexpr explicit iota_view(_Start __value) : __value_(std::move(__value)) { } _LIBCPP_HIDE_FROM_ABI - constexpr iota_view(type_identity_t<_Start> __value, type_identity_t<_BoundSentinel> __bound_sentinel) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 iota_view(type_identity_t<_Start> __value, type_identity_t<_BoundSentinel> __bound_sentinel) : __value_(std::move(__value)), __bound_sentinel_(std::move(__bound_sentinel)) { // Validate the precondition if possible. if constexpr (totally_ordered_with<_Start, _BoundSentinel>) { @@ -324,17 +324,17 @@ namespace ranges { } _LIBCPP_HIDE_FROM_ABI - constexpr iota_view(__iterator __first, __iterator __last) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 iota_view(__iterator __first, __iterator __last) requires same_as<_Start, _BoundSentinel> : iota_view(std::move(__first.__value_), std::move(__last.__value_)) {} _LIBCPP_HIDE_FROM_ABI - constexpr iota_view(__iterator __first, _BoundSentinel __last) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 iota_view(__iterator __first, _BoundSentinel __last) requires same_as<_BoundSentinel, unreachable_sentinel_t> : iota_view(std::move(__first.__value_), std::move(__last)) {} _LIBCPP_HIDE_FROM_ABI - constexpr iota_view(__iterator __first, __sentinel __last) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 iota_view(__iterator __first, __sentinel __last) requires(!same_as<_Start, _BoundSentinel> && !same_as<_Start, unreachable_sentinel_t>) : iota_view(std::move(__first.__value_), std::move(__last.__bound_sentinel_)) {} diff --git a/libcxx/include/__ranges/lazy_split_view.h b/libcxx/include/__ranges/lazy_split_view.h index b5b0e7ef02307..186a0af320f14 100644 --- a/libcxx/include/__ranges/lazy_split_view.h +++ b/libcxx/include/__ranges/lazy_split_view.h @@ -82,14 +82,14 @@ class lazy_split_view : public view_interface> requires default_initializable<_View> && default_initializable<_Pattern> = default; _LIBCPP_HIDE_FROM_ABI - constexpr lazy_split_view(_View __base, _Pattern __pattern) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 lazy_split_view(_View __base, _Pattern __pattern) : __base_(std::move(__base)), __pattern_(std::move(__pattern)) {} template requires constructible_from<_View, views::all_t<_Range>> && constructible_from<_Pattern, single_view>> _LIBCPP_HIDE_FROM_ABI - constexpr lazy_split_view(_Range&& __r, range_value_t<_Range> __e) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 lazy_split_view(_Range&& __r, range_value_t<_Range> __e) : __base_(views::all(std::forward<_Range>(__r))) , __pattern_(views::single(std::move(__e))) {} diff --git a/libcxx/include/__ranges/split_view.h b/libcxx/include/__ranges/split_view.h index 6ebe5a43ed228..a27ac4ef7a196 100644 --- a/libcxx/include/__ranges/split_view.h +++ b/libcxx/include/__ranges/split_view.h @@ -75,13 +75,14 @@ class split_view : public view_interface> { requires default_initializable<_View> && default_initializable<_Pattern> = default; - _LIBCPP_HIDE_FROM_ABI constexpr split_view(_View __base, _Pattern __pattern) + _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 split_view(_View __base, _Pattern __pattern) : __base_(std::move(__base)), __pattern_(std::move((__pattern))) {} template requires constructible_from<_View, views::all_t<_Range>> && 
constructible_from<_Pattern, single_view>> - _LIBCPP_HIDE_FROM_ABI constexpr split_view(_Range&& __range, range_value_t<_Range> __elem) + _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 + split_view(_Range&& __range, range_value_t<_Range> __elem) : __base_(views::all(std::forward<_Range>(__range))), __pattern_(views::single(std::move(__elem))) {} _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& diff --git a/libcxx/include/__ranges/take_view.h b/libcxx/include/__ranges/take_view.h index ec859e920ff17..111e7e5ba2516 100644 --- a/libcxx/include/__ranges/take_view.h +++ b/libcxx/include/__ranges/take_view.h @@ -67,7 +67,8 @@ class take_view : public view_interface> { _LIBCPP_HIDE_FROM_ABI take_view() requires default_initializable<_View> = default; - _LIBCPP_HIDE_FROM_ABI constexpr take_view(_View __base, range_difference_t<_View> __count) + _LIBCPP_HIDE_FROM_ABI + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 take_view(_View __base, range_difference_t<_View> __count) : __base_(std::move(__base)), __count_(__count) { _LIBCPP_ASSERT(__count >= 0, "count has to be greater than or equal to zero"); } diff --git a/libcxx/include/__ranges/take_while_view.h b/libcxx/include/__ranges/take_while_view.h index 77d7390dceb9c..d1f1bfe75411f 100644 --- a/libcxx/include/__ranges/take_while_view.h +++ b/libcxx/include/__ranges/take_while_view.h @@ -67,7 +67,7 @@ class take_while_view : public view_interface> { requires default_initializable<_View> && default_initializable<_Pred> = default; - _LIBCPP_HIDE_FROM_ABI constexpr take_while_view(_View __base, _Pred __pred) + _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 take_while_view(_View __base, _Pred __pred) : __base_(std::move(__base)), __pred_(std::in_place, std::move(__pred)) {} _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& diff --git a/libcxx/include/__ranges/transform_view.h b/libcxx/include/__ranges/transform_view.h index a71350f0c99dc..14bd400e6d079 100644 --- a/libcxx/include/__ranges/transform_view.h +++ b/libcxx/include/__ranges/transform_view.h @@ -71,7 +71,7 @@ class transform_view : public view_interface> { requires default_initializable<_View> && default_initializable<_Fn> = default; _LIBCPP_HIDE_FROM_ABI - constexpr transform_view(_View __base, _Fn __func) + constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 transform_view(_View __base, _Fn __func) : __func_(std::in_place, std::move(__func)), __base_(std::move(__base)) {} _LIBCPP_HIDE_FROM_ABI diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp index cf9f9dbca9a56..326cabd637089 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr drop_while_view(V base, Pred pred); +// constexpr drop_while_view(V base, Pred pred); // explicit since C++23 #include #include @@ -16,6 +16,8 @@ #include #include "MoveOnly.h" +#include "test_convertible.h" +#include "test_macros.h" struct View : std::ranges::view_base { MoveOnly mo; @@ -32,9 +34,23 @@ struct Pred { bool operator()(int) const; }; +// SFINAE tests. 
+ +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, View, Pred>(), + "This constructor must be explicit"); + +#else + +static_assert( test_convertible, View, Pred>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { - std::ranges::drop_while_view dwv = {View{{}, MoveOnly{5}}, Pred{}}; + std::ranges::drop_while_view dwv{View{{}, MoveOnly{5}}, Pred{}}; assert(dwv.pred().moved); assert(!dwv.pred().copied); assert(std::move(dwv).base().mo.get() == 5); @@ -45,5 +61,6 @@ constexpr bool test() { int main(int, char**) { test(); static_assert(test()); + return 0; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp index 504021aa9cc48..4f4257f9102cb 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp @@ -8,13 +8,28 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr drop_view(V base, range_difference_t count); +// constexpr drop_view(V base, range_difference_t count); // explicit since C++23 #include +#include "test_convertible.h" #include "test_macros.h" #include "types.h" +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, View, std::ranges::range_difference_t>(), + "This constructor must be explicit"); + +#else + +static_assert(test_convertible, View, std::ranges::range_difference_t>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { std::ranges::drop_view dropView1(MoveOnlyView(), 4); assert(dropView1.size() == 4); diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop/types.h b/libcxx/test/std/ranges/range.adaptors/range.drop/types.h index b32c534a37e43..32bbddc05ed97 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.drop/types.h +++ b/libcxx/test/std/ranges/range.adaptors/range.drop/types.h @@ -94,4 +94,15 @@ struct CountedView : std::ranges::view_base { constexpr CountedIter end() const { return CountedIter(ForwardIter(globalBuff + 8)); } }; +struct View : std::ranges::view_base { + constexpr explicit View(int* b, int* e) : begin_(b), end_(e) { } + + constexpr int* begin() const { return begin_; } + constexpr int* end() const { return end_; } + +private: + int* begin_; + int* end_; +}; + #endif // TEST_STD_RANGES_RANGE_ADAPTORS_RANGE_DROP_TYPES_H diff --git a/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp index 644f8deba6e85..3ccab93397147 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp @@ -8,12 +8,14 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr filter_view(View, Pred); - -#include +// constexpr filter_view(View, Pred); // explicit since C++23 #include +#include #include + +#include "test_convertible.h" +#include "test_macros.h" #include "types.h" struct Range : std::ranges::view_base { @@ -41,6 +43,20 @@ struct TrackingRange : TrackInitialization, std::ranges::view_base { int* end() const; }; +// SFINAE tests. 
+ +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, Range, Pred>(), + "This constructor must be explicit"); + +#else + +static_assert( test_convertible, Range, Pred>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { int buff[] = {1, 2, 3, 4, 5, 6, 7, 8}; @@ -57,19 +73,6 @@ constexpr bool test() { assert(it == end); } - // Test implicit syntax - { - Range range(buff, buff + 8); - Pred pred; - std::ranges::filter_view view = {range, pred}; - auto it = view.begin(), end = view.end(); - assert(*it++ == 1); - assert(*it++ == 3); - assert(*it++ == 5); - assert(*it++ == 7); - assert(it == end); - } - // Make sure we move the view { bool moved = false, copied = false; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp index ebf1356afff60..91df304b79af7 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp @@ -11,7 +11,7 @@ // template // requires constructible_from> && // constructible_from>> -// constexpr lazy_split_view(Range&& r, range_value_t e); +// constexpr lazy_split_view(Range&& r, range_value_t e); // explicit since C++23 #include @@ -20,6 +20,8 @@ #include #include #include + +#include "test_convertible.h" #include "types.h" struct ElementWithCounting { @@ -88,6 +90,22 @@ static_assert( std::ranges::random_access_range); static_assert( std::ranges::view); static_assert( std::is_copy_constructible_v); +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert( + !test_convertible, StrView, std::ranges::range_value_t>(), + "This constructor must be explicit"); + +#else + +static_assert( + test_convertible, StrView, std::ranges::range_value_t>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { using V = std::ranges::lazy_split_view; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp index 264e883beeaea..e7bf052a7e9ee 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp @@ -8,13 +8,14 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr lazy_split_view(View base, Pattern pattern); - -#include +// constexpr lazy_split_view(View base, Pattern pattern); // explicit since C++23 #include +#include #include #include + +#include "test_convertible.h" #include "types.h" struct ViewWithCounting : std::ranges::view_base { @@ -41,9 +42,27 @@ struct ViewWithCounting : std::ranges::view_base { constexpr ViewWithCounting& operator=(ViewWithCounting&&) = default; constexpr bool operator==(const ViewWithCounting&) const { return true; } }; + static_assert(std::ranges::forward_range); static_assert(std::ranges::view); +using View = ViewWithCounting; +using Pattern = ViewWithCounting; + +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, View, Pattern>(), + "This constructor must be explicit"); + +#else + +static_assert( test_convertible, View, Pattern>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { // Calling the constructor with `(ForwardView, ForwardView)`. 
{ @@ -62,9 +81,6 @@ constexpr bool test() { // Make sure the arguments are moved, not copied. { - using View = ViewWithCounting; - using Pattern = ViewWithCounting; - // Arguments are lvalues. { int view_copied = 0, view_moved = 0, pattern_copied = 0, pattern_moved = 0; diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp index 605e3d544b2d8..bbe08befdb419 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp @@ -11,7 +11,7 @@ // template // requires constructible_from> && // constructible_from>> -// constexpr split_view(Range&& r, range_value_t e); +// constexpr split_view(Range&& r, range_value_t e); // explicit since C++23 #include #include @@ -21,6 +21,9 @@ #include #include +#include "test_convertible.h" +#include "test_macros.h" + struct Counting { int* times_copied = nullptr; int* times_moved = nullptr; @@ -68,6 +71,22 @@ static_assert(std::ranges::random_access_range); static_assert(std::ranges::view); static_assert(std::is_copy_constructible_v); +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert( + !test_convertible, StrView, std::ranges::range_value_t>(), + "This constructor must be explicit"); + +# else + +static_assert( + test_convertible, StrView, std::ranges::range_value_t>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { using V = std::ranges::split_view; diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp index ad206ee5ed751..963f85f8c478f 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr split_view(View base, Pattern pattern); +// constexpr split_view(View base, Pattern pattern); // explicit since C++23 #include #include @@ -16,6 +16,9 @@ #include #include +#include "test_convertible.h" +#include "test_macros.h" + struct ViewWithCounting : std::ranges::view_base { int* times_copied = nullptr; int* times_moved = nullptr; @@ -38,6 +41,23 @@ struct ViewWithCounting : std::ranges::view_base { constexpr bool operator==(const ViewWithCounting&) const { return true; } }; +using View = ViewWithCounting; +using Pattern = ViewWithCounting; + +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, View, Pattern>(), + "This constructor must be explicit"); + +#else + +static_assert( test_convertible, View, Pattern>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { std::string_view input = "abc def"; @@ -48,9 +68,6 @@ constexpr bool test() { // Make sure the arguments are moved, not copied. { - using View = ViewWithCounting; - using Pattern = ViewWithCounting; - // Arguments are lvalues. 
{ int view_copied = 0, view_moved = 0, pattern_copied = 0, pattern_moved = 0; diff --git a/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp index 7adeb6713680a..469b2698c8844 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr take_while_view(V base, Pred pred); +// constexpr take_while_view(V base, Pred pred); // explicit since C++23 #include #include @@ -16,6 +16,8 @@ #include #include "MoveOnly.h" +#include "test_convertible.h" +#include "test_macros.h" struct View : std::ranges::view_base { MoveOnly mo; @@ -32,9 +34,23 @@ struct Pred { bool operator()(int) const; }; +// SFINAE tests. + +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, View, Pred>(), + "This constructor must be explicit"); + +#else + +static_assert(test_convertible, View, Pred>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { - std::ranges::take_while_view twv = {View{{}, MoveOnly{5}}, Pred{}}; + std::ranges::take_while_view twv{View{{}, MoveOnly{5}}, Pred{}}; assert(twv.pred().moved); assert(!twv.pred().copied); assert(std::move(twv).base().mo.get() == 5); @@ -45,5 +61,6 @@ constexpr bool test() { int main(int, char**) { test(); static_assert(test()); + return 0; } diff --git a/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp index 63b936da98181..f37ffb0825ac1 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp @@ -8,16 +8,31 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr take_view(V base, range_difference_t count); +// constexpr take_view(V base, range_difference_t count); // explicit since C++23 -#include #include +#include -#include "test_macros.h" +#include "test_convertible.h" #include "test_iterators.h" +#include "test_macros.h" #include "test_range.h" #include "types.h" +// SFINAE tests. 
+
+#if TEST_STD_VER >= 23
+
+static_assert(!test_convertible, View, std::ranges::range_difference_t>(),
+              "This constructor must be explicit");
+
+#else
+
+static_assert(test_convertible, View, std::ranges::range_difference_t>(),
+              "This constructor must not be explicit");
+
+#endif // TEST_STD_VER >= 23
+
 constexpr bool test() {
   int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
 
diff --git a/libcxx/test/std/ranges/range.adaptors/range.take/types.h b/libcxx/test/std/ranges/range.adaptors/range.take/types.h
index 09549a9e086f0..db80e68bb21af 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.take/types.h
+++ b/libcxx/test/std/ranges/range.adaptors/range.take/types.h
@@ -54,4 +54,15 @@ static_assert(std::ranges::view);
 static_assert(std::ranges::random_access_range);
 static_assert(std::ranges::sized_range);
 
+struct View : std::ranges::view_base {
+  constexpr explicit View(int* b, int* e) : begin_(b), end_(e) { }
+
+  constexpr int* begin() const { return begin_; }
+  constexpr int* end() const { return end_; }
+
+private:
+  int* begin_;
+  int* end_;
+};
+
 #endif // TEST_STD_RANGES_RANGE_ADAPTORS_RANGE_TAKE_TYPES_H
diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp
index 7ce042603694d..63a43d189256f 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp
@@ -8,14 +8,16 @@
 
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 
-// constexpr transform_view(View, F);
+// constexpr transform_view(View, F); // explicit since C++23
 
+#include
 #include
-#include
+#include "test_convertible.h"
+#include "test_macros.h"
 
 struct Range : std::ranges::view_base {
-  constexpr explicit Range(int* b, int* e) : begin_(b), end_(e) { }
+  constexpr explicit Range(int* b, int* e) : begin_(b), end_(e) {}
 
   constexpr int* begin() const { return begin_; }
   constexpr int* end() const { return end_; }
@@ -28,6 +30,20 @@ struct F {
   constexpr int operator()(int i) const { return i + 100; }
 };
 
+// SFINAE tests.
+
+#if TEST_STD_VER >= 23
+
+static_assert(!test_convertible, Range, F>(),
+              "This constructor must be explicit");
+
+#else
+
+static_assert( test_convertible, Range, F>(),
+              "This constructor must not be explicit");
+
+#endif // TEST_STD_VER >= 23
+
 constexpr bool test() {
   int buff[] = {1, 2, 3, 4, 5, 6, 7, 8};
 
@@ -41,16 +57,6 @@ constexpr bool test() {
     assert(view[7] == 108);
   }
 
-  {
-    Range range(buff, buff + 8);
-    F f;
-    std::ranges::transform_view view = {range, f};
-    assert(view[0] == 101);
-    assert(view[1] == 102);
-    // ...
-    assert(view[7] == 108);
-  }
-
   return true;
 }
 
diff --git a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp
index 0b02cadc32609..ee0e7fceffa61 100644
--- a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp
+++ b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp
@@ -8,14 +8,55 @@
 
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 
-// constexpr iota_view(iterator first, see below last);
+// constexpr iota_view(iterator first, see below last); // explicit since C++23
 
 #include
 #include
+#include "test_convertible.h"
 #include "test_macros.h"
 #include "types.h"
 
+// SFINAE tests.
+ +#if TEST_STD_VER >= 23 + +std::ranges::iota_view view; + +static_assert(!test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::ranges::iota_view{}.end())>(), + "This constructor must be explicit"); + +static_assert(!test_convertible, + decltype(std::ranges::iota_view{SomeInt{0}}.begin()), + decltype(std::unreachable_sentinel)>(), + "This constructor must be explicit"); + +static_assert(!test_convertible>, + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.begin()), + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.end())>(), + "This constructor must be explicit"); + +#else + +static_assert(test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::ranges::iota_view{}.end())>(), + "This constructor must not be explicit"); + +static_assert(test_convertible, + decltype(std::ranges::iota_view{SomeInt{0}}.begin()), + decltype(std::unreachable_sentinel)>(), + "This constructor must not be explicit"); + +static_assert(test_convertible>, + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.begin()), + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.end())>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { std::ranges::iota_view commonView(SomeInt(0), SomeInt(10)); diff --git a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp index 906e0e092d2a6..7528e1ccf3ee0 100644 --- a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp +++ b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp @@ -14,13 +14,52 @@ TEST_CLANG_DIAGNOSTIC_IGNORED("-Wsign-compare") TEST_GCC_DIAGNOSTIC_IGNORED("-Wsign-compare") TEST_MSVC_DIAGNOSTIC_IGNORED(4018 4389) // various "signed/unsigned mismatch" -// constexpr iota_view(type_identity_t value, type_identity_t bound); +// constexpr iota_view(type_identity_t value, type_identity_t bound); // explicit since C++23 #include #include +#include "test_convertible.h" #include "types.h" +// SFINAE tests. 
+ +#if TEST_STD_VER >= 23 + +static_assert(!test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::ranges::iota_view{}.end())>(), + "This constructor must be explicit"); + +static_assert(!test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::unreachable_sentinel)>(), + "This constructor must be explicit"); + +static_assert(!test_convertible>, + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.begin()), + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.end())>(), + "This constructor must be explicit"); + +#else + +static_assert( test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::ranges::iota_view{}.end())>(), + "This constructor must not be explicit"); + +static_assert( test_convertible, + decltype(std::ranges::iota_view{}.begin()), + decltype(std::unreachable_sentinel)>(), + "This constructor must not be explicit"); + +static_assert( test_convertible>, + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.begin()), + decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.end())>(), + "This constructor must not be explicit"); + +#endif // TEST_STD_VER >= 23 + constexpr bool test() { { std::ranges::iota_view io(SomeInt(0), SomeInt(10)); From 101cfe18f7b80801be3e415ad62acfc57960ad90 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 23 Mar 2023 17:09:00 +0000 Subject: [PATCH 459/691] [libcxx] Fix build bustage with threads disabled Building with -DLIBCXX_ENABLE_THREADS=OFF -DLIBCXXABI_ENABLE_THREADS=OFF (like e.g. for wasm) fails after D146228 because of a misplaced std namespace begin/end. Reviewed By: philnik, #libc Differential Revision: https://reviews.llvm.org/D146682 --- libcxx/include/__condition_variable/condition_variable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libcxx/include/__condition_variable/condition_variable.h b/libcxx/include/__condition_variable/condition_variable.h index e66f78725a08c..926effbb23e8e 100644 --- a/libcxx/include/__condition_variable/condition_variable.h +++ b/libcxx/include/__condition_variable/condition_variable.h @@ -29,10 +29,10 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> -#ifndef _LIBCPP_HAS_NO_THREADS - _LIBCPP_BEGIN_NAMESPACE_STD +#ifndef _LIBCPP_HAS_NO_THREADS + // enum class cv_status _LIBCPP_DECLARE_STRONG_ENUM(cv_status){no_timeout, timeout}; _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(cv_status) @@ -234,10 +234,10 @@ inline void condition_variable::__do_timed_wait(unique_lock& __lk, wait_for(__lk, __tp - _Clock::now()); } -_LIBCPP_END_NAMESPACE_STD - #endif // _LIBCPP_HAS_NO_THREADS +_LIBCPP_END_NAMESPACE_STD + _LIBCPP_POP_MACROS #endif // _LIBCPP___CONDITION_VARIABLE_CONDITION_VARIABLE_H From 6a2a5f08de0a09171bb92f91cd7b9deea97f6cce Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 23 Mar 2023 17:16:31 +0000 Subject: [PATCH 460/691] [CodeGenPrepare] Don't give up if unable to sink first arg to a cold call Reviewed By: mkazantsev Differential Revision: https://reviews.llvm.org/D143892 --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 3 +- .../Generic/addr-sink-call-multi-arg.ll | 34 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/Generic/addr-sink-call-multi-arg.ll diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 8d11f282516c8..b571b5a8ab5f4 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ 
-2279,7 +2279,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
       if (!Arg->getType()->isPointerTy())
         continue;
       unsigned AS = Arg->getType()->getPointerAddressSpace();
-      return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
+      if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
+        return true;
     }
 
   IntrinsicInst *II = dyn_cast(CI);
diff --git a/llvm/test/CodeGen/Generic/addr-sink-call-multi-arg.ll b/llvm/test/CodeGen/Generic/addr-sink-call-multi-arg.ll
new file mode 100644
index 0000000000000..b02bdc3b57242
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/addr-sink-call-multi-arg.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+; REQUIRES: aarch64-registered-target
+
+; Check that we don't give up if unable to sink the first argument.
+
+target triple = "aarch64-linux"
+
+declare void @f(ptr, ptr) cold
+
+define void @g(i1 %c1, ptr %p, i32 %i) {
+; CHECK-LABEL: @g(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A0:%.*]] = getelementptr ptr, ptr [[P:%.*]], i32 [[I:%.*]]
+; CHECK-NEXT:    br i1 [[C1:%.*]], label [[IF_THEN:%.*]], label [[EXIT:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[P]], i64 32
+; CHECK-NEXT:    call void @f(ptr [[A0]], ptr [[SUNKADDR]])
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a0 = getelementptr ptr, ptr %p, i32 %i
+  %a1 = getelementptr ptr, ptr %p, i32 4
+  br i1 %c1, label %if.then, label %exit
+
+if.then:
+  call void @f(ptr %a0, ptr %a1)
+  br label %exit
+
+exit:
+  ret void
+}
From c70e360b355ad30a7dd299435aae0324c5033b3f Mon Sep 17 00:00:00 2001
From: Emilia Dreamer
Date: Thu, 23 Mar 2023 19:31:39 +0200
Subject: [PATCH 461/691] [clang-format] Allow trailing return types in macros

The trailing return type arrow checker verifies that a declaration is
being parsed; however, this isn't true inside macros.

It turns out the existence of the auto keyword is enough to make sure
that we're dealing with a trailing return type, and whether we're in a
declaration doesn't matter.
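For illustration, a hypothetical macro of this shape (not taken from the
patch itself; the test below uses a similar MAKE_DEF macro) is now
annotated correctly, because the arrow follows the auto keyword even
though the line is a preprocessor directive rather than a declaration:

  // Sketch only; clang-format now treats the arrow in the macro body as
  // a trailing return type instead of a binary operator.
  #define DEFINE_ANSWER(NAME)                                            \
    auto NAME() -> int { return 42; }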
Fixes https://github.com/llvm/llvm-project/issues/47664 Reviewed By: HazardyKnusperkeks, owenpan Differential Revision: https://reviews.llvm.org/D141811 --- clang/lib/Format/TokenAnnotator.cpp | 3 ++- clang/unittests/Format/FormatTest.cpp | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 55be50aec203e..5dbda8fbe0719 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1909,7 +1909,8 @@ class AnnotatingParser { } else if (Current.is(tok::arrow) && Style.Language == FormatStyle::LK_Java) { Current.setType(TT_LambdaArrow); - } else if (Current.is(tok::arrow) && AutoFound && Line.MustBeDeclaration && + } else if (Current.is(tok::arrow) && AutoFound && + (Line.MustBeDeclaration || Line.InPPDirective) && Current.NestingLevel == 0 && !Current.Previous->isOneOf(tok::kw_operator, tok::identifier)) { // not auto operator->() -> xxx; diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index eeb1234999a10..eb1b563b3d2c3 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -8010,6 +8010,11 @@ TEST_F(FormatTest, TrailingReturnType) { "auto aaaaaaaaaaaaaaaaaaaaaa(T t)\n" " -> decltype(eaaaaaaaaaaaaaaa(t.a).aaaaaaaa());"); + FormatStyle Style = getLLVMStyleWithColumns(60); + verifyFormat("#define MAKE_DEF(NAME) \\\n" + " auto NAME() -> int { return 42; }", + Style); + // Not trailing return types. verifyFormat("void f() { auto a = b->c(); }"); verifyFormat("auto a = p->foo();"); From 8088f5bf2dc051dc0828990b3df2a3299c9f0433 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Thu, 23 Mar 2023 17:13:35 +0000 Subject: [PATCH 462/691] [support] Fix PrintNumber Test on AIX When fixing the test earlier, we missed the JSON case for NaN and INF, so handle those the same as for non-JSON, by creating the string dynamically. 
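A minimal sketch of the idea (illustrative only, not the test's exact
code): render the non-finite values through the same runtime facility
and splice the result into the expected JSON, instead of hard-coding
"inf"/"nan", which some platforms print differently:

  #include <cmath>
  #include <limits>
  #include <string>

  // Build the expected tokens at runtime so they match whatever the host
  // C library prints for non-finite doubles (AIX differs from glibc here).
  const std::string Inf = std::to_string(std::numeric_limits<double>::infinity());
  const std::string NaN = std::to_string(std::nan("1"));
  const std::string Expected =
      "\"double-inf\": " + Inf + ",\n\"double-nan\": " + NaN;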
Reviewed By: abhina.sreeskantharajan Differential Revision: https://reviews.llvm.org/D146739 --- llvm/unittests/Support/ScopedPrinterTest.cpp | 50 ++++++++++---------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/llvm/unittests/Support/ScopedPrinterTest.cpp b/llvm/unittests/Support/ScopedPrinterTest.cpp index f62d310f25d95..9ebcb0b14bd43 100644 --- a/llvm/unittests/Support/ScopedPrinterTest.cpp +++ b/llvm/unittests/Support/ScopedPrinterTest.cpp @@ -510,7 +510,16 @@ FirstSecondThirdByteMask [ (0x333) } TEST_F(ScopedPrinterTest, PrintNumber) { - auto PrintFunc = [](ScopedPrinter &W) { + constexpr float MaxFloat = std::numeric_limits::max(); + constexpr float MinFloat = std::numeric_limits::min(); + constexpr float InfFloat = std::numeric_limits::infinity(); + const float NaNFloat = std::nanf("1"); + constexpr double MaxDouble = std::numeric_limits::max(); + constexpr double MinDouble = std::numeric_limits::min(); + constexpr double InfDouble = std::numeric_limits::infinity(); + const double NaNDouble = std::nan("1"); + + auto PrintFunc = [&](ScopedPrinter &W) { uint64_t Unsigned64Max = std::numeric_limits::max(); uint64_t Unsigned64Min = std::numeric_limits::min(); W.printNumber("uint64_t-max", Unsigned64Max); @@ -556,10 +565,6 @@ TEST_F(ScopedPrinterTest, PrintNumber) { W.printNumber("label", "value", 0); - float MaxFloat = std::numeric_limits::max(); - float MinFloat = std::numeric_limits::min(); - float InfFloat = std::numeric_limits::infinity(); - float NaNFloat = std::nanf("1"); W.printNumber("float-max", MaxFloat); W.printNumber("float-min", MinFloat); W.printNumber("float-inf", InfFloat); @@ -567,11 +572,7 @@ TEST_F(ScopedPrinterTest, PrintNumber) { W.printNumber("float-42.0", 42.0f); W.printNumber("float-42.5625", 42.5625f); - double MaxDouble = std::numeric_limits::max(); - double MinDouble = std::numeric_limits::min(); - double InfDouble = std::numeric_limits::infinity(); - double NaNDouble = std::nan("1"); - W.printNumber("double-max", MaxDouble); + W.printNumber("double-max", MaxDouble); W.printNumber("double-min", MinDouble); W.printNumber("double-inf", InfDouble); W.printNumber("double-nan", NaNDouble); @@ -583,29 +584,30 @@ TEST_F(ScopedPrinterTest, PrintNumber) { // implementation defined behavior. So format the max float/double, instead of // hard coding it in the tests. Note: we can't just use std::to_string(), // since we format the float in PrintNumber(). This isn't required for JSON - // formatting, since it uses exponents, which will be consistent. + // formatting, since it uses exponents, which will be consistent. However, + // NaN and INF may be printed differently, (like AIX), so we still need to + // handle those cases for JSON checking. // Allocate a buffer large enough to represent large floating point values // and construct the string representation for them there. 
char Buf[512]; - format("%5.1f", std::numeric_limits::max()).snprint(Buf, sizeof(Buf)); + format("%5.1f", MaxFloat).snprint(Buf, sizeof(Buf)); std::string MaxFloatStr(Buf); - format("%5.1f", std::numeric_limits::max()).snprint(Buf, sizeof(Buf)); + format("%5.1f", MaxDouble).snprint(Buf, sizeof(Buf)); std::string MaxDoubleStr(Buf); - format("%5.1f", std::numeric_limits::infinity()) - .snprint(Buf, sizeof(Buf)); + format("%5.1f", InfFloat).snprint(Buf, sizeof(Buf)); std::string InfFloatStr(Buf); - std::to_string(std::numeric_limits::infinity()); + format("%5.1f", InfDouble).snprint(Buf, sizeof(Buf)); std::string InfDoubleStr(Buf); - format("%5.1f", std::nanf("1")).snprint(Buf, sizeof(Buf)); + format("%5.1f", NaNFloat).snprint(Buf, sizeof(Buf)); std::string NaNFloatStr(Buf); - format("%5.1f", std::nan("1")).snprint(Buf, sizeof(Buf)); + format("%5.1f", NaNDouble).snprint(Buf, sizeof(Buf)); std::string NaNDoubleStr(Buf); std::string ExpectedOut = Twine( @@ -643,7 +645,7 @@ double-42.5625: 42.6 )") .str(); - const char *JSONExpectedOut = R"({ + std::string JSONExpectedOut = Twine(R"({ "uint64_t-max": 18446744073709551615, "uint64_t-min": 0, "uint32_t-max": 4294967295, @@ -667,17 +669,17 @@ double-42.5625: 42.6 }, "float-max": 3.4028234663852886e+38, "float-min": 1.1754943508222875e-38, - "float-inf": inf, - "float-nan": nan, + "float-inf": )" + std::to_string(InfFloat) + R"(, + "float-nan": )" + std::to_string(NaNFloat) + R"(, "float-42.0": 42, "float-42.5625": 42.5625, "double-max": 1.7976931348623157e+308, "double-min": 2.2250738585072014e-308, - "double-inf": inf, - "double-nan": nan, + "double-inf": )" + std::to_string(InfDouble) + R"(, + "double-nan": )" + std::to_string(NaNDouble) + R"(, "double-42.0": 42, "double-42.5625": 42.5625 -})"; +})").str(); verifyAll(ExpectedOut, JSONExpectedOut, PrintFunc); } From cc8a34b11b0cff9c28780401a61d1cfb9a0f8b36 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 23 Mar 2023 17:49:39 +0000 Subject: [PATCH 463/691] [X86] Refactor movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2))) fold to use KnownBits We don't need an explicit AND mask, we can use KnownBits to determine if each element has (the same) single non-zero bit and shift that into the msb/signbit for MOVMSK to access directly. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 22 +++++++++---------- llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll | 6 ++--- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3a4173e443798..a87dc476a1849 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -54442,25 +54442,25 @@ static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, // Fold movmsk(icmp_eq(and(x,c1),0)) -> movmsk(not(shl(x,c2))) // iff pow2splat(c1). + // Use KnownBits to determine if only a single bit is non-zero + // in each element (pow2 or zero), and shift that bit to the msb. 
if (Src.getOpcode() == X86ISD::PCMPEQ && - Src.getOperand(0).getOpcode() == ISD::AND && ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) { - SDValue LHS = Src.getOperand(0).getOperand(0); - SDValue RHS = Src.getOperand(0).getOperand(1); - KnownBits KnownRHS = DAG.computeKnownBits(RHS); - if (KnownRHS.isConstant() && KnownRHS.getConstant().isPowerOf2()) { + KnownBits KnownSrc = DAG.computeKnownBits(Src.getOperand(0)); + if (KnownSrc.countMaxPopulation() == 1) { SDLoc DL(N); MVT ShiftVT = SrcVT; + SDValue ShiftSrc = Src.getOperand(0); if (ShiftVT.getScalarType() == MVT::i8) { // vXi8 shifts - we only care about the signbit so can use PSLLW. ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); - LHS = DAG.getBitcast(ShiftVT, LHS); + ShiftSrc = DAG.getBitcast(ShiftVT, ShiftSrc); } - unsigned ShiftAmt = KnownRHS.getConstant().countl_zero(); - LHS = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT, LHS, - ShiftAmt, DAG); - LHS = DAG.getNOT(DL, DAG.getBitcast(SrcVT, LHS), SrcVT); - return DAG.getNode(X86ISD::MOVMSK, DL, VT, LHS); + unsigned ShiftAmt = KnownSrc.countMinLeadingZeros(); + ShiftSrc = getTargetVShiftByConstNode(X86ISD::VSHLI, DL, ShiftVT, + ShiftSrc, ShiftAmt, DAG); + ShiftSrc = DAG.getNOT(DL, DAG.getBitcast(SrcVT, ShiftSrc), SrcVT); + return DAG.getNode(X86ISD::MOVMSK, DL, VT, ShiftSrc); } } diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll index 761ad105f75dc..f22d705068150 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-or-cmp.ll @@ -863,10 +863,8 @@ define i1 @mask_v8i32(<8 x i32> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pcmpeqb %xmm0, %xmm1 -; SSE2-NEXT: pmovmskb %xmm1, %eax -; SSE2-NEXT: xorl $65535, %eax # imm = 0xFFFF +; SSE2-NEXT: pmovmskb %xmm0, %eax +; SSE2-NEXT: testl %eax, %eax ; SSE2-NEXT: sete %al ; SSE2-NEXT: retq ; From 5c9a26238a198c115b7a14acd80d1505438438c5 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 23 Mar 2023 17:38:07 +0000 Subject: [PATCH 464/691] [CodeGenPrepare][NFC] Pre-commit test for memory use count fix Reviewed By: mkazantsev Differential Revision: https://reviews.llvm.org/D145705 --- llvm/test/CodeGen/Generic/addr-use-count.ll | 70 +++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 llvm/test/CodeGen/Generic/addr-use-count.ll diff --git a/llvm/test/CodeGen/Generic/addr-use-count.ll b/llvm/test/CodeGen/Generic/addr-use-count.ll new file mode 100644 index 0000000000000..a3b110bf60896 --- /dev/null +++ b/llvm/test/CodeGen/Generic/addr-use-count.ll @@ -0,0 +1,70 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -codegenprepare < %s | FileCheck %s +; REQUIRES: aarch64-registered-target + +; Test that `%addr` is sunk, even though the number of memory uses to scan exceeds the limit. + +target triple = "aarch64-linux" + +declare void @g(...) 
+ +define void @f(ptr %p) { +; CHECK-LABEL: @f( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 4 +; CHECK-NEXT: [[T0:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T1:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T2:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T3:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T4:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T5:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T6:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T7:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T8:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T9:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T10:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T11:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T12:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T13:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T14:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T15:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T16:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T17:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[T18:%.*]] = load i32, ptr [[SUNKADDR]], align 4 +; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[P]], i64 8 +; CHECK-NEXT: [[T19:%.*]] = load i32, ptr [[SUNKADDR1]], align 4 +; CHECK-NEXT: call void @g(i32 [[T0]], i32 [[T1]], i32 [[T2]], i32 [[T3]], i32 [[T4]], i32 [[T5]], i32 [[T6]], i32 [[T7]], i32 [[T8]], i32 [[T9]], i32 [[T10]], i32 [[T11]], i32 [[T12]], i32 [[T13]], i32 [[T14]], i32 [[T15]], i32 [[T16]], i32 [[T17]], i32 [[T18]], i32 [[T19]]) +; CHECK-NEXT: ret void +; +entry: + %addr = getelementptr i8, ptr %p, i32 4 + br label %exit + +exit: + %t0 = load i32, ptr %addr + %t1 = load i32, ptr %addr + %t2 = load i32, ptr %addr + %t3 = load i32, ptr %addr + %t4 = load i32, ptr %addr + %t5 = load i32, ptr %addr + %t6 = load i32, ptr %addr + %t7 = load i32, ptr %addr + %t8 = load i32, ptr %addr + %t9 = load i32, ptr %addr + %t10 = load i32, ptr %addr + %t11 = load i32, ptr %addr + %t12 = load i32, ptr %addr + %t13 = load i32, ptr %addr + %t14 = load i32, ptr %addr + %t15 = load i32, ptr %addr + %t16 = load i32, ptr %addr + %t17 = load i32, ptr %addr + %t18 = load i32, ptr %addr + %addr.1 = getelementptr i8, ptr %addr, i32 4 + %t19 = load i32, ptr %addr.1 + + call void @g(i32 %t0, i32 %t1, i32 %t2, i32 %t3, i32 %t4, i32 %t5, i32 %t6, i32 %t7, + i32 %t8, i32 %t9, i32 %t10, i32 %t11, i32 %t12, i32 %t13, i32 %t14, i32 %t15, i32 %t16, + i32 %t17, i32 %t18, i32 %t19) + ret void +} From 73bec2b2c3c90bc503bde5b67a239708f2c0f183 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 23 Mar 2023 10:47:04 -0700 Subject: [PATCH 465/691] [mlir][Vector] Retire one old filter-based test Differential Revision: https://reviews.llvm.org/D146742 --- .../Vector/vector-contract-transforms.mlir | 27 ------------------- .../Dialect/Vector/TestVectorTransforms.cpp | 23 +--------------- 2 files changed, 1 insertion(+), 49 deletions(-) diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir index 2cbd604759edc..e3f86ee0b39bc 100644 --- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir @@ -1,7 +1,6 @@ // RUN: mlir-opt %s -test-vector-contraction-lowering | 
FileCheck %s // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-lower-matrix-intrinsics=1 | FileCheck %s --check-prefix=MATRIX // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-outerproduct=1 | FileCheck %s --check-prefix=OUTERPRODUCT -// RUN: mlir-opt %s -test-vector-contraction-lowering=vector-filter-outerproduct=1 | FileCheck %s --check-prefix=FILTEROUTERPRODUCT // RUN: mlir-opt %s -test-vector-contraction-lowering=vector-parallel-arith=1 | FileCheck %s --check-prefix=PARALLEL #dotp_accesses = [ @@ -1182,32 +1181,6 @@ func.func @matmul_7(%arg0: vector<2x1xf32>, %arg1: vector<1x3xf32>, %arg2: vecto return %0 : vector<3x2xf32> } -// FILTEROUTERPRODUCT-LABEL: func @matmul_4_filtered -// FILTEROUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<4x4xf32>, -// FILTEROUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<4x4xf32>, -// FILTEROUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<4x4xf32> -// FILTEROUTERPRODUCT: %[[c0:.*]] = vector.contract {{{.*}}} %[[A]], %[[B]], %[[C]] -func.func @matmul_4_filtered(%arg0: vector<4x4xf32>, %arg1: vector<4x4xf32>, %arg2: vector<4x4xf32>) --> vector<4x4xf32> -{ - %0 = vector.contract #matmat_trait_0 %arg0, %arg1, %arg2 - : vector<4x4xf32>, vector<4x4xf32> into vector<4x4xf32> - return %0 : vector<4x4xf32> -} - -// FILTEROUTERPRODUCT-LABEL: func @matmul_4_not_filtered -// FILTEROUTERPRODUCT-SAME: %[[A:[a-zA-Z0-9]*]]: vector<3x4xf32>, -// FILTEROUTERPRODUCT-SAME: %[[B:[a-zA-Z0-9]*]]: vector<4x4xf32>, -// FILTEROUTERPRODUCT-SAME: %[[C:[a-zA-Z0-9]*]]: vector<3x4xf32> -// FILTEROUTERPRODUCT: %[[c0:.*]] = vector.contract {{{.*}}} %[[A]], %[[B]], %[[C]] -func.func @matmul_4_not_filtered(%arg0: vector<3x4xf32>, %arg1: vector<4x4xf32>, %arg2: vector<3x4xf32>) --> vector<3x4xf32> -{ - %0 = vector.contract #matmat_trait_0 %arg0, %arg1, %arg2 - : vector<3x4xf32>, vector<4x4xf32> into vector<3x4xf32> - return %0 : vector<3x4xf32> -} - // PARALLEL-LABEL: func @parrallel_contract_lowering // PARALLEL: %[[E0:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1x4xf32> // PARALLEL: %[[E1:.*]] = vector.extract %{{.*}}[0, 0] : vector<1x1x4xf32> diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index 5a21bff0b39c3..f79ca2259fa38 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include #include +#include #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -136,11 +136,6 @@ struct TestVectorContractionLowering *this, "vector-outerproduct", llvm::cl::desc("Lower vector.contract to vector.outerproduct"), llvm::cl::init(false)}; - Option lowerToFilterOuterProduct{ - *this, "vector-filter-outerproduct", - llvm::cl::desc("Lower vector.contract to vector.outerproduct but not for " - "vectors of size 4."), - llvm::cl::init(false)}; Option lowerToParallelArith{ *this, "vector-parallel-arith", llvm::cl::desc("Lower vector.contract to elementwise vector ops."), @@ -159,22 +154,6 @@ struct TestVectorContractionLowering return; } - // Test on one pattern in isolation. - if (lowerToFilterOuterProduct) { - VectorContractLowering lowering = VectorContractLowering::OuterProduct; - VectorTransformsOptions options{lowering}; - patterns.add( - options, &getContext(), /*benefit=*/1, [](vector::ContractionOp op) { - // Only lowers vector.contract where the lhs as a type vector - // where M is not 4. 
-          if (op.getRhsType().getShape()[0] == 4)
-            return failure();
-          return success();
-        });
-      (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
-      return;
-    }
-
     if (lowerToParallelArith) {
       vector::populateVectorContractLoweringPatterns(
           patterns,
From 637048f122dc5112a86ae8c5c437efa22379127e Mon Sep 17 00:00:00 2001
From: Julian Lettner
Date: Thu, 23 Mar 2023 11:01:33 -0700
Subject: [PATCH 466/691] [TSan][Darwin] Test fix external-swift-debugging.cpp

My recent change [1] extended the external-swift-debugging.cpp test,
but didn't account for PAC, under which function pointers aren't
trivially comparable. We could use `ptrauth_strip()`, but for the test
it's easier to just use the symbol name.

[1] https://reviews.llvm.org/D146264
---
 compiler-rt/test/tsan/Darwin/external-swift-debugging.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/compiler-rt/test/tsan/Darwin/external-swift-debugging.cpp b/compiler-rt/test/tsan/Darwin/external-swift-debugging.cpp
index 64475a3e97373..8f8b2d514ea0b 100644
--- a/compiler-rt/test/tsan/Darwin/external-swift-debugging.cpp
+++ b/compiler-rt/test/tsan/Darwin/external-swift-debugging.cpp
@@ -30,9 +30,6 @@ int main(int argc, char *argv[]) {
   fprintf(stderr, "Start.\n");
   // CHECK: Start.
 
-  fprintf(stderr, "ExternalWrite function address: %p\n", &ExternalWrite);
-  // CHECK: ExternalWrite function address: [[ExternalWrite_addr:0x[0-9a-z]+]]
-
   void *opaque_object = malloc(16);
   std::thread t1([opaque_object] {
     ExternalWrite(opaque_object);
@@ -85,7 +82,7 @@ __tsan_on_report(void *report) {
             info.dli_saddr, info.dli_sname);
   }
   // Ensure ExternalWrite() function is top of trace
-  // CHECK: 0: frame: 0x{{[0-9a-z]+}}, function: [[ExternalWrite_addr]] _Z13ExternalWritePv
+  // CHECK: 0: frame: 0x{{[0-9a-z]+}}, function: 0x{{[0-9a-z]+}} _Z13ExternalWritePv
 }
 
 // CHECK: Done.
From 2bc4c3e920ee078ef2879b00c40440e0867f0b9e Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache
Date: Thu, 23 Mar 2023 08:32:48 -0700
Subject: [PATCH 467/691] [mlir][Vector] NFC - Reorganize vector patterns

Vector dialect patterns have grown enormously in the past year, to the
point where they are now impenetrable. Start reorganizing them towards
finer-grained control.
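As a sketch of the finer-grained usage this enables (the pass wrapper
below is illustrative; the populate functions and their signatures come
from the new LoweringPatterns.h added by this patch):

  #include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
  #include "mlir/Transforms/GreedyPatternRewriteDriver.h"

  using namespace mlir;

  // A client now pulls in only the lowerings it needs, e.g. contract +
  // transpose, instead of one monolithic vector pattern set.
  static void lowerVectorContractsOnly(Operation *op) {
    RewritePatternSet patterns(op->getContext());
    vector::VectorTransformsOptions options;
    vector::populateVectorContractLoweringPatterns(patterns, options);
    vector::populateVectorTransposeLoweringPatterns(patterns, options);
    (void)applyPatternsAndFoldGreedily(op, std::move(patterns));
  }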
Differential Revision: https://reviews.llvm.org/D146736 --- .../mlir/Dialect/Vector/IR/VectorOps.h | 36 +- .../Vector/Transforms/LoweringPatterns.h | 248 ++ .../mlir/Dialect/Vector/Transforms/Passes.h | 6 - .../Vector/Transforms/VectorRewritePatterns.h | 382 +-- .../Vector/Transforms/VectorTransforms.h | 61 +- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 1 + .../VectorToLLVM/ConvertVectorToLLVMPass.cpp | 6 +- .../Conversion/VectorToSCF/VectorToSCF.cpp | 3 +- .../Linalg/TransformOps/CMakeLists.txt | 2 +- .../TransformOps/LinalgTransformOps.cpp | 1 + .../TransformOps/VectorTransformOps.cpp | 17 +- .../Dialect/Vector/Transforms/CMakeLists.txt | 10 +- .../Transforms/LowerVectorBroadcast.cpp | 156 ++ .../Vector/Transforms/LowerVectorContract.cpp | 1329 ++++++++++ .../Vector/Transforms/LowerVectorGather.cpp | 173 ++ .../Vector/Transforms/LowerVectorMask.cpp | 144 +- ...orms.cpp => LowerVectorMultiReduction.cpp} | 9 +- .../Vector/Transforms/LowerVectorScan.cpp | 251 ++ .../Transforms/LowerVectorShapeCast.cpp | 177 ++ ...tePatterns.cpp => LowerVectorTransfer.cpp} | 245 +- .../Transforms/LowerVectorTranspose.cpp | 210 ++ .../Transforms/VectorTransferOpTransforms.cpp | 1 + .../VectorTransferSplitRewritePatterns.cpp | 55 +- .../Vector/Transforms/VectorTransforms.cpp | 2224 +---------------- .../Dialect/Vector/TestVectorTransforms.cpp | 8 +- .../llvm-project-overlay/mlir/BUILD.bazel | 1 + 26 files changed, 3103 insertions(+), 2653 deletions(-) create mode 100644 mlir/include/mlir/Dialect/Vector/Transforms/LoweringPatterns.h create mode 100644 mlir/lib/Dialect/Vector/Transforms/LowerVectorBroadcast.cpp create mode 100644 mlir/lib/Dialect/Vector/Transforms/LowerVectorContract.cpp create mode 100644 mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp rename mlir/lib/Dialect/Vector/Transforms/{VectorMultiDimReductionTransforms.cpp => LowerVectorMultiReduction.cpp} (98%) create mode 100644 mlir/lib/Dialect/Vector/Transforms/LowerVectorScan.cpp create mode 100644 mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp rename mlir/lib/Dialect/Vector/Transforms/{VectorTransferPermutationMapRewritePatterns.cpp => LowerVectorTransfer.cpp} (57%) create mode 100644 mlir/lib/Dialect/Vector/Transforms/LowerVectorTranspose.cpp diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h index 56f8b4bf22d21..4763b6525b934 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.h @@ -110,43 +110,11 @@ void populateFlattenVectorTransferPatterns(RewritePatternSet &patterns, void populateBubbleVectorBitCastOpPatterns(RewritePatternSet &patterns, PatternBenefit benefit = 1); -/// Collect a set of transfer read/write lowering patterns. -/// -/// These patterns lower transfer ops to simpler ops like `vector.load`, -/// `vector.store` and `vector.broadcast`. Only transfers with a transfer rank -/// of a most `maxTransferRank` are lowered. This is useful when combined with -/// VectorToSCF, which reduces the rank of vector transfer ops. -void populateVectorTransferLoweringPatterns( - RewritePatternSet &patterns, - std::optional maxTransferRank = std::nullopt, - PatternBenefit benefit = 1); - /// These patterns materialize masks for various vector ops such as transfers. void populateVectorMaskMaterializationPatterns(RewritePatternSet &patterns, bool force32BitVectorIndices, PatternBenefit benefit = 1); -/// Collects patterns to progressively lower vector.broadcast ops on high-D -/// vectors to low-D vector ops. 
-void populateVectorBroadcastLoweringPatterns(RewritePatternSet &patterns, - PatternBenefit benefit = 1); - -/// Collects patterns to progressively lower vector mask ops into elementary -/// selection and insertion ops. -void populateVectorMaskOpLoweringPatterns(RewritePatternSet &patterns, - PatternBenefit benefit = 1); - -/// Collects patterns to progressively lower vector.shape_cast ops on high-D -/// vectors into 1-D/2-D vector ops by generating data movement extract/insert -/// ops. -void populateVectorShapeCastLoweringPatterns(RewritePatternSet &patterns, - PatternBenefit benefit = 1); - -/// Collects patterns that lower scalar vector transfer ops to memref loads and -/// stores when beneficial. -void populateScalarVectorTransferLoweringPatterns(RewritePatternSet &patterns, - PatternBenefit benefit = 1); - /// Returns the integer type required for subscripts in the vector dialect. IntegerType getVectorSubscriptType(Builder &builder); @@ -214,8 +182,8 @@ void createMaskOpRegion(OpBuilder &builder, Operation *maskableOp); /// Creates a vector.mask operation around a maskable operation. Returns the /// vector.mask operation if the mask provided is valid. Otherwise, returns the /// maskable operation itself. -Operation *maskOperation(OpBuilder &builder, Operation *maskableOp, - Value mask, Value passthru = Value()); +Operation *maskOperation(OpBuilder &builder, Operation *maskableOp, Value mask, + Value passthru = Value()); /// Creates a vector select operation that picks values from `newValue` or /// `passthru` for each result vector lane based on `mask`. This utility is used diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/LoweringPatterns.h b/mlir/include/mlir/Dialect/Vector/Transforms/LoweringPatterns.h new file mode 100644 index 0000000000000..dfadffba3883b --- /dev/null +++ b/mlir/include/mlir/Dialect/Vector/Transforms/LoweringPatterns.h @@ -0,0 +1,248 @@ +//===- LoweringPatterns.h - Vector rewrite patterns --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_VECTOR_TRANSFORMS_LOWERINGPATTERNS_H +#define MLIR_DIALECT_VECTOR_TRANSFORMS_LOWERINGPATTERNS_H + +#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" + +namespace mlir { +class RewritePatternSet; + +namespace vector { + +//===----------------------------------------------------------------------===// +// Lowering pattern populate functions +//===----------------------------------------------------------------------===// + +/// Populate the pattern set with the following patterns: +/// +/// [OuterProductOpLowering] +/// Progressively lower a `vector.outerproduct` to linearized +/// `vector.extract` + `vector.fma` + `vector.insert`. +/// +/// [ContractionOpLowering] +/// Progressive lowering of ContractionOp. +/// One: +/// %x = vector.contract with at least one free/batch dimension +/// is replaced by: +/// %a = vector.contract with one less free/batch dimension +/// %b = vector.contract with one less free/batch dimension +/// +/// [ContractionOpToMatmulOpLowering] +/// Progressively lower a `vector.contract` with row-major matmul semantics to +/// linearized `vector.shape_cast` + `vector.matmul` on the way to +/// `llvm.matrix.multiply`. 
+///
+/// [ContractionOpToDotLowering]
+/// Progressively lower a `vector.contract` with row-major matmul semantics to
+/// linearized `vector.extract` + `vector.reduce` + `vector.insert`.
+///
+/// [ContractionOpToOuterProductOpLowering]
+/// Progressively lower a `vector.contract` with row-major matmul semantics to
+/// linearized `vector.extract` + `vector.outerproduct` + `vector.insert`.
+void populateVectorContractLoweringPatterns(
+    RewritePatternSet &patterns, VectorTransformsOptions options,
+    PatternBenefit benefit = 1, bool disableOuterProductLowering = false);
+
+/// Collect a set of patterns to convert vector.multi_reduction op into
+/// a sequence of vector.reduction ops. The patterns comprise:
+///
+/// [InnerOuterDimReductionConversion]
+/// Rewrites vector.multi_reduction such that all reduction dimensions are
+/// either innermost or outermost, by adding the proper vector.transpose
+/// operations.
+///
+/// [ReduceMultiDimReductionRank]
+/// Once in innermost or outermost reduction
+/// form, rewrites n-D vector.multi_reduction into 2-D vector.multi_reduction,
+/// by introducing vector.shape_cast ops to collapse + multi-reduce + expand
+/// back.
+///
+/// [TwoDimMultiReductionToElementWise]
+/// Once in 2-D vector.multi_reduction form, with an **outermost** reduction
+/// dimension, unroll the outer dimension to obtain a sequence of 1-D vector
+/// ops. This also has an opportunity for tree-reduction (in the future).
+///
+/// [TwoDimMultiReductionToReduction]
+/// Once in 2-D vector.multi_reduction form, with an **innermost** reduction
+/// dimension, unroll the outer dimension to obtain a sequence of extract +
+/// vector.reduction + insert. This can further lower to horizontal reduction
+/// ops.
+///
+/// [OneDimMultiReductionToTwoDim]
+/// For cases that reduce to 1-D vector reduction (and are thus missing
+/// either a parallel or a reduction), we lift them back up to 2-D with a simple
+/// vector.shape_cast to vector<1xk> so that the other patterns can kick in,
+/// thus fully exiting out of the vector.multi_reduction abstraction.
+void populateVectorMultiReductionLoweringPatterns(
+    RewritePatternSet &patterns, VectorMultiReductionLowering options,
+    PatternBenefit benefit = 1);
+
+/// Populate the pattern set with the following patterns:
+///
+/// [BroadcastOpLowering]
+/// Progressive lowering of BroadcastOp to ExtractOp + InsertOp + lower-D
+/// BroadcastOp until dim 1.
+void populateVectorBroadcastLoweringPatterns(RewritePatternSet &patterns,
+                                             PatternBenefit benefit = 1);
+
+/// Populate the pattern set with the following patterns:
+///
+/// [CreateMaskOp]
+/// Progressive lowering of CreateMaskOp to lower-D CreateMaskOp until dim 1.
+///
+/// [ConstantMaskOp]
+/// Progressive lowering of ConstantMaskOp to lower-D ConstantMaskOp until
+/// dim 1.
+void populateVectorMaskOpLoweringPatterns(RewritePatternSet &patterns,
+                                          PatternBenefit benefit = 1);
+
+/// Collects patterns that lower scalar vector transfer ops to memref loads and
+/// stores when beneficial.
+void populateScalarVectorTransferLoweringPatterns(RewritePatternSet &patterns,
+                                                  PatternBenefit benefit = 1);
+
+/// Populate the pattern set with the following patterns:
+///
+/// [ShapeCastOp2DDownCastRewritePattern]
+/// ShapeOp 2D -> 1D downcast serves the purpose of flattening 2-D to 1-D
+/// vectors progressively.
+///
+/// [ShapeCastOp2DUpCastRewritePattern]
+/// ShapeOp 1D -> 2D upcast serves the purpose of unflattening 2-D from 1-D
+/// vectors progressively.
+/// +/// [ShapeCastOpRewritePattern] +/// Reference lowering to fully unrolled sequences of single element ExtractOp + +/// InsertOp. Note that applying this pattern can almost always be considered a +/// performance bug. +void populateVectorShapeCastLoweringPatterns(RewritePatternSet &patterns, + PatternBenefit benefit = 1); + +/// Populate the pattern set with the following patterns: +/// +/// [TransposeOpLowering] +/// +/// [TransposeOp2DToShuffleLowering] +/// +void populateVectorTransposeLoweringPatterns(RewritePatternSet &patterns, + VectorTransformsOptions options, + PatternBenefit benefit = 1); + +/// Populate the pattern set with the following patterns: +/// +/// [TransferReadToVectorLoadLowering] +/// Progressive lowering of transfer_read.This pattern supports lowering of +/// `vector.transfer_read` to a combination of `vector.load` and +/// `vector.broadcast` +/// +/// [TransferWriteToVectorStoreLowering] +/// Progressive lowering of transfer_write. This pattern supports lowering of +/// `vector.transfer_write` to `vector.store` +/// +/// [VectorLoadToMemrefLoadLowering] +/// Replace a 0-d vector.load with a memref.load + vector.broadcast. +/// +/// [VectorStoreToMemrefStoreLowering] +/// Replace a 0-d vector.store with a vector.extractelement + memref.store. +/// +/// These patterns lower transfer ops to simpler ops like `vector.load`, +/// `vector.store` and `vector.broadcast`. Only transfers with a transfer rank +/// of a most `maxTransferRank` are lowered. This is useful when combined with +/// VectorToSCF, which reduces the rank of vector transfer ops. +void populateVectorTransferLoweringPatterns( + RewritePatternSet &patterns, + std::optional maxTransferRank = std::nullopt, + PatternBenefit benefit = 1); + +/// Collect a set of transfer read/write lowering patterns that simplify the +/// permutation map (e.g., converting it to a minor identity map) by inserting +/// broadcasts and transposes. More specifically: +/// +/// [TransferReadPermutationLowering] +/// Lower transfer_read op with permutation into a transfer_read with a +/// permutation map composed of leading zeros followed by a minor identity + +/// vector.transpose op. +/// Ex: +/// vector.transfer_read ... +/// permutation_map: (d0, d1, d2) -> (0, d1) +/// into: +/// %v = vector.transfer_read ... +/// permutation_map: (d0, d1, d2) -> (d1, 0) +/// vector.transpose %v, [1, 0] +/// +/// vector.transfer_read ... +/// permutation_map: (d0, d1, d2, d3) -> (0, 0, 0, d1, d3) +/// into: +/// %v = vector.transfer_read ... +/// permutation_map: (d0, d1, d2, d3) -> (0, 0, d1, 0, d3) +/// vector.transpose %v, [0, 1, 3, 2, 4] +/// Note that an alternative is to transform it to linalg.transpose + +/// vector.transfer_read to do the transpose in memory instead. +/// +/// [TransferWritePermutationLowering] +/// Lower transfer_write op with permutation into a transfer_write with a +/// minor identity permutation map. (transfer_write ops cannot have broadcasts.) +/// Ex: +/// vector.transfer_write %v ... +/// permutation_map: (d0, d1, d2) -> (d2, d0, d1) +/// into: +/// %tmp = vector.transpose %v, [2, 0, 1] +/// vector.transfer_write %tmp ... +/// permutation_map: (d0, d1, d2) -> (d0, d1, d2) +/// +/// vector.transfer_write %v ... +/// permutation_map: (d0, d1, d2, d3) -> (d3, d2) +/// into: +/// %tmp = vector.transpose %v, [1, 0] +/// %v = vector.transfer_write %tmp ... 
+///         permutation_map: (d0, d1, d2, d3) -> (d2, d3)
+///
+/// [TransferOpReduceRank]
+/// Lower transfer_read op with broadcast in the leading dimensions into
+/// transfer_read of lower rank + vector.broadcast.
+/// Ex: vector.transfer_read ...
+///         permutation_map: (d0, d1, d2, d3) -> (0, d1, 0, d3)
+/// into:
+///     %v = vector.transfer_read ...
+///         permutation_map: (d0, d1, d2, d3) -> (d1, 0, d3)
+///     vector.broadcast %v
+void populateVectorTransferPermutationMapLoweringPatterns(
+    RewritePatternSet &patterns, PatternBenefit benefit = 1);
+
+/// Populate the pattern set with the following patterns:
+///
+/// [ScanToArithOps]
+/// Convert vector.scan op into arith ops and vector.insert_strided_slice /
+/// vector.extract_strided_slice.
+void populateVectorScanLoweringPatterns(RewritePatternSet &patterns,
+                                        PatternBenefit benefit = 1);
+
+/// Populate the pattern set with the following patterns:
+///
+/// [FlattenGather]
+/// Flattens 2 or more dimensional `vector.gather` ops by unrolling the
+/// outermost dimension.
+///
+/// [Gather1DToConditionalLoads]
+/// Turns 1-d `vector.gather` into a scalarized sequence of `vector.load`s or
+/// `tensor.extract`s. To avoid out-of-bounds memory accesses, these
+/// loads/extracts are made conditional using `scf.if` ops.
+void populateVectorGatherLoweringPatterns(RewritePatternSet &patterns,
+                                          PatternBenefit benefit = 1);
+
+/// Populates instances of `MaskOpRewritePattern` to lower masked operations
+/// with `vector.mask`. Patterns should rewrite the `vector.mask` operation and
+/// not its nested `MaskableOpInterface`.
+void populateVectorMaskLoweringPatternsForSideEffectingOps(
+    RewritePatternSet &patterns);
+
+} // namespace vector
+} // namespace mlir
+#endif // MLIR_DIALECT_VECTOR_TRANSFORMS_LOWERINGPATTERNS_H
diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h
index d0c06f69930d2..bf89b01e2b60c 100644
--- a/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Vector/Transforms/Passes.h
@@ -22,12 +22,6 @@ std::unique_ptr<Pass> createVectorBufferizePass();
 /// Creates an instance of the `vector.mask` lowering pass.
 std::unique_ptr<Pass> createLowerVectorMaskPass();
 
-/// Populates instances of `MaskOpRewritePattern` to lower masked operations
-/// with `vector.mask`. Patterns should rewrite the `vector.mask` operation and
-/// not its nested `MaskableOpInterface`.
-void populateVectorMaskLoweringPatternsForSideEffectingOps( - RewritePatternSet &patterns); - //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h b/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h index af68de7e0051e..a79bbd0be0975 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h +++ b/mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h @@ -9,8 +9,8 @@ #ifndef MLIR_DIALECT_VECTOR_TRANSFORMS_VECTORREWRITEPATTERNS_H #define MLIR_DIALECT_VECTOR_TRANSFORMS_VECTORREWRITEPATTERNS_H -#include #include +#include #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/Dialect/Vector/Transforms/VectorTransformsEnums.h.inc" @@ -23,42 +23,7 @@ namespace mlir { class RewritePatternSet; namespace vector { - -//===----------------------------------------------------------------------===// -// Vector transformation options exposed as auxiliary structs. -//===----------------------------------------------------------------------===// -/// Structure to control the behavior of vector transform patterns. -struct VectorTransformsOptions { - /// Option to control the lowering of vector.contract. - VectorContractLowering vectorContractLowering = VectorContractLowering::Dot; - VectorTransformsOptions & - setVectorTransformsOptions(VectorContractLowering opt) { - vectorContractLowering = opt; - return *this; - } - /// Option to control the lowering of vector.multi_reduction. - VectorMultiReductionLowering vectorMultiReductionLowering = - VectorMultiReductionLowering::InnerParallel; - VectorTransformsOptions & - setVectorMultiReductionLowering(VectorMultiReductionLowering opt) { - vectorMultiReductionLowering = opt; - return *this; - } - /// Option to control the lowering of vector.transpose. - VectorTransposeLowering vectorTransposeLowering = - VectorTransposeLowering::EltWise; - VectorTransformsOptions & - setVectorTransposeLowering(VectorTransposeLowering opt) { - vectorTransposeLowering = opt; - return *this; - } - /// Option to control the splitting of vector transfers. - VectorTransferSplit vectorTransferSplit = VectorTransferSplit::None; - VectorTransformsOptions &setVectorTransferSplit(VectorTransferSplit opt) { - vectorTransferSplit = opt; - return *this; - } -}; +struct VectorTransformsOptions; /// Options that control the vector unrolling. struct UnrollVectorOptions { @@ -109,45 +74,6 @@ struct UnrollVectorOptions { // Vector transformation exposed as populate functions over rewrite patterns. //===----------------------------------------------------------------------===// -/// Insert TransposeLowering patterns into extraction/insertion. -void populateVectorTransposeLoweringPatterns( - RewritePatternSet &patterns, - VectorTransformsOptions options = VectorTransformsOptions(), - PatternBenefit benefit = 1); - -/// Collect a set of patterns to convert vector.multi_reduction op into -/// a sequence of vector.reduction ops. The patterns comprise: -/// - InnerOuterDimReductionConversion: rewrites vector.multi_reduction such -/// that all reduction dimensions are either innermost or outermost, by adding -/// the proper vector.transpose operations. 
-/// - ReduceMultiDimReductionRank: once in innermost or outermost reduction -/// form, rewrites n-D vector.multi_reduction into 2-D vector.multi_reduction, -/// by introducing vector.shape_cast ops to collapse + multi-reduce + expand -/// back. -/// - TwoDimMultiReductionToElementWise: once in 2-D vector.multi_reduction -/// form, with an **outermost** reduction dimension, unroll the outer dimension -/// to obtain a sequence of 1-D vector ops. This also has an opportunity for -/// tree-reduction (in the future). -/// - TwoDimMultiReductionToReduction: once in 2-D vector.multi_reduction form, -/// with an **innermost** reduction dimension, unroll the outer dimension to -/// obtain a sequence of extract + vector.reduction + insert. This can further -/// lower to horizontal reduction ops. -/// - OneDimMultiReductionToTwoDim: for cases that reduce to 1-D vector -/// reduction (and are thus missing either a parallel or a reduction), we lift -/// them back up to 2-D with a simple vector.shape_cast to vector<1xk> so that -/// the other patterns can kick in, thus fully exiting out of the -/// vector.multi_reduction abstraction. -void populateVectorMultiReductionLoweringPatterns( - RewritePatternSet &patterns, VectorMultiReductionLowering options, - PatternBenefit benefit = 1); - -/// Collects patterns to progressively lower vector contraction ops on high-D -/// into low-D reduction and product ops. -void populateVectorContractLoweringPatterns( - RewritePatternSet &patterns, - VectorTransformsOptions options = VectorTransformsOptions(), - PatternBenefit benefit = 1); - /// Canonicalization of a `vector.contraction %a, %b, %c` with row-major matmul /// semantics to a contraction with MMT semantics (matrix matrix multiplication /// with the RHS transposed). This specific form is meant to have the vector @@ -174,67 +100,43 @@ void populateVectorContractCanonicalizeMatmulToMMT( void populateVectorReductionToContractPatterns(RewritePatternSet &patterns, PatternBenefit benefit = 1); -/// Collect patterns to convert scan op -void populateVectorScanLoweringPatterns(RewritePatternSet &patterns, - PatternBenefit benefit = 1); - -//===----------------------------------------------------------------------===// -// Vector.transfer patterns. -//===----------------------------------------------------------------------===// -/// Collect a set of transfer read/write lowering patterns that simplify the -/// permutation map (e.g., converting it to a minor identity map) by inserting -/// broadcasts and transposes. More specifically: -/// -/// [TransferReadPermutationLowering] -/// Lower transfer_read op with permutation into a transfer_read with a -/// permutation map composed of leading zeros followed by a minor identity + -/// vector.transpose op. -/// Ex: -/// vector.transfer_read ... -/// permutation_map: (d0, d1, d2) -> (0, d1) -/// into: -/// %v = vector.transfer_read ... -/// permutation_map: (d0, d1, d2) -> (d1, 0) -/// vector.transpose %v, [1, 0] +/// Populate `patterns` with the following patterns. /// -/// vector.transfer_read ... -/// permutation_map: (d0, d1, d2, d3) -> (0, 0, 0, d1, d3) -/// into: -/// %v = vector.transfer_read ... -/// permutation_map: (d0, d1, d2, d3) -> (0, 0, d1, 0, d3) -/// vector.transpose %v, [0, 1, 3, 2, 4] -/// Note that an alternative is to transform it to linalg.transpose + -/// vector.transfer_read to do the transpose in memory instead. 
+/// - VectorTransferFullPartialRewriter
 ///
-/// [TransferWritePermutationLowering]
-/// Lower transfer_write op with permutation into a transfer_write with a
-/// minor identity permutation map. (transfer_write ops cannot have broadcasts.)
-/// Ex:
-///     vector.transfer_write %v ...
-///         permutation_map: (d0, d1, d2) -> (d2, d0, d1)
-/// into:
-///     %tmp = vector.transpose %v, [2, 0, 1]
-///     vector.transfer_write %tmp ...
-///         permutation_map: (d0, d1, d2) -> (d0, d1, d2)
+/// Split a vector.transfer operation into an in-bounds (i.e., no out-of-bounds
+/// masking) fast path and a slow path.
 ///
-///     vector.transfer_write %v ...
-///         permutation_map: (d0, d1, d2, d3) -> (d3, d2)
-/// into:
-///     %tmp = vector.transpose %v, [1, 0]
-///     %v = vector.transfer_write %tmp ...
-///         permutation_map: (d0, d1, d2, d3) -> (d2, d3)
+/// Example (a 2-D vector.transfer_read):
+/// ```
+///    %1 = vector.transfer_read %0[...], %pad : memref<A...>, vector<B...>
+/// ```
+/// is transformed into:
+/// ```
+///    %1:3 = scf.if (%inBounds) {
+///      // fast path, direct cast
+///      memref.cast %A: memref<A...> to compatibleMemRefType
+///      scf.yield %view : compatibleMemRefType, index, index
+///    } else {
+///      // slow path, not in-bounds vector.transfer or linalg.copy.
+///      memref.cast %alloc: memref<B...> to compatibleMemRefType
+///      scf.yield %4 : compatibleMemRefType, index, index
+///    }
+///    %0 = vector.transfer_read %1#0[%1#1, %1#2] {in_bounds = [true ... true]}
+/// ```
+/// where `alloc` is a buffer of one vector, alloca'ed at the top of the
+/// function.
 ///
-/// [TransferOpReduceRank]
-/// Lower transfer_read op with broadcast in the leading dimensions into
-/// transfer_read of lower rank + vector.broadcast.
-/// Ex: vector.transfer_read ...
-///         permutation_map: (d0, d1, d2, d3) -> (0, d1, 0, d3)
-/// into:
-///     %v = vector.transfer_read ...
-///         permutation_map: (d0, d1, d2, d3) -> (d1, 0, d3)
-///     vector.broadcast %v
-void populateVectorTransferPermutationMapLoweringPatterns(
-    RewritePatternSet &patterns, PatternBenefit benefit = 1);
+/// Preconditions:
+///  1. `xferOp.permutation_map()` must be a minor identity map
+///  2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()`
+///     must be equal. This will be relaxed in the future but requires
+///     rank-reducing subviews.
+void populateVectorTransferFullPartialPatterns(
+    RewritePatternSet &patterns, const VectorTransformsOptions &options);
+
+//===----------------------------------------------------------------------===//
+// Vector.transfer patterns.
+//===----------------------------------------------------------------------===//
 
 /// Collect a set of patterns to reduce the rank of the operands of vector
 /// transfer ops to operate on the largest contigious vector.
@@ -334,220 +236,6 @@ void populateVectorUnrollPatterns(RewritePatternSet &patterns,
                                   const UnrollVectorOptions &options,
                                   PatternBenefit benefit = 1);
 
-/// Expands `vector.gather` ops into a series of conditional scalar loads
-/// (`vector.load` for memrefs or `tensor.extract` for tensors). These loads are
-/// conditional to avoid out-of-bounds memory accesses and guarded with `scf.if`
-/// ops. This lowering path is intended for targets that do not feature
-/// dedicated gather ops.
-void populateVectorGatherLoweringPatterns(RewritePatternSet &patterns,
-                                          PatternBenefit benefit = 1);
-
-//===----------------------------------------------------------------------===//
-// Finer-grained patterns exposed for more control over individual lowerings.
-//===----------------------------------------------------------------------===// -/// Apply `splitFullAndPartialTransfer` selectively via a pattern. This pattern -/// may take an extra filter to perform selection at a finer granularity. -struct VectorTransferFullPartialRewriter : public RewritePattern { - using FilterConstraintType = - std::function; - - explicit VectorTransferFullPartialRewriter( - MLIRContext *context, - VectorTransformsOptions options = VectorTransformsOptions(), - FilterConstraintType filter = - [](VectorTransferOpInterface op) { return success(); }, - PatternBenefit benefit = 1) - : RewritePattern(MatchAnyOpTypeTag(), benefit, context), options(options), - filter(std::move(filter)) {} - - /// Performs the rewrite. - LogicalResult matchAndRewrite(Operation *op, - PatternRewriter &rewriter) const override; - -private: - VectorTransformsOptions options; - FilterConstraintType filter; -}; - -/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul -/// semantics to: -/// ``` -/// %flattened_a = vector.shape_cast %a -/// %flattened_b = vector.shape_cast %b -/// %flattened_d = vector.matmul %flattened_a, %flattened_b -/// %d = vector.shape_cast %%flattened_d -/// %e = add %c, %d -/// ``` -/// `vector.matmul` later lowers to `llvm.matrix.multiply`. -// -/// This only kicks in when VectorTransformsOptions is set to OuterProduct and -/// the vector.contract op is a row-major matrix multiply. -class ContractionOpToMatmulOpLowering - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - using FilterConstraintType = - std::function; - - static LogicalResult defaultFilter(vector::ContractionOp op) { - return success(); - } - - ContractionOpToMatmulOpLowering( - vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1, - FilterConstraintType constraint = defaultFilter) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions), - filter(std::move(constraint)) {} - - LogicalResult matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const override; - -private: - /// Options to control the vector patterns. - vector::VectorTransformsOptions vectorTransformOptions; - FilterConstraintType filter; -}; - -/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul -/// semantics to a reduction_size-unrolled sequence: -/// ``` -/// %at = vector.transpose %a, [1, 0] -/// %bRow0 = vector.extract %b[0] -/// %atRow0 = vector.extract %at[0] -/// %c0 = vector.outerproduct %atRow0, %bRow0, %c -/// ... -/// %bRowK = vector.extract %b[K] -/// %atRowK = vector.extract %at[K] -/// %cK = vector.outerproduct %atRowK, %bRowK, %cK-1 -/// ``` -/// -/// This only kicks in when VectorTransformsOptions is set to OuterProduct and -/// the vector.contract op is a row-major matrix multiply. 
-class ContractionOpToOuterProductOpLowering - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - using FilterConstraintType = - std::function; - - static LogicalResult defaultFilter(vector::ContractionOp op) { - return success(); - } - - ContractionOpToOuterProductOpLowering( - vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1, - FilterConstraintType constraint = defaultFilter) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions), - filter(std::move(constraint)) {} - - LogicalResult matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const override; - -private: - /// Options to control the vector patterns. - vector::VectorTransformsOptions vectorTransformOptions; - FilterConstraintType filter; -}; - -/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul -/// semantics to an output-size-unrolled sequence: -/// ``` -/// %out = arith.constant ... : vector -/// %bt = vector.transpose %b, [1, 0] -/// %aRow0 = vector.extract %a[0] -/// %btRow0 = vector.extract %bt[0] -/// %c00 = vector.reduce %atRow0, %bRow0 -/// %out00 = vector.insert %c00, %out[0, 0] -/// ... -/// %aRowLast = vector.extract %at[M-1] -/// %btRowLast = vector.extract %b[N-1] -/// %cLastLast = vector.reduce %atRowLast, %bRowLast -/// %outcLastLast = vector.insert %cLastLast, %out[M-1, N-1] -/// ``` -/// -/// This only kicks in when VectorTransformsOptions is set to Dot and -/// the vector.contract op is a row-major matmul or matvec. -class ContractionOpToDotLowering - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - using FilterConstraintType = - std::function; - - static LogicalResult defaultFilter(vector::ContractionOp op) { - return success(); - } - - ContractionOpToDotLowering( - vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1, - const FilterConstraintType &constraint = defaultFilter) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions), filter(defaultFilter) {} - - LogicalResult matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const override; - -private: - /// Options to control the vector patterns. - vector::VectorTransformsOptions vectorTransformOptions; - FilterConstraintType filter; -}; - -/// Progressive lowering of ContractionOp. -/// -/// One: -/// %x = vector.contract with at least one free/batch dimension -/// is replaced by: -/// %a = vector.contract with one less free/batch dimension -/// %b = vector.contract with one less free/batch dimension -/// .. -/// %x = combine %a %b .. -/// until a pure contraction is reached (no free/batch dimensions), -/// which is replaced by a dot-product. -/// -/// This only kicks in when either VectorTransformsOptions is set -/// to Dot or when other contraction patterns fail. 
-class ContractionOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - using FilterConstraintType = - std::function; - - static LogicalResult defaultFilter(vector::ContractionOp op) { - return success(); - } - - ContractionOpLowering(vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1, - FilterConstraintType constraint = defaultFilter) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions), - filter(std::move(constraint)) {} - - LogicalResult matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const override; - -private: - /// Options to control the vector patterns. - vector::VectorTransformsOptions vectorTransformOptions; - FilterConstraintType filter; - // Lower one parallel dimension. - FailureOr lowerParallel(PatternRewriter &rewriter, - vector::ContractionOp op, int64_t lhsIndex, - int64_t rhsIndex, Value mask) const; - // Lower one reduction dimension. - FailureOr lowerReduction(PatternRewriter &rewriter, - vector::ContractionOp op, Value mask) const; -}; - } // namespace vector } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h index 947911f9a3841..52a4c9cc368d8 100644 --- a/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h +++ b/mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h @@ -24,17 +24,53 @@ class IfOp; namespace vector { +//===----------------------------------------------------------------------===// +// Vector transformation options exposed as auxiliary structs. +//===----------------------------------------------------------------------===// +/// Structure to control the behavior of vector transform patterns. +struct VectorTransformsOptions { + /// Option to control the lowering of vector.contract. + VectorContractLowering vectorContractLowering = VectorContractLowering::Dot; + VectorTransformsOptions & + setVectorTransformsOptions(VectorContractLowering opt) { + vectorContractLowering = opt; + return *this; + } + /// Option to control the lowering of vector.multi_reduction. + VectorMultiReductionLowering vectorMultiReductionLowering = + VectorMultiReductionLowering::InnerParallel; + VectorTransformsOptions & + setVectorMultiReductionLowering(VectorMultiReductionLowering opt) { + vectorMultiReductionLowering = opt; + return *this; + } + /// Option to control the lowering of vector.transpose. + VectorTransposeLowering vectorTransposeLowering = + VectorTransposeLowering::EltWise; + VectorTransformsOptions & + setVectorTransposeLowering(VectorTransposeLowering opt) { + vectorTransposeLowering = opt; + return *this; + } + /// Option to control the splitting of vector transfers. + VectorTransferSplit vectorTransferSplit = VectorTransferSplit::None; + VectorTransformsOptions &setVectorTransferSplit(VectorTransferSplit opt) { + vectorTransferSplit = opt; + return *this; + } +}; + //===----------------------------------------------------------------------===// // Standalone transformations and helpers. //===----------------------------------------------------------------------===// -/// Split a vector.transfer operation into an in-bounds (i.e., no out-of-bounds -/// masking) fastpath and a slowpath. -/// If `ifOp` is not null and the result is `success, the `ifOp` points to the -/// newly created conditional upon function return. 
-/// To accomodate for the fact that the original vector.transfer indexing may be
-/// arbitrary and the slow path indexes @[0...0] in the temporary buffer, the
-/// scf.if op returns a view and values of type index.
-/// At this time, only vector.transfer_read case is implemented.
+/// Split a vector.transfer operation into an in-bounds (i.e., no
+/// out-of-bounds masking) fast path and a slow path. If `ifOp` is not null
+/// and the result is `success`, the `ifOp` points to the newly created
+/// conditional upon function return. To accommodate for the fact that the
+/// original vector.transfer indexing may be arbitrary and the slow path
+/// indexes @[0...0] in the temporary buffer, the scf.if op returns a view and
+/// values of type index. At this time, only the vector.transfer_read case is
+/// implemented.
 ///
 /// Example (a 2-D vector.transfer_read):
 /// ```
@@ -51,15 +87,16 @@ namespace vector {
 ///    memref.cast %alloc: memref<B...> to compatibleMemRefType
 ///    scf.yield %4 : compatibleMemRefType, index, index
 // }
-/// %0 = vector.transfer_read %1#0[%1#1, %1#2] {in_bounds = [true ... true]}
+/// %0 = vector.transfer_read %1#0[%1#1, %1#2] {in_bounds = [true ...
+/// true]}
 /// ```
 /// where `alloc` is a top of the function alloca'ed buffer of one vector.
 ///
 /// Preconditions:
 ///  1. `xferOp.permutation_map()` must be a minor identity map
-///  2. the rank of the `xferOp.memref()` and the rank of the `xferOp.vector()`
-///  must be equal. This will be relaxed in the future but requires
-///  rank-reducing subviews.
+///  2. the rank of the `xferOp.memref()` and the rank of the
+///  `xferOp.vector()` must be equal. This will be relaxed in the future but
+///  requires rank-reducing subviews.
 LogicalResult splitFullAndPartialTransfer(
     RewriterBase &b, VectorTransferOpInterface xferOp,
     VectorTransformsOptions options = VectorTransformsOptions(),
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index c56d03f6f31d7..05def0f45d7fb 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -16,6 +16,7 @@
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.h"
+#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
 #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/TypeUtilities.h"
diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
index fb544df18324b..3f1b107f6f8e0 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp
@@ -19,6 +19,7 @@
 #include "mlir/Dialect/Func/IR/FuncOps.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
 #include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
 #include "mlir/Dialect/X86Vector/Transforms.h"
 #include "mlir/Dialect/X86Vector/X86VectorDialect.h"
@@ -64,10 +65,11 @@ void LowerVectorToLLVMPass::runOnOperation() {
   RewritePatternSet patterns(&getContext());
   populateVectorToVectorCanonicalizationPatterns(patterns);
   populateVectorBroadcastLoweringPatterns(patterns);
-  populateVectorContractLoweringPatterns(patterns);
+  populateVectorContractLoweringPatterns(patterns,
VectorTransformsOptions()); populateVectorMaskOpLoweringPatterns(patterns); populateVectorShapeCastLoweringPatterns(patterns); - populateVectorTransposeLoweringPatterns(patterns); + populateVectorTransposeLoweringPatterns(patterns, + VectorTransformsOptions()); // Vector transfer ops with rank > 1 should be lowered with VectorToSCF. populateVectorTransferLoweringPatterns(patterns, /*maxTransferRank=*/1); (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp index d8070b34a761d..ec2e2aa4c0624 100644 --- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp +++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#include #include +#include #include "mlir/Conversion/VectorToSCF/VectorToSCF.h" @@ -20,6 +20,7 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" #include "mlir/IR/Builders.h" #include "mlir/IR/ImplicitLocOpBuilder.h" diff --git a/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt index eb97c6e168e5c..b7d9812ada0b1 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/TransformOps/CMakeLists.txt @@ -20,5 +20,5 @@ add_mlir_dialect_library(MLIRLinalgTransformOps MLIRSideEffectInterfaces MLIRTransformDialect MLIRTransformDialectUtils - MLIRVectorDialect + MLIRVectorTransforms ) diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index d98eb3b781fc5..e3c1429ade54a 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -26,6 +26,7 @@ #include "mlir/Dialect/Transform/IR/TransformTypes.h" #include "mlir/Dialect/Transform/Utils/Utils.h" #include "mlir/Dialect/Utils/IndexingUtils.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/IR/AffineMap.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Matchers.h" diff --git a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp index 60996b9add614..136d234742b8d 100644 --- a/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp +++ b/mlir/lib/Dialect/Vector/TransformOps/VectorTransformOps.cpp @@ -7,13 +7,14 @@ //===----------------------------------------------------------------------===// #include "mlir/Dialect/Vector/TransformOps/VectorTransformOps.h" - #include "mlir/Conversion/VectorToSCF/VectorToSCF.h" #include "mlir/Dialect/PDL/IR/PDL.h" #include "mlir/Dialect/PDL/IR/PDLTypes.h" #include "mlir/Dialect/Transform/IR/TransformDialect.h" #include "mlir/Dialect/Transform/IR/TransformInterfaces.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" +#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" #include "mlir/Dialect/X86Vector/Transforms.h" #include "mlir/Parser/Parser.h" @@ -82,10 +83,9 @@ DiagnosedSilenceableFailure transform::LowerVectorsOp::apply( // In the future we may want to more finely select particular stages. 
// Stage 1: contraction lowerings. - patterns.add(vectorTransformOptions, - ctx); + populateVectorContractLoweringPatterns( + patterns, vectorTransformOptions, /*benefit=*/1, + /*disableOuterProductLowering*/ true); vector::populateVectorTransferPermutationMapLoweringPatterns(patterns); // Stage 2: multi-reduction lowerings. @@ -93,8 +93,7 @@ DiagnosedSilenceableFailure transform::LowerVectorsOp::apply( patterns, vectorTransformOptions.vectorMultiReductionLowering); // Stage 3: Rewrite vector.transfer into full and partial parts. - patterns.add( - ctx, vectorTransformOptions); + populateVectorTransferFullPartialPatterns(patterns, vectorTransformOptions); // Stage 4: Lower vector transfers. vector::populateVectorTransferLoweringPatterns(patterns, maxTransferRank); @@ -107,8 +106,8 @@ DiagnosedSilenceableFailure transform::LowerVectorsOp::apply( vector::populateVectorShapeCastLoweringPatterns(patterns); // Stage 7: Lower vector.transpose. - vector::populateVectorTransposeLoweringPatterns(patterns, - vectorTransformOptions); + vector::populateVectorTransposeLoweringPatterns( + patterns, vectorTransformOptions, /*benefit=*/1); if (getTransposeAvx2Lowering()) x86vector::avx2::populateSpecializedTransposeLoweringPatterns( patterns, avx2LoweringOptions, /*benefit=*/10); diff --git a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt index 6fb1b8c18a122..f17208e193b3c 100644 --- a/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/Transforms/CMakeLists.txt @@ -1,14 +1,20 @@ add_mlir_dialect_library(MLIRVectorTransforms BufferizableOpInterfaceImpl.cpp Bufferize.cpp + LowerVectorBroadcast.cpp + LowerVectorContract.cpp + LowerVectorGather.cpp LowerVectorMask.cpp + LowerVectorMultiReduction.cpp + LowerVectorScan.cpp + LowerVectorShapeCast.cpp + LowerVectorTransfer.cpp + LowerVectorTranspose.cpp VectorDistribute.cpp VectorDropLeadUnitDim.cpp VectorInsertExtractStridedSliceRewritePatterns.cpp - VectorMultiDimReductionTransforms.cpp VectorTransferOpTransforms.cpp VectorTransferSplitRewritePatterns.cpp - VectorTransferPermutationMapRewritePatterns.cpp VectorTransforms.cpp VectorUnroll.cpp diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorBroadcast.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorBroadcast.cpp new file mode 100644 index 0000000000000..ad538fe4a6828 --- /dev/null +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorBroadcast.cpp @@ -0,0 +1,156 @@ +//===- LowerVectorBroadcast.cpp - Lower 'vector.broadcast' operation ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements target-independent rewrites and utilities to lower the +// 'vector.broadcast' operation. 
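+//
+// Sketch of the overall effect (illustrative shapes, not taken from a test):
+// a rank-increasing broadcast such as
+//   %x = vector.broadcast %y : vector<2xf32> to vector<4x2xf32>
+// is peeled one leading dimension at a time, inserting a lower-rank broadcast
+// of %y four times, and bottoms out in vector.splat for scalar sources.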
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
+#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
+#include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
+#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"
+#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/ImplicitLocOpBuilder.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Interfaces/VectorInterfaces.h"
+#include "mlir/Support/LogicalResult.h"
+
+#define DEBUG_TYPE "vector-broadcast-lowering"
+
+using namespace mlir;
+using namespace mlir::vector;
+
+namespace {
+/// Progressive lowering of BroadcastOp.
+class BroadcastOpLowering : public OpRewritePattern<vector::BroadcastOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(vector::BroadcastOp op,
+                                PatternRewriter &rewriter) const override {
+    auto loc = op.getLoc();
+    VectorType dstType = op.getResultVectorType();
+    VectorType srcType = op.getSourceType().dyn_cast<VectorType>();
+    Type eltType = dstType.getElementType();
+
+    // Scalar to any vector can use splat.
+    if (!srcType) {
+      rewriter.replaceOpWithNewOp<vector::SplatOp>(op, dstType, op.getSource());
+      return success();
+    }
+
+    // Determine rank of source and destination.
+    int64_t srcRank = srcType.getRank();
+    int64_t dstRank = dstType.getRank();
+
+    // Stretching scalar inside vector (e.g. vector<1xf32>) can use splat.
+    if (srcRank <= 1 && dstRank == 1) {
+      Value ext;
+      if (srcRank == 0)
+        ext = rewriter.create<vector::ExtractElementOp>(loc, op.getSource());
+      else
+        ext = rewriter.create<vector::ExtractOp>(loc, op.getSource(), 0);
+      rewriter.replaceOpWithNewOp<vector::SplatOp>(op, dstType, ext);
+      return success();
+    }
+
+    // Duplicate this rank.
+    // For example:
+    //   %x = broadcast %y  : k-D to n-D, k < n
+    // becomes:
+    //   %b = broadcast %y  : k-D to (n-1)-D
+    //   %x = [%b,%b,%b,%b] : n-D
+    // becomes:
+    //   %b = [%y,%y]       : (n-1)-D
+    //   %x = [%b,%b,%b,%b] : n-D
+    if (srcRank < dstRank) {
+      // Duplication.
+      VectorType resType =
+          VectorType::get(dstType.getShape().drop_front(), eltType);
+      Value bcst =
+          rewriter.create<vector::BroadcastOp>(loc, resType, op.getSource());
+      Value result = rewriter.create<arith::ConstantOp>(
+          loc, dstType, rewriter.getZeroAttr(dstType));
+      for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d)
+        result = rewriter.create<vector::InsertOp>(loc, bcst, result, d);
+      rewriter.replaceOp(op, result);
+      return success();
+    }
+
+    // Find non-matching dimension, if any.
+    assert(srcRank == dstRank);
+    int64_t m = -1;
+    for (int64_t r = 0; r < dstRank; r++)
+      if (srcType.getDimSize(r) != dstType.getDimSize(r)) {
+        m = r;
+        break;
+      }
+
+    // All trailing dimensions are the same. Simply pass through.
+    if (m == -1) {
+      rewriter.replaceOp(op, op.getSource());
+      return success();
+    }
+
+    // Any non-matching dimension forces a stretch along this rank.
+    // For example:
+    //   %x = broadcast %y : vector<4x1x2xf32> to vector<4x2x2xf32>
+    // becomes:
+    //   %a = broadcast %y[0] : vector<1x2xf32> to vector<2x2xf32>
+    //   %b = broadcast %y[1] : vector<1x2xf32> to vector<2x2xf32>
+    //   %c = broadcast %y[2] : vector<1x2xf32> to vector<2x2xf32>
+    //   %d = broadcast %y[3] : vector<1x2xf32> to vector<2x2xf32>
+    //   %x = [%a,%b,%c,%d]
+    // becomes:
+    //   %u = broadcast %y[0][0] : vector<2xf32> to vector<2x2xf32>
+    //   %v = broadcast %y[1][0] : vector<2xf32> to vector<2x2xf32>
+    //   %a = [%u, %v]
+    //   ..
+    //   %x = [%a,%b,%c,%d]
+    VectorType resType =
+        VectorType::get(dstType.getShape().drop_front(), eltType);
+    Value result = rewriter.create<arith::ConstantOp>(
+        loc, dstType, rewriter.getZeroAttr(dstType));
+    if (m == 0) {
+      // Stretch at start.
+      Value ext = rewriter.create<vector::ExtractOp>(loc, op.getSource(), 0);
+      Value bcst = rewriter.create<vector::BroadcastOp>(loc, resType, ext);
+      for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d)
+        result = rewriter.create<vector::InsertOp>(loc, bcst, result, d);
+    } else {
+      // Stretch not at start.
+      for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) {
+        Value ext = rewriter.create<vector::ExtractOp>(loc, op.getSource(), d);
+        Value bcst = rewriter.create<vector::BroadcastOp>(loc, resType, ext);
+        result = rewriter.create<vector::InsertOp>(loc, bcst, result, d);
+      }
+    }
+    rewriter.replaceOp(op, result);
+    return success();
+  }
+};
+} // namespace
+
+void mlir::vector::populateVectorBroadcastLoweringPatterns(
+    RewritePatternSet &patterns, PatternBenefit benefit) {
+  patterns.add<BroadcastOpLowering>(patterns.getContext(), benefit);
+}
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorContract.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorContract.cpp
new file mode 100644
index 0000000000000..1280cfef0b645
--- /dev/null
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorContract.cpp
@@ -0,0 +1,1329 @@
+//===- LowerVectorContract.cpp - Lower 'vector.contract' operation --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target-independent rewrites and utilities to lower the
+// 'vector.contract' operation.
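+//
+// As a rough illustration (shapes assumed here, not taken from a test), with
+// the OuterProduct option a row-major matmul
+//   %d = vector.contract %a, %b, %c
+//     : vector<2x4xf32>, vector<4x3xf32> into vector<2x3xf32>
+// lowers to one vector.transpose of %a followed by four accumulating
+// vector.outerproduct ops, one per step of the size-4 reduction.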
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
+#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
+#include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
+#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/ImplicitLocOpBuilder.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Interfaces/VectorInterfaces.h"
+#include "mlir/Support/LogicalResult.h"
+
+#define DEBUG_TYPE "vector-contract-lowering"
+
+using namespace mlir;
+using namespace mlir::vector;
+
+//===----------------------------------------------------------------------===//
+// Helper functions
+//===----------------------------------------------------------------------===//
+
+// Helper to find an index in an affine map.
+static std::optional<int64_t> getResultIndex(AffineMap map, int64_t index) {
+  for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) {
+    int64_t idx = map.getDimPosition(i);
+    if (idx == index)
+      return i;
+  }
+  return std::nullopt;
+}
+
+// Helper to construct iterator types with one index removed.
+static SmallVector<Attribute> adjustIter(ArrayAttr iteratorTypes,
+                                         int64_t index) {
+  SmallVector<Attribute> results;
+  for (const auto &it : llvm::enumerate(iteratorTypes)) {
+    int64_t idx = it.index();
+    if (idx == index)
+      continue;
+    results.push_back(it.value());
+  }
+  return results;
+}
+
+// Helper to construct an affine map with one index removed.
+static AffineMap adjustMap(AffineMap map, int64_t index,
+                           PatternRewriter &rewriter) {
+  auto *ctx = rewriter.getContext();
+  SmallVector<AffineExpr> results;
+  for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) {
+    int64_t idx = map.getDimPosition(i);
+    if (idx == index)
+      continue;
+    // Re-insert remaining indices, but renamed when occurring
+    // after the removed index.
+    auto targetExpr = getAffineDimExpr(idx < index ? idx : idx - 1, ctx);
+    results.push_back(targetExpr);
+  }
+  return AffineMap::get(map.getNumDims() - 1, 0, results, ctx);
+}
+
+// Helper method to possibly drop a dimension in a load.
+// TODO
+static Value reshapeLoad(Location loc, Value val, VectorType type,
+                         int64_t index, int64_t pos,
+                         PatternRewriter &rewriter) {
+  if (index == -1)
+    return val;
+  Type lowType = VectorType::Builder(type).dropDim(0);
+  // At extraction dimension?
+  if (index == 0) {
+    auto posAttr = rewriter.getI64ArrayAttr(pos);
+    return rewriter.create<vector::ExtractOp>(loc, lowType, val, posAttr);
+  }
+  // Unroll leading dimensions.
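+  // Illustration of the recursion: extract each slice along the leading
+  // dimension, recurse with the target index shifted down by one, and
+  // re-insert the reshaped slice into a result whose dimension `index` has
+  // been dropped.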
+  VectorType vType = lowType.cast<VectorType>();
+  Type resType = VectorType::Builder(type).dropDim(index);
+  auto resVectorType = resType.cast<VectorType>();
+  Value result = rewriter.create<arith::ConstantOp>(
+      loc, resVectorType, rewriter.getZeroAttr(resVectorType));
+  for (int64_t d = 0, e = resVectorType.getDimSize(0); d < e; d++) {
+    auto posAttr = rewriter.getI64ArrayAttr(d);
+    Value ext = rewriter.create<vector::ExtractOp>(loc, vType, val, posAttr);
+    Value load = reshapeLoad(loc, ext, vType, index - 1, pos, rewriter);
+    result = rewriter.create<vector::InsertOp>(loc, resVectorType, load, result,
+                                               posAttr);
+  }
+  return result;
+}
+
+// Helper method to possibly drop a dimension in a store.
+// TODO
+static Value reshapeStore(Location loc, Value val, Value result,
+                          VectorType type, int64_t index, int64_t pos,
+                          PatternRewriter &rewriter) {
+  // Unmodified?
+  if (index == -1)
+    return val;
+  // At insertion dimension?
+  if (index == 0) {
+    auto posAttr = rewriter.getI64ArrayAttr(pos);
+    return rewriter.create<vector::InsertOp>(loc, type, val, result, posAttr);
+  }
+  // Unroll leading dimensions.
+  Type lowType = VectorType::Builder(type).dropDim(0);
+  VectorType vType = lowType.cast<VectorType>();
+  Type insType = VectorType::Builder(vType).dropDim(0);
+  for (int64_t d = 0, e = type.getDimSize(0); d < e; d++) {
+    auto posAttr = rewriter.getI64ArrayAttr(d);
+    Value ext = rewriter.create<vector::ExtractOp>(loc, vType, result, posAttr);
+    Value ins = rewriter.create<vector::ExtractOp>(loc, insType, val, posAttr);
+    Value sto = reshapeStore(loc, ins, ext, vType, index - 1, pos, rewriter);
+    result = rewriter.create<vector::InsertOp>(loc, type, sto, result, posAttr);
+  }
+  return result;
+}
+
+/// Helper to create an arithmetic operation associated with a kind of
+/// contraction.
+static std::optional<Value>
+createContractArithOp(Location loc, Value x, Value y, Value acc,
+                      vector::CombiningKind kind, PatternRewriter &rewriter,
+                      bool isInt, Value mask = Value()) {
+  using vector::CombiningKind;
+  Value mul;
+
+  if (isInt) {
+    if (kind == CombiningKind::MINF || kind == CombiningKind::MAXF)
+      // Only valid for floating point types.
+      return std::nullopt;
+    mul = rewriter.create<arith::MulIOp>(loc, x, y);
+  } else {
+    // Float case.
+    if (kind == CombiningKind::AND || kind == CombiningKind::MINUI ||
+        kind == CombiningKind::MINSI || kind == CombiningKind::MAXUI ||
+        kind == CombiningKind::MAXSI || kind == CombiningKind::OR ||
+        kind == CombiningKind::XOR)
+      // Only valid for integer types.
+      return std::nullopt;
+    // Special case for fused multiply-add.
+    if (acc && acc.getType().isa<VectorType>() && kind == CombiningKind::ADD) {
+      Value fma = rewriter.create<vector::FMAOp>(loc, x, y, acc);
+      if (mask)
+        // The fma op doesn't need explicit masking. However, fma ops used in
+        // reductions must preserve previous 'acc' values for masked-out lanes.
+        fma = selectPassthru(rewriter, mask, fma, acc);
+      return fma;
+    }
+    mul = rewriter.create<arith::MulFOp>(loc, x, y);
+  }
+
+  if (!acc)
+    return std::optional<Value>(mul);
+
+  return makeArithReduction(rewriter, loc, kind, mul, acc, mask);
+}
+
+/// Return the positions of the reductions in the given map.
+static SmallVector<int64_t> getReductionIndex(AffineMap map,
+                                              ArrayAttr iteratorTypes) {
+  SmallVector<int64_t> dimsIdx;
+  for (unsigned i = 0, e = map.getNumResults(); i < e; i++) {
+    if (isReductionIterator(iteratorTypes[map.getDimPosition(i)]))
+      dimsIdx.push_back(i);
+  }
+  return dimsIdx;
+}
+
+/// Look for a given dimension in an affine map and return its position. Return
+/// std::nullopt if the dimension is not in the map results.
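+/// For instance, given the map (d0, d1) -> (d1, d0), dimension 0 appears at
+/// result position 1, so this returns 1.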
+static std::optional<int64_t> getDimPosition(AffineMap map, unsigned dim) {
+  for (unsigned i = 0, e = map.getNumResults(); i < e; i++) {
+    if (map.getDimPosition(i) == dim)
+      return i;
+  }
+  return std::nullopt;
+}
+
+/// Creates an AddIOp if `isInt` is true, otherwise creates an arith::AddFOp,
+/// using operands `x` and `y`.
+static Value createAdd(Location loc, Value x, Value y, bool isInt,
+                       PatternRewriter &rewriter) {
+  if (isInt)
+    return rewriter.create<arith::AddIOp>(loc, x, y);
+  return rewriter.create<arith::AddFOp>(loc, x, y);
+}
+
+/// Creates a MulIOp if `isInt` is true, otherwise creates a MulFOp, using
+/// operands `x` and `y`.
+static Value createMul(Location loc, Value x, Value y, bool isInt,
+                       PatternRewriter &rewriter) {
+  if (isInt)
+    return rewriter.create<arith::MulIOp>(loc, x, y);
+  return rewriter.create<arith::MulFOp>(loc, x, y);
+}
+
+namespace {
+
+/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul
+/// semantics to:
+/// ```
+///    %flattened_a = vector.shape_cast %a
+///    %flattened_b = vector.shape_cast %b
+///    %flattened_d = vector.matmul %flattened_a, %flattened_b
+///    %d = vector.shape_cast %flattened_d
+///    %e = add %c, %d
+/// ```
+/// `vector.matmul` later lowers to `llvm.matrix.multiply`.
+///
+/// This only kicks in when VectorTransformsOptions is set to Matmul and
+/// the vector.contract op is a row-major matrix multiply.
+class ContractionOpToMatmulOpLowering
+    : public OpRewritePattern<vector::ContractionOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  using FilterConstraintType =
+      std::function<LogicalResult(vector::ContractionOp op)>;
+
+  static LogicalResult defaultFilter(vector::ContractionOp op) {
+    return success();
+  }
+
+  ContractionOpToMatmulOpLowering(
+      vector::VectorTransformsOptions vectorTransformOptions,
+      MLIRContext *context, PatternBenefit benefit = 1,
+      FilterConstraintType constraint = defaultFilter)
+      : OpRewritePattern<vector::ContractionOp>(context, benefit),
+        vectorTransformOptions(vectorTransformOptions),
+        filter(std::move(constraint)) {}
+
+  LogicalResult matchAndRewrite(vector::ContractionOp op,
+                                PatternRewriter &rewriter) const override;
+
+private:
+  /// Options to control the vector patterns.
+  vector::VectorTransformsOptions vectorTransformOptions;
+  FilterConstraintType filter;
+};
+
+/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul
+/// semantics to a reduction_size-unrolled sequence:
+/// ```
+///    %at = vector.transpose %a, [1, 0]
+///    %bRow0 = vector.extract %b[0]
+///    %atRow0 = vector.extract %at[0]
+///    %c0 = vector.outerproduct %atRow0, %bRow0, %c
+///    ...
+///    %bRowK = vector.extract %b[K]
+///    %atRowK = vector.extract %at[K]
+///    %cK = vector.outerproduct %atRowK, %bRowK, %cK-1
+/// ```
+///
+/// This only kicks in when VectorTransformsOptions is set to OuterProduct and
+/// the vector.contract op is a row-major matrix multiply.
+class ContractionOpToOuterProductOpLowering
+    : public OpRewritePattern<vector::ContractionOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  using FilterConstraintType =
+      std::function<LogicalResult(vector::ContractionOp op)>;
+
+  static LogicalResult defaultFilter(vector::ContractionOp op) {
+    return success();
+  }
+
+  ContractionOpToOuterProductOpLowering(
+      vector::VectorTransformsOptions vectorTransformOptions,
+      MLIRContext *context, PatternBenefit benefit = 1,
+      FilterConstraintType constraint = defaultFilter)
+      : OpRewritePattern<vector::ContractionOp>(context, benefit),
+        vectorTransformOptions(vectorTransformOptions),
+        filter(std::move(constraint)) {}
+
+  LogicalResult matchAndRewrite(vector::ContractionOp op,
+                                PatternRewriter &rewriter) const override;
+
+private:
+  /// Options to control the vector patterns.
+  vector::VectorTransformsOptions vectorTransformOptions;
+  FilterConstraintType filter;
+};
+
+/// Progressive lowering of a `vector.contract %a, %b, %c` with row-major matmul
+/// semantics to an output-size-unrolled sequence:
+/// ```
+///    %out = arith.constant ... : vector<MxNxelt_type>
+///    %bt = vector.transpose %b, [1, 0]
+///    %aRow0 = vector.extract %a[0]
+///    %btRow0 = vector.extract %bt[0]
+///    %c00 = vector.reduce %aRow0, %btRow0
+///    %out00 = vector.insert %c00, %out[0, 0]
+///    ...
+///    %aRowLast = vector.extract %a[M-1]
+///    %btRowLast = vector.extract %bt[N-1]
+///    %cLastLast = vector.reduce %aRowLast, %btRowLast
+///    %outcLastLast = vector.insert %cLastLast, %out[M-1, N-1]
+/// ```
+///
+/// This only kicks in when VectorTransformsOptions is set to Dot and
+/// the vector.contract op is a row-major matmul or matvec.
+class ContractionOpToDotLowering
+    : public OpRewritePattern<vector::ContractionOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  using FilterConstraintType =
+      std::function<LogicalResult(vector::ContractionOp op)>;
+
+  static LogicalResult defaultFilter(vector::ContractionOp op) {
+    return success();
+  }
+
+  ContractionOpToDotLowering(
+      vector::VectorTransformsOptions vectorTransformOptions,
+      MLIRContext *context, PatternBenefit benefit = 1,
+      const FilterConstraintType &constraint = defaultFilter)
+      : OpRewritePattern<vector::ContractionOp>(context, benefit),
+        vectorTransformOptions(vectorTransformOptions), filter(constraint) {}
+
+  LogicalResult matchAndRewrite(vector::ContractionOp op,
+                                PatternRewriter &rewriter) const override;
+
+private:
+  /// Options to control the vector patterns.
+  vector::VectorTransformsOptions vectorTransformOptions;
+  FilterConstraintType filter;
+};
+
+/// Progressive lowering of ContractionOp.
+///
+/// One:
+///   %x = vector.contract with at least one free/batch dimension
+/// is replaced by:
+///   %a = vector.contract with one less free/batch dimension
+///   %b = vector.contract with one less free/batch dimension
+///   ..
+///   %x = combine %a %b ..
+/// until a pure contraction is reached (no free/batch dimensions),
+/// which is replaced by a dot-product.
+///
+/// This only kicks in when either VectorTransformsOptions is set
+/// to Dot or when other contraction patterns fail.
+class ContractionOpLowering : public OpRewritePattern<vector::ContractionOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+  using FilterConstraintType =
+      std::function<LogicalResult(vector::ContractionOp op)>;
+
+  static LogicalResult defaultFilter(vector::ContractionOp op) {
+    return success();
+  }
+
+  ContractionOpLowering(vector::VectorTransformsOptions vectorTransformOptions,
+                        MLIRContext *context, PatternBenefit benefit = 1,
+                        FilterConstraintType constraint = defaultFilter)
+      : OpRewritePattern<vector::ContractionOp>(context, benefit),
+        vectorTransformOptions(vectorTransformOptions),
+        filter(std::move(constraint)) {}
+
+  LogicalResult matchAndRewrite(vector::ContractionOp op,
+                                PatternRewriter &rewriter) const override;
+
+private:
+  /// Options to control the vector patterns.
+  vector::VectorTransformsOptions vectorTransformOptions;
+  FilterConstraintType filter;
+  // Lower one parallel dimension.
+  FailureOr<Value> lowerParallel(PatternRewriter &rewriter,
+                                 vector::ContractionOp op, int64_t lhsIndex,
+                                 int64_t rhsIndex, Value mask) const;
+  // Lower one reduction dimension.
+  FailureOr<Value> lowerReduction(PatternRewriter &rewriter,
+                                  vector::ContractionOp op, Value mask) const;
+};
+
+/// Generate a vector implementation for matmat, matvec and tmatvec.
+/// This unrolls outer-products along the reduction dimension.
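+///
+/// As an illustrative summary: for a matmat with indexing maps
+/// ((m, k), (k, n) -> (m, n)) and reduction size K, this emits K
+/// vector.extract pairs feeding a chain of K accumulating vector.outerproduct
+/// ops (see outerProd below).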
+struct UnrolledOuterProductGenerator
+    : public StructuredGenerator<vector::ContractionOp, vector::IteratorType> {
+  UnrolledOuterProductGenerator(RewriterBase &b, vector::ContractionOp op)
+      : StructuredGenerator<vector::ContractionOp, vector::IteratorType>(b, op),
+        kind(op.getKind()), lhs(op.getLhs()), rhs(op.getRhs()),
+        res(op.getAcc()), lhsType(op.getLhsType()) {
+    auto maskableOp = cast<MaskableOpInterface>(op.getOperation());
+    if (maskableOp.isMasked())
+      mask = maskableOp.getMaskingOp().getMask();
+  }
+
+  Value t(Value v, ArrayRef<int64_t> perm = {1, 0}) {
+    if (!v)
+      return v;
+    return rewriter.create<vector::TransposeOp>(loc, v, perm);
+  }
+
+  Value promote(Value v, Type dstElementType) {
+    Type elementType = v.getType();
+    auto vecType = elementType.dyn_cast<VectorType>();
+    if (vecType)
+      elementType = vecType.getElementType();
+    if (elementType == dstElementType)
+      return v;
+    Type promotedType = dstElementType;
+    if (vecType)
+      promotedType = VectorType::get(vecType.getShape(), promotedType);
+    if (dstElementType.isa<FloatType>())
+      return rewriter.create<arith::ExtFOp>(loc, promotedType, v);
+    return rewriter.create<arith::ExtSIOp>(loc, promotedType, v);
+  }
+
+  FailureOr<Value> outerProd(Value lhs, Value rhs, Value res, int reductionSize,
+                             std::optional<Value> maybeMask = std::nullopt) {
+    assert(reductionSize > 0);
+    // Incremental support for masking.
+    if (mask && !maybeMask.has_value())
+      return failure();
+
+    Type resElementType = res.getType().cast<VectorType>().getElementType();
+    for (int64_t k = 0; k < reductionSize; ++k) {
+      Value extractA = rewriter.create<vector::ExtractOp>(loc, lhs, k);
+      Value extractB = rewriter.create<vector::ExtractOp>(loc, rhs, k);
+      extractA = promote(extractA, resElementType);
+      extractB = promote(extractB, resElementType);
+      Value extractMask;
+      if (maybeMask.has_value() && maybeMask.value())
+        extractMask =
+            rewriter.create<vector::ExtractOp>(loc, maybeMask.value(), k);
+
+      Operation *outerProdOp = rewriter.create<vector::OuterProductOp>(
+          loc, res.getType(), extractA, extractB, res, kind);
+      res = maskOperation(rewriter, outerProdOp, extractMask)->getResult(0);
+    }
+    return res;
+  }
+
+  /// Two outer parallel, one inner reduction (matmat flavor).
+  FailureOr<Value> matmat() {
+    if (!iters({Par(), Par(), Red()}))
+      return failure();
+    // Set up the parallel/reduction structure in the right form.
+    AffineExpr m, n, k;
+    bindDims(rewriter.getContext(), m, n, k);
+    // Classical row-major matmul: Just permute the lhs.
+    if (layout({{m, k}, {k, n}, {m, n}}))
+      return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1),
+                       t(mask, {2, 0, 1}));
+    // TODO: may be better to fail and use some vector -> scalar reduction.
+    if (layout({{m, k}, {n, k}, {m, n}})) {
+      Value tlhs = t(lhs);
+      return outerProd(tlhs, t(rhs), res, lhsType.getDimSize(1));
+    }
+    // No need to permute anything.
+    if (layout({{k, m}, {k, n}, {m, n}}))
+      return outerProd(lhs, rhs, res, lhsType.getDimSize(0));
+    // Just permute the rhs.
+    if (layout({{k, m}, {n, k}, {m, n}}))
+      return outerProd(lhs, t(rhs), res, lhsType.getDimSize(0));
+    // Transposed output: swap RHS and LHS.
+    // Classical row-major matmul: permute the lhs.
+    if (layout({{m, k}, {k, n}, {n, m}}))
+      return outerProd(rhs, t(lhs), res, lhsType.getDimSize(1));
+    // TODO: may be better to fail and use some vector -> scalar reduction.
+    if (layout({{m, k}, {n, k}, {n, m}})) {
+      Value trhs = t(rhs);
+      return outerProd(trhs, t(lhs), res, lhsType.getDimSize(1));
+    }
+    if (layout({{k, m}, {k, n}, {n, m}}))
+      return outerProd(rhs, lhs, res, lhsType.getDimSize(0));
+    if (layout({{k, m}, {n, k}, {n, m}}))
+      return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0));
+    return failure();
+  }
+
+  /// One outer parallel, one inner reduction (matvec flavor)
+  FailureOr<Value> matvec() {
+    if (!iters({Par(), Red()}))
+      return failure();
+    AffineExpr m, k;
+    bindDims(rewriter.getContext(), m, k);
+
+    // Case mat-vec: transpose.
+    if (layout({{m, k}, {k}, {m}}))
+      return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1), t(mask));
+    // Case mat-trans-vec: ready to go.
+    if (layout({{k, m}, {k}, {m}}))
+      return outerProd(lhs, rhs, res, lhsType.getDimSize(0));
+    // Case vec-mat: swap and transpose.
+    if (layout({{k}, {m, k}, {m}}))
+      return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0));
+    // Case vec-mat-trans: swap and ready to go.
+    if (layout({{k}, {k, m}, {m}}))
+      return outerProd(rhs, lhs, res, lhsType.getDimSize(0));
+    return failure();
+  }
+
+  //
+  // One outer reduction, one inner parallel (tmatvec flavor)
+  //
+  FailureOr<Value> tmatvec() {
+    if (!iters({Red(), Par()}))
+      return failure();
+    AffineExpr k, m;
+    bindDims(rewriter.getContext(), k, m);
+
+    // Case mat-vec: transpose.
+    if (layout({{m, k}, {k}, {m}}))
+      return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1));
+    // Case mat-trans-vec: ready to go.
+    if (layout({{k, m}, {k}, {m}}))
+      return outerProd(lhs, rhs, res, lhsType.getDimSize(0));
+    // Case vec-mat: swap and transpose.
+    if (layout({{k}, {m, k}, {m}}))
+      return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0));
+    // Case vec-mat-trans: swap and ready to go.
+    if (layout({{k}, {k, m}, {m}}))
+      return outerProd(rhs, lhs, res, lhsType.getDimSize(0));
+    return failure();
+  }
+
+private:
+  vector::CombiningKind kind;
+  Value lhs, rhs, res, mask;
+  VectorType lhsType;
+};
+
+/// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul
+/// semantics to a reduction_size-unrolled sequence:
+/// ```
+///    %at = vector.transpose %a, [1, 0]
+///    %bRow0 = vector.extract %b[0]
+///    %atRow0 = vector.extract %at[0]
+///    %c0 = vector.outerproduct %atRow0, %bRow0, %c
+///    ...
+///    %bRowK = vector.extract %b[K]
+///    %atRowK = vector.extract %at[K]
+///    %cK = vector.outerproduct %atRowK, %bRowK, %cK-1
+/// ```
+///
+/// This only kicks in when VectorTransformsOptions is set to OuterProduct but
+/// otherwise supports any layout permutation of the matrix-multiply.
+LogicalResult ContractionOpToOuterProductOpLowering::matchAndRewrite(
+    vector::ContractionOp op, PatternRewriter &rewriter) const {
+  // TODO: Remove native masks from contraction op?
+  if (!op.getMasks().empty())
+    return failure();
+
+  if (vectorTransformOptions.vectorContractLowering !=
+      vector::VectorContractLowering::OuterProduct)
+    return failure();
+
+  if (failed(filter(op)))
+    return failure();
+
+  // Vector mask setup.
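+  // If the contraction is wrapped in a vector.mask, new ops are inserted
+  // right before the masking op, and the masking op itself becomes the root
+  // that is replaced by the unrolled outer products.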
+ OpBuilder::InsertionGuard guard(rewriter); + auto maskableOp = cast(op.getOperation()); + Operation *rootOp; + if (maskableOp.isMasked()) { + rewriter.setInsertionPoint(maskableOp.getMaskingOp()); + rootOp = maskableOp.getMaskingOp(); + } else { + rootOp = op; + } + + UnrolledOuterProductGenerator e(rewriter, op); + FailureOr matmatRes = e.matmat(); + if (succeeded(matmatRes)) { + rewriter.replaceOp(rootOp, *matmatRes); + return success(); + } + FailureOr matvecRes = e.matvec(); + if (succeeded(matvecRes)) { + rewriter.replaceOp(rootOp, *matvecRes); + return success(); + } + FailureOr tmatvecRes = e.tmatvec(); + if (succeeded(tmatvecRes)) { + rewriter.replaceOp(rootOp, *tmatvecRes); + return success(); + } + + return failure(); +} + +LogicalResult +ContractionOpToDotLowering::matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const { + // TODO: Support vector.mask. + auto maskableOp = cast(op.getOperation()); + if (maskableOp.isMasked()) + return failure(); + + // TODO: Remove native masks from contraction op? + if (!op.getMasks().empty()) + return failure(); + + if (failed(filter(op))) + return failure(); + + if (vectorTransformOptions.vectorContractLowering != + vector::VectorContractLowering::Dot) + return failure(); + + auto iteratorTypes = op.getIteratorTypes().getValue(); + static constexpr std::array perm = {1, 0}; + Location loc = op.getLoc(); + Value lhs = op.getLhs(), rhs = op.getRhs(); + + using MapList = ArrayRef>; + auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; + AffineExpr m, n, k; + bindDims(rewriter.getContext(), m, n, k); + SmallVector maps = op.getIndexingMapsArray(); + // + // In the following we wish to make the reduction dimension innermost so we + // can load vectors and just fmul + reduce into a scalar. + // + if (isParallelIterator(iteratorTypes[0]) && + isParallelIterator(iteratorTypes[1]) && + isReductionIterator(iteratorTypes[2])) { + // + // Two outer parallel, one inner reduction (matmat flavor). + // + if (maps == infer({{m, k}, {k, n}, {m, n}})) { + rhs = rewriter.create(loc, rhs, perm); + } else if (maps == infer({{m, k}, {n, k}, {m, n}})) { + // No need to permute anything. + } else if (maps == infer({{k, m}, {k, n}, {m, n}})) { + lhs = rewriter.create(loc, lhs, perm); + rhs = rewriter.create(loc, rhs, perm); + } else if (maps == infer({{k, m}, {n, k}, {m, n}})) { + lhs = rewriter.create(loc, lhs, perm); + } else if (maps == infer({{m, k}, {k, n}, {n, m}})) { + // This is the classical row-major matmul. Just permute the lhs. + Value tmp = lhs; + lhs = rewriter.create(loc, rhs, perm); + rhs = tmp; + } else if (maps == infer({{m, k}, {n, k}, {n, m}})) { + std::swap(lhs, rhs); + } else if (maps == infer({{k, m}, {k, n}, {n, m}})) { + Value tmp = lhs; + lhs = rewriter.create(loc, rhs, perm); + rhs = rewriter.create(loc, tmp, perm); + } else if (maps == infer({{k, m}, {n, k}, {n, m}})) { + Value tmp = rhs; + rhs = rewriter.create(loc, lhs, perm); + lhs = tmp; + } else { + return failure(); + } + } else if (isParallelIterator(iteratorTypes[0]) && + isReductionIterator(iteratorTypes[1])) { + // + // One outer parallel, one inner reduction (matvec flavor) + // + if (maps == infer({{m, n}, {n}, {m}})) { + // No need to permute anything. 
+    } else if (maps == infer({{n, m}, {n}, {m}})) {
+      lhs = rewriter.create(loc, lhs, perm);
+    } else if (maps == infer({{n}, {m, n}, {m}})) {
+      std::swap(lhs, rhs);
+    } else if (maps == infer({{n}, {n, m}, {m}})) {
+      std::swap(lhs, rhs);
+      lhs = rewriter.create(loc, lhs, perm);
+    } else {
+      return failure();
+    }
+  } else {
+    return failure();
+  }
+
+  VectorType dstType = op.getResultType().cast();
+  assert(dstType.getRank() >= 1 && dstType.getRank() <= 2 &&
+         "Expected dst type of rank 1 or 2");
+
+  unsigned rank = dstType.getRank();
+  unsigned dstRows = dstType.getShape()[0];
+  unsigned dstColumns = rank == 1 ? 1 : dstType.getShape()[1];
+
+  // ExtractOp does not allow dynamic indexing; we must unroll explicitly.
+  Value res = rewriter.create(loc, dstType,
+                              rewriter.getZeroAttr(dstType));
+  bool isInt = dstType.getElementType().isa();
+  for (unsigned r = 0; r < dstRows; ++r) {
+    Value a = rewriter.create(op.getLoc(), lhs, r);
+    for (unsigned c = 0; c < dstColumns; ++c) {
+      Value b = rank == 1
+                    ? rhs
+                    : rewriter.create(op.getLoc(), rhs, c);
+      Value m = createMul(op.getLoc(), a, b, isInt, rewriter);
+      Value reduced = rewriter.create(
+          op.getLoc(), vector::CombiningKind::ADD, m);
+
+      SmallVector pos = rank == 1 ? SmallVector{r}
+                                  : SmallVector{r, c};
+      res = rewriter.create(op.getLoc(), reduced, res, pos);
+    }
+  }
+  if (auto acc = op.getAcc())
+    res = createAdd(op.getLoc(), res, acc, isInt, rewriter);
+  rewriter.replaceOp(op, res);
+  return success();
+}
+
+/// Lower vector.contract with all size one reduction dimensions to
+/// elementwise ops when possible.
+struct ContractOpToElementwise
+    : public OpRewritePattern {
+  using OpRewritePattern::OpRewritePattern;
+  using FilterConstraintType =
+      std::function;
+  static LogicalResult defaultFilter(vector::ContractionOp op) {
+    return success();
+  }
+  ContractOpToElementwise(
+      vector::VectorTransformsOptions vectorTransformOptions,
+      MLIRContext *context, PatternBenefit benefit = 1,
+      const FilterConstraintType &constraint = defaultFilter)
+      : OpRewritePattern(context, benefit),
+        vectorTransformOptions(vectorTransformOptions), filter(constraint) {}
+
+  LogicalResult matchAndRewrite(vector::ContractionOp contractOp,
+                                PatternRewriter &rewriter) const override {
+    // TODO: Support vector.mask.
+    auto maskableOp = cast(contractOp.getOperation());
+    if (maskableOp.isMasked())
+      return failure();
+
+    // TODO: Remove native masks from contraction op?
+    if (!contractOp.getMasks().empty())
+      return failure();
+
+    if (failed(filter(contractOp)))
+      return failure();
+
+    if (vectorTransformOptions.vectorContractLowering !=
+        vector::VectorContractLowering::ParallelArith)
+      return failure();
+
+    ArrayRef lhsShape = contractOp.getLhsType().getShape();
+    ArrayRef rhsShape = contractOp.getRhsType().getShape();
+    AffineMap lhsMap = contractOp.getIndexingMapsArray()[0];
+    AffineMap rhsMap = contractOp.getIndexingMapsArray()[1];
+    SmallVector lhsReductionDims =
+        getReductionIndex(lhsMap, contractOp.getIteratorTypes());
+    SmallVector rhsReductionDims =
+        getReductionIndex(rhsMap, contractOp.getIteratorTypes());
+    // All the reduction dimensions must have size 1.
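+    // For illustration (shapes hypothetical): a vector<4x1xf32> x
+    // vector<1x4xf32> contraction whose k dimension has size 1 is rewritten
+    // into broadcasts/transposes of the operands followed by an elementwise
+    // multiply that is combined into the accumulator.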
+ for (int64_t dim : lhsReductionDims) { + if (lhsShape[dim] != 1) + return failure(); + } + for (int64_t dim : rhsReductionDims) { + if (rhsShape[dim] != 1) + return failure(); + } + AffineMap accMap = contractOp.getIndexingMapsArray()[2]; + unsigned numParallelDims = accMap.getNumResults(); + unsigned numLhsDimToBroadcast = + numParallelDims - (lhsMap.getNumResults() - lhsReductionDims.size()); + unsigned numRhsDimToBroadcast = + numParallelDims - (rhsMap.getNumResults() - rhsReductionDims.size()); + SmallVector lhsDims; + SmallVector lhsTranspose; + SmallVector rhsDims; + SmallVector rhsTranspose; + for (int64_t dim : lhsReductionDims) + lhsTranspose.push_back(numLhsDimToBroadcast + dim); + for (int64_t dim : rhsReductionDims) + rhsTranspose.push_back(numRhsDimToBroadcast + dim); + // Loop through the parallel dimensions to calculate the dimensions to + // broadcast and to permute in order to extract only parallel dimensions. + for (unsigned i = 0; i < numParallelDims; i++) { + std::optional lhsDim = + getDimPosition(lhsMap, accMap.getDimPosition(i)); + if (lhsDim) { + lhsTranspose.push_back(numLhsDimToBroadcast + *lhsDim); + } else { + // If the parallel dimension doesn't exist we will have to broadcast it. + lhsDims.push_back( + contractOp.getResultType().cast().getDimSize(i)); + lhsTranspose.push_back(lhsDims.size() - 1); + } + std::optional rhsDim = + getDimPosition(rhsMap, accMap.getDimPosition(i)); + if (rhsDim) { + rhsTranspose.push_back(numRhsDimToBroadcast + *rhsDim); + } else { + // If the parallel dimension doesn't exist we will have to broadcast it. + rhsDims.push_back( + contractOp.getResultType().cast().getDimSize(i)); + rhsTranspose.push_back(rhsDims.size() - 1); + } + } + Value newLhs = contractOp.getLhs(); + Value newRhs = contractOp.getRhs(); + Location loc = contractOp.getLoc(); + if (!lhsDims.empty()) { + lhsDims.append(lhsShape.begin(), lhsShape.end()); + auto expandedType = + VectorType::get(lhsDims, contractOp.getLhsType().getElementType()); + newLhs = rewriter.create(loc, expandedType, newLhs); + } + if (!rhsDims.empty()) { + rhsDims.append(rhsShape.begin(), rhsShape.end()); + auto expandedType = + VectorType::get(rhsDims, contractOp.getRhsType().getElementType()); + newRhs = rewriter.create(loc, expandedType, newRhs); + } + bool isInt = contractOp.getLhsType().getElementType().isIntOrIndex(); + newLhs = rewriter.create(loc, newLhs, lhsTranspose); + newRhs = rewriter.create(loc, newRhs, rhsTranspose); + SmallVector lhsOffsets(lhsReductionDims.size(), 0); + SmallVector rhsOffsets(rhsReductionDims.size(), 0); + newLhs = rewriter.create( + loc, newLhs, rewriter.getI64ArrayAttr(lhsOffsets)); + newRhs = rewriter.create( + loc, newRhs, rewriter.getI64ArrayAttr(rhsOffsets)); + std::optional result = + createContractArithOp(loc, newLhs, newRhs, contractOp.getAcc(), + contractOp.getKind(), rewriter, isInt); + rewriter.replaceOp(contractOp, {*result}); + return success(); + } + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformOptions; + FilterConstraintType filter; +}; + +/// Progressive lowering of ContractionOp. +/// One: +/// %x = vector.contract with at least one free/batch dimension +/// is replaced by: +/// %a = vector.contract with one less free/batch dimension +/// %b = vector.contract with one less free/batch dimension +/// .. +/// %x = combine %a %b .. +/// until a pure contraction is reached (no free/batch dimensions), +/// which is replaced by a dot-product. 
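+///
+/// For illustration (shapes hypothetical), a matvec-like contraction such as
+/// ```
+///   %x = vector.contract {
+///          indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+///                           affine_map<(d0, d1) -> (d1)>,
+///                           affine_map<(d0, d1) -> (d0)>],
+///          iterator_types = ["parallel", "reduction"],
+///          kind = #vector.kind<add>}
+///        %a, %b, %c : vector<2x3xf32>, vector<3xf32> into vector<2xf32>
+/// ```
+/// is unrolled along the parallel dimension d0 into rank-reduced contractions
+/// whose results are reassembled with vector.insert.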
+/// +/// This only kicks in when either VectorTransformsOptions is set +/// to DOT or when other contraction patterns fail. +// +// TODO: break down into transpose/reshape/cast ops +// when they become available to avoid code dup +// TODO: investigate lowering order impact on performance +LogicalResult +ContractionOpLowering::matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rewriter) const { + // TODO: Remove native masks from contraction op? + if (!op.getMasks().empty()) + return failure(); + + if (failed(filter(op))) + return failure(); + + // TODO: support mixed mode contract lowering. + if (op.getLhsType().getElementType() != + getElementTypeOrSelf(op.getAccType()) || + op.getRhsType().getElementType() != getElementTypeOrSelf(op.getAccType())) + return failure(); + + // TODO: the code below assumes the default contraction, make sure it supports + // other kinds before enabling this lowering. + if (op.getKind() != vector::CombiningKind::ADD) { + return rewriter.notifyMatchFailure( + op, "contractions other than 'add' not supported"); + } + + // TODO: implement benefits, cost models. + MLIRContext *ctx = op.getContext(); + ContractionOpToMatmulOpLowering pat1(vectorTransformOptions, ctx); + if (succeeded(pat1.matchAndRewrite(op, rewriter))) + return success(); + ContractionOpToOuterProductOpLowering pat2(vectorTransformOptions, ctx); + if (succeeded(pat2.matchAndRewrite(op, rewriter))) + return success(); + ContractionOpToDotLowering pat3(vectorTransformOptions, ctx); + if (succeeded(pat3.matchAndRewrite(op, rewriter))) + return success(); + ContractOpToElementwise pat4(vectorTransformOptions, ctx); + if (succeeded(pat4.matchAndRewrite(op, rewriter))) + return success(); + + // Vector mask setup. + OpBuilder::InsertionGuard guard(rewriter); + Operation *rootOp = op; + Value mask; + if (op.isMasked()) { + rewriter.setInsertionPoint(op.getMaskingOp()); + rootOp = op.getMaskingOp(); + mask = op.getMaskingOp().getMask(); + } + + // Find first batch dimension in LHS/RHS, and lower when found. + std::vector> batchDimMap = op.getBatchDimMap(); + if (!batchDimMap.empty()) { + int64_t lhsIndex = batchDimMap[0].first; + int64_t rhsIndex = batchDimMap[0].second; + auto newOp = lowerParallel(rewriter, op, lhsIndex, rhsIndex, mask); + if (failed(newOp)) + return failure(); + rewriter.replaceOp(rootOp, *newOp); + return success(); + } + + // Collect contracting dimensions. + std::vector> contractingDimMap = + op.getContractingDimMap(); + DenseSet lhsContractingDimSet; + DenseSet rhsContractingDimSet; + for (auto &dimPair : contractingDimMap) { + lhsContractingDimSet.insert(dimPair.first); + rhsContractingDimSet.insert(dimPair.second); + } + + // Find first free dimension in LHS, and lower when found. + VectorType lhsType = op.getLhsType(); + for (int64_t lhsIndex = 0, e = lhsType.getRank(); lhsIndex < e; ++lhsIndex) { + if (lhsContractingDimSet.count(lhsIndex) == 0) { + auto newOp = lowerParallel(rewriter, op, lhsIndex, /*rhsIndex=*/-1, mask); + if (failed(newOp)) + return failure(); + rewriter.replaceOp(rootOp, *newOp); + return success(); + } + } + + // Find first free dimension in RHS, and lower when found. 
+ VectorType rhsType = op.getRhsType(); + for (int64_t rhsIndex = 0, e = rhsType.getRank(); rhsIndex < e; ++rhsIndex) { + if (rhsContractingDimSet.count(rhsIndex) == 0) { + auto newOp = lowerParallel(rewriter, op, /*lhsIndex=*/-1, rhsIndex, mask); + if (failed(newOp)) + return failure(); + rewriter.replaceOp(rootOp, *newOp); + return success(); + } + } + + // Lower the first remaining reduction dimension. + if (!contractingDimMap.empty()) { + auto newOp = lowerReduction(rewriter, op, mask); + if (failed(newOp)) + return failure(); + rewriter.replaceOp(rootOp, *newOp); + return success(); + } + + return failure(); +} + +// Lower one parallel dimension. +// Incidentally also tolerates unit-size (hence trivial) reduction dimensions. +// TODO: consider reusing existing contract unrolling +FailureOr ContractionOpLowering::lowerParallel(PatternRewriter &rewriter, + vector::ContractionOp op, + int64_t lhsIndex, + int64_t rhsIndex, + Value mask) const { + VectorType lhsType = op.getLhsType(); + VectorType rhsType = op.getRhsType(); + VectorType resType = op.getResultType().cast(); + // Find the iterator type index and result index. + SmallVector iMap = op.getIndexingMapsArray(); + int64_t iterIndex = -1; + int64_t dimSize = -1; + if (lhsIndex >= 0) { + iterIndex = iMap[0].getDimPosition(lhsIndex); + if (rhsIndex >= 0 && iterIndex != iMap[1].getDimPosition(rhsIndex)) + return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { + diag << "expected lhsIndex=" << lhsIndex << " and rhsIndex=" << rhsIndex + << " to map to the same dimension"; + }); + dimSize = lhsType.getDimSize(lhsIndex); + } else if (rhsIndex >= 0) { + iterIndex = iMap[1].getDimPosition(rhsIndex); + dimSize = rhsType.getDimSize(rhsIndex); + } + if (iterIndex < 0) + return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { + diag << "expected either lhsIndex=" << lhsIndex + << " or rhsIndex=" << rhsIndex << " to be nonnegative"; + }); + // value_or(-1) means that we tolerate a dimension not appearing + // in the result map. That can't happen for actual parallel iterators, but + // the caller ContractionOpLowering::matchAndRewrite is currently calling + // lowerParallel also for the case of unit-size reduction dims appearing only + // on one of LHS or RHS, not both. At the moment, such cases are created by + // CastAwayContractionLeadingOneDim, so we need to either support that or + // modify that pattern. + int64_t resIndex = getResultIndex(iMap[2], iterIndex).value_or(-1); + if (resIndex == -1 && dimSize != 1) + return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { + diag << "expected the dimension for iterIndex=" << iterIndex + << " to either appear in the result map, or to be a unit dimension"; + }); + + // Construct new iterator types and affine map array attribute. + std::array lowIndexingMaps = { + adjustMap(iMap[0], iterIndex, rewriter), + adjustMap(iMap[1], iterIndex, rewriter), + adjustMap(iMap[2], iterIndex, rewriter)}; + auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps); + auto lowIter = + rewriter.getArrayAttr(adjustIter(op.getIteratorTypes(), iterIndex)); + // Unroll into a series of lower dimensional vector.contract ops. 
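+  // For a parallel dimension of size N, the loop below emits N rank-reduced
+  // vector.contract ops; reshapeLoad/reshapeStore extract and reinsert the
+  // d-th slice of the operands and of the result.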
+  Location loc = op.getLoc();
+  Value result = rewriter.create(
+      loc, resType, rewriter.getZeroAttr(resType));
+
+  for (int64_t d = 0; d < dimSize; ++d) {
+    auto lhs = reshapeLoad(loc, op.getLhs(), lhsType, lhsIndex, d, rewriter);
+    auto rhs = reshapeLoad(loc, op.getRhs(), rhsType, rhsIndex, d, rewriter);
+    auto acc = reshapeLoad(loc, op.getAcc(), resType, resIndex, d, rewriter);
+
+    Value lowMask;
+    if (mask)
+      lowMask = reshapeLoad(loc, mask, cast(mask.getType()),
+                            iterIndex, d, rewriter);
+
+    Operation *lowContract = rewriter.create(
+        loc, lhs, rhs, acc, lowAffine, lowIter);
+    lowContract = maskOperation(rewriter, lowContract, lowMask);
+    result = reshapeStore(loc, lowContract->getResult(0), result, resType,
+                          resIndex, d, rewriter);
+  }
+  return result;
+}
+
+// Lower one reduction dimension.
+FailureOr ContractionOpLowering::lowerReduction(
+    PatternRewriter &rewriter, vector::ContractionOp op, Value mask) const {
+  auto loc = op.getLoc();
+  VectorType lhsType = op.getLhsType();
+  VectorType rhsType = op.getRhsType();
+  Type resType = op.getResultType();
+  if (resType.isa())
+    return rewriter.notifyMatchFailure(op,
+                                       "did not expect a VectorType result");
+  bool isInt = resType.isa();
+  // Use iterator index 0.
+  int64_t iterIndex = 0;
+  SmallVector iMap = op.getIndexingMapsArray();
+  std::optional lookupLhs = getResultIndex(iMap[0], iterIndex);
+  std::optional lookupRhs = getResultIndex(iMap[1], iterIndex);
+  if (!lookupLhs.has_value())
+    return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) {
+      diag << "expected iterIndex=" << iterIndex
+           << " to map to a LHS dimension";
+    });
+  if (!lookupRhs.has_value())
+    return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) {
+      diag << "expected iterIndex=" << iterIndex
+           << " to map to a RHS dimension";
+    });
+  int64_t lhsIndex = *lookupLhs;
+  int64_t rhsIndex = *lookupRhs;
+  int64_t dimSize = lhsType.getDimSize(lhsIndex);
+  if (dimSize != rhsType.getDimSize(rhsIndex))
+    return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) {
+      diag << "expected LHS dimension " << lhsIndex
+           << " to have the same size as RHS dimension " << rhsIndex;
+    });
+  // Base case.
+  if (lhsType.getRank() == 1) {
+    if (rhsType.getRank() != 1)
+      return rewriter.notifyMatchFailure(
+          op, "when LHS has rank 1, expected RHS to also have rank 1");
+    Value m = createMul(loc, op.getLhs(), op.getRhs(), isInt, rewriter);
+    auto kind = vector::CombiningKind::ADD;
+
+    Value acc = op.getAcc();
+    Operation *reductionOp =
+        acc ? rewriter.create(loc, kind, m, acc)
+            : rewriter.create(loc, kind, m);
+    return maskOperation(rewriter, reductionOp, mask)->getResult(0);
+  }
+  // Construct new iterator types and affine map array attribute.
+  std::array lowIndexingMaps = {
+      adjustMap(iMap[0], iterIndex, rewriter),
+      adjustMap(iMap[1], iterIndex, rewriter),
+      adjustMap(iMap[2], iterIndex, rewriter)};
+  auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps);
+  auto lowIter =
+      rewriter.getArrayAttr(adjustIter(op.getIteratorTypes(), iterIndex));
+  // Unroll into a series of lower dimensional vector.contract ops.
+  // By feeding the initial accumulator into the first contraction,
+  // and the result of each contraction into the next, eventually
+  // the sum of all reductions is computed.
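+  // Schematically: acc -> contract(d=0) -> contract(d=1) -> ...
+  // -> contract(d=dimSize-1) -> result.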
+ Value result = op.getAcc(); + for (int64_t d = 0; d < dimSize; ++d) { + auto lhs = reshapeLoad(loc, op.getLhs(), lhsType, lhsIndex, d, rewriter); + auto rhs = reshapeLoad(loc, op.getRhs(), rhsType, rhsIndex, d, rewriter); + Value newMask; + if (mask) + newMask = reshapeLoad(loc, mask, cast(mask.getType()), + iterIndex, d, rewriter); + + Operation *newContract = rewriter.create( + loc, lhs, rhs, result, lowAffine, lowIter); + result = maskOperation(rewriter, newContract, newMask)->getResult(0); + } + return result; +} + +/// Progressive lowering of OuterProductOp. +/// One: +/// %x = vector.outerproduct %lhs, %rhs, %acc +/// is replaced by: +/// %z = zero-result +/// %0 = vector.extract %lhs[0] +/// %1 = vector.broadcast %0 +/// %2 = vector.extract %acc[0] +/// %3 = vector.fma %1, %rhs, %2 +/// %4 = vector.insert %3, %z[0] +/// .. +/// %x = vector.insert %.., %..[N-1] +/// +class OuterProductOpLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::OuterProductOp op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + + VectorType lhsType = op.getOperandVectorTypeLHS(); + VectorType rhsType = op.getOperandTypeRHS().dyn_cast(); + VectorType resType = op.getResultVectorType(); + Type eltType = resType.getElementType(); + bool isInt = eltType.isa(); + Value acc = (op.getAcc().empty()) ? nullptr : op.getAcc()[0]; + vector::CombiningKind kind = op.getKind(); + + // Vector mask setup. + OpBuilder::InsertionGuard guard(rewriter); + auto maskableOp = cast(op.getOperation()); + Operation *rootOp; + Value mask; + if (maskableOp.isMasked()) { + rewriter.setInsertionPoint(maskableOp.getMaskingOp()); + rootOp = maskableOp.getMaskingOp(); + mask = maskableOp.getMaskingOp().getMask(); + } else { + rootOp = op; + } + + if (!rhsType) { + // Special case: AXPY operation. + Value b = rewriter.create(loc, lhsType, op.getRhs()); + std::optional mult = createContractArithOp( + loc, op.getLhs(), b, acc, kind, rewriter, isInt, mask); + if (!mult.has_value()) + return failure(); + rewriter.replaceOp(rootOp, *mult); + return success(); + } + + Value result = rewriter.create( + loc, resType, rewriter.getZeroAttr(resType)); + for (int64_t d = 0, e = resType.getDimSize(0); d < e; ++d) { + auto pos = rewriter.getI64ArrayAttr(d); + Value x = rewriter.create(loc, op.getLhs(), pos); + Value a = rewriter.create(loc, rhsType, x); + Value r = nullptr; + if (acc) + r = rewriter.create(loc, acc, pos); + Value extrMask; + if (mask) + extrMask = rewriter.create(loc, mask, pos); + + std::optional m = createContractArithOp( + loc, a, op.getRhs(), r, kind, rewriter, isInt, extrMask); + if (!m.has_value()) + return failure(); + result = rewriter.create(loc, resType, *m, result, pos); + } + + rewriter.replaceOp(rootOp, result); + return success(); + } +}; + +/// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul +/// semantics to: +/// ``` +/// %mta = maybe_transpose +/// %mtb = maybe_transpose +/// %flattened_a = vector.shape_cast %mta +/// %flattened_b = vector.shape_cast %mtb +/// %flattened_d = vector.matmul %flattened_a, %flattened_b +/// %mtd = vector.shape_cast %flattened_d +/// %d = maybe_untranspose %mtd +/// %e = add %c, %d +/// ``` +/// `vector.matmul` later lowers to `llvm.matrix.multiply`. +// +/// This only kicks in when VectorTransformsOptions is set to `Matmul`. +/// vector.transpose operations are inserted if the vector.contract op is not a +/// row-major matrix multiply. 
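+///
+/// For illustration (shapes hypothetical), a row-major 2x3 * 3x4 multiply
+/// roughly becomes:
+/// ```
+///   %fa = vector.shape_cast %a : vector<2x3xf32> to vector<6xf32>
+///   %fb = vector.shape_cast %b : vector<3x4xf32> to vector<12xf32>
+///   %fd = vector.matmul %fa, %fb {lhs_columns = 3 : i32, lhs_rows = 2 : i32,
+///           rhs_columns = 4 : i32} : (vector<6xf32>, vector<12xf32>) -> vector<8xf32>
+///   %d  = vector.shape_cast %fd : vector<8xf32> to vector<2x4xf32>
+///   %e  = arith.addf %c, %d : vector<2x4xf32>
+/// ```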
+LogicalResult +ContractionOpToMatmulOpLowering::matchAndRewrite(vector::ContractionOp op, + PatternRewriter &rew) const { + // TODO: Support vector.mask. + auto maskableOp = cast(op.getOperation()); + if (maskableOp.isMasked()) + return failure(); + + // TODO: Remove native masks from contraction op? + if (!op.getMasks().empty()) + return failure(); + if (vectorTransformOptions.vectorContractLowering != + vector::VectorContractLowering::Matmul) + return failure(); + if (failed(filter(op))) + return failure(); + + auto iteratorTypes = op.getIteratorTypes().getValue(); + if (!isParallelIterator(iteratorTypes[0]) || + !isParallelIterator(iteratorTypes[1]) || + !isReductionIterator(iteratorTypes[2])) + return failure(); + + Type elementType = op.getLhsType().getElementType(); + if (!elementType.isIntOrFloat()) + return failure(); + + Type dstElementType = op.getType(); + if (auto vecType = dstElementType.dyn_cast()) + dstElementType = vecType.getElementType(); + if (elementType != dstElementType) + return failure(); + + // Perform lhs + rhs transpositions to conform to matmul row-major semantics. + // Bail out if the contraction cannot be put in this form. + MLIRContext *ctx = op.getContext(); + Location loc = op.getLoc(); + AffineExpr m, n, k; + bindDims(rew.getContext(), m, n, k); + // LHS must be A(m, k) or A(k, m). + Value lhs = op.getLhs(); + auto lhsMap = op.getIndexingMapsArray()[0]; + if (lhsMap == AffineMap::get(3, 0, {k, m}, ctx)) + lhs = rew.create(loc, lhs, ArrayRef{1, 0}); + else if (lhsMap != AffineMap::get(3, 0, {m, k}, ctx)) + return failure(); + + // RHS must be B(k, n) or B(n, k). + Value rhs = op.getRhs(); + auto rhsMap = op.getIndexingMapsArray()[1]; + if (rhsMap == AffineMap::get(3, 0, {n, k}, ctx)) + rhs = rew.create(loc, rhs, ArrayRef{1, 0}); + else if (rhsMap != AffineMap::get(3, 0, {k, n}, ctx)) + return failure(); + + // At this point lhs and rhs are in row-major. + VectorType lhsType = lhs.getType().cast(); + VectorType rhsType = rhs.getType().cast(); + int64_t lhsRows = lhsType.getDimSize(0); + int64_t lhsColumns = lhsType.getDimSize(1); + int64_t rhsColumns = rhsType.getDimSize(1); + + Type flattenedLHSType = + VectorType::get(lhsType.getNumElements(), lhsType.getElementType()); + lhs = rew.create(loc, flattenedLHSType, lhs); + + Type flattenedRHSType = + VectorType::get(rhsType.getNumElements(), rhsType.getElementType()); + rhs = rew.create(loc, flattenedRHSType, rhs); + + Value mul = rew.create(loc, lhs, rhs, lhsRows, lhsColumns, + rhsColumns); + mul = rew.create( + loc, + VectorType::get({lhsRows, rhsColumns}, + getElementTypeOrSelf(op.getAcc().getType())), + mul); + + // ACC must be C(m, n) or C(n, m). + auto accMap = op.getIndexingMapsArray()[2]; + if (accMap == AffineMap::get(3, 0, {n, m}, ctx)) + mul = rew.create(loc, mul, ArrayRef{1, 0}); + else if (accMap != AffineMap::get(3, 0, {m, n}, ctx)) + llvm_unreachable("invalid contraction semantics"); + + Value res = + elementType.isa() + ? 
static_cast(rew.create(loc, op.getAcc(), mul))
+          : static_cast(
+                rew.create(loc, op.getAcc(), mul));
+
+  rew.replaceOp(op, res);
+  return success();
+}
+} // namespace
+
+void mlir::vector::populateVectorContractLoweringPatterns(
+    RewritePatternSet &patterns, VectorTransformsOptions options,
+    PatternBenefit benefit, bool disableOuterProductLowering) {
+  if (!disableOuterProductLowering)
+    patterns.add(patterns.getContext(), benefit);
+  patterns.add(
+      options, patterns.getContext(), benefit);
+}
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
new file mode 100644
index 0000000000000..dc10cb6278cb8
--- /dev/null
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
@@ -0,0 +1,173 @@
+//===- LowerVectorGather.cpp - Lower 'vector.gather' operation -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target-independent rewrites and utilities to lower the
+// 'vector.gather' operation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
+#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
+#include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
+#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/ImplicitLocOpBuilder.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Interfaces/VectorInterfaces.h"
+#include "mlir/Support/LogicalResult.h"
+
+#define DEBUG_TYPE "vector-broadcast-lowering"
+
+using namespace mlir;
+using namespace mlir::vector;
+
+namespace {
+/// Flattens 2 or more dimensional `vector.gather` ops by unrolling the
+/// outermost dimension. For example:
+/// ```
+/// %g = vector.gather %base[%c0][%v], %mask, %pass_thru :
+///        ... into vector<2x3xf32>
+///
+/// ==>
+///
+/// %0 = arith.constant dense<0.0> : vector<2x3xf32>
+/// %g0 = vector.gather %base[%c0][%v0], %mask0, %pass_thru0 : ...
+/// %1 = vector.insert %g0, %0 [0] : vector<3xf32> into vector<2x3xf32>
+/// %g1 = vector.gather %base[%c0][%v1], %mask1, %pass_thru1 : ...
+/// %g = vector.insert %g1, %1 [1] : vector<3xf32> into vector<2x3xf32>
+/// ```
+///
+/// When applied exhaustively, this will produce a sequence of 1-d gather ops.
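+/// Those 1-D gather ops can then be rewritten into conditional scalar loads by
+/// the Gather1DToConditionalLoads pattern below.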
+struct FlattenGather : OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::GatherOp op, + PatternRewriter &rewriter) const override { + VectorType resultTy = op.getType(); + if (resultTy.getRank() < 2) + return rewriter.notifyMatchFailure(op, "already flat"); + + Location loc = op.getLoc(); + Value indexVec = op.getIndexVec(); + Value maskVec = op.getMask(); + Value passThruVec = op.getPassThru(); + + Value result = rewriter.create( + loc, resultTy, rewriter.getZeroAttr(resultTy)); + + Type subTy = VectorType::get(resultTy.getShape().drop_front(), + resultTy.getElementType()); + + for (int64_t i = 0, e = resultTy.getShape().front(); i < e; ++i) { + int64_t thisIdx[1] = {i}; + + Value indexSubVec = + rewriter.create(loc, indexVec, thisIdx); + Value maskSubVec = + rewriter.create(loc, maskVec, thisIdx); + Value passThruSubVec = + rewriter.create(loc, passThruVec, thisIdx); + Value subGather = rewriter.create( + loc, subTy, op.getBase(), op.getIndices(), indexSubVec, maskSubVec, + passThruSubVec); + result = + rewriter.create(loc, subGather, result, thisIdx); + } + + rewriter.replaceOp(op, result); + return success(); + } +}; + +/// Turns 1-d `vector.gather` into a scalarized sequence of `vector.loads` or +/// `tensor.extract`s. To avoid out-of-bounds memory accesses, these +/// loads/extracts are made conditional using `scf.if` ops. +struct Gather1DToConditionalLoads : OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::GatherOp op, + PatternRewriter &rewriter) const override { + VectorType resultTy = op.getType(); + if (resultTy.getRank() != 1) + return rewriter.notifyMatchFailure(op, "unsupported rank"); + + Location loc = op.getLoc(); + Type elemTy = resultTy.getElementType(); + // Vector type with a single element. Used to generate `vector.loads`. + VectorType elemVecTy = VectorType::get({1}, elemTy); + + Value condMask = op.getMask(); + Value base = op.getBase(); + Value indexVec = rewriter.createOrFold( + loc, op.getIndexVectorType().clone(rewriter.getIndexType()), + op.getIndexVec()); + auto baseOffsets = llvm::to_vector(op.getIndices()); + Value lastBaseOffset = baseOffsets.back(); + + Value result = op.getPassThru(); + + // Emit a conditional access for each vector element. + for (int64_t i = 0, e = resultTy.getNumElements(); i < e; ++i) { + int64_t thisIdx[1] = {i}; + Value condition = + rewriter.create(loc, condMask, thisIdx); + Value index = rewriter.create(loc, indexVec, thisIdx); + baseOffsets.back() = + rewriter.createOrFold(loc, lastBaseOffset, index); + + auto loadBuilder = [&](OpBuilder &b, Location loc) { + Value extracted; + if (isa(base.getType())) { + // `vector.load` does not support scalar result; emit a vector load + // and extract the single result instead. 
+ Value load = + b.create(loc, elemVecTy, base, baseOffsets); + int64_t zeroIdx[1] = {0}; + extracted = b.create(loc, load, zeroIdx); + } else { + extracted = b.create(loc, base, baseOffsets); + } + + Value newResult = + b.create(loc, extracted, result, thisIdx); + b.create(loc, newResult); + }; + auto passThruBuilder = [result](OpBuilder &b, Location loc) { + b.create(loc, result); + }; + + result = + rewriter + .create(loc, condition, /*thenBuilder=*/loadBuilder, + /*elseBuilder=*/passThruBuilder) + .getResult(0); + } + + rewriter.replaceOp(op, result); + return success(); + } +}; +} // namespace + +void mlir::vector::populateVectorGatherLoweringPatterns( + RewritePatternSet &patterns, PatternBenefit benefit) { + patterns.add(patterns.getContext(), + benefit); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp index 7c66e65fdef8b..e318d4dc15915 100644 --- a/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMask.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements target-independent rewrites and utilitites to lower the +// This file implements target-independent rewrites and utilities to lower the // 'vector.mask' operation. // //===----------------------------------------------------------------------===// @@ -14,6 +14,7 @@ #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Dialect/Vector/Transforms/Passes.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -30,6 +31,147 @@ namespace vector { using namespace mlir; using namespace mlir::vector; +//===----------------------------------------------------------------------===// +// populateVectorMaskOpLoweringPatterns +//===----------------------------------------------------------------------===// + +namespace { +/// Progressive lowering of CreateMaskOp. +/// One: +/// %x = vector.create_mask %a, ... : vector +/// is replaced by: +/// %l = vector.create_mask ... : vector<...> ; one lower rank +/// %0 = arith.cmpi "slt", %ci, %a | +/// %1 = select %0, %l, %zeroes | +/// %r = vector.insert %1, %pr [i] | d-times +/// %x = .... +/// until a one-dimensional vector is reached. 
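+///
+/// For illustration (shapes hypothetical), for
+///   %x = vector.create_mask %a, %b : vector<2x3xi1>
+/// each outer index i in [0, 2) yields, schematically:
+/// ```
+///   %l   = vector.create_mask %b : vector<3xi1>
+///   %ci  = arith.constant i : index
+///   %cmp = arith.cmpi slt, %ci, %a : index
+///   %sel = arith.select %cmp, %l, %zeroes : vector<3xi1>
+///   %x_i = vector.insert %sel, %prev [i] : vector<3xi1> into vector<2x3xi1>
+/// ```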
+class CreateMaskOpLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::CreateMaskOp op, + PatternRewriter &rewriter) const override { + auto dstType = op.getResult().getType().cast(); + int64_t rank = dstType.getRank(); + if (rank <= 1) + return rewriter.notifyMatchFailure( + op, "0-D and 1-D vectors are handled separately"); + + auto loc = op.getLoc(); + auto eltType = dstType.getElementType(); + int64_t dim = dstType.getDimSize(0); + Value idx = op.getOperand(0); + + VectorType lowType = + VectorType::get(dstType.getShape().drop_front(), eltType); + Value trueVal = rewriter.create( + loc, lowType, op.getOperands().drop_front()); + Value falseVal = rewriter.create( + loc, lowType, rewriter.getZeroAttr(lowType)); + Value result = rewriter.create( + loc, dstType, rewriter.getZeroAttr(dstType)); + for (int64_t d = 0; d < dim; d++) { + Value bnd = + rewriter.create(loc, rewriter.getIndexAttr(d)); + Value val = rewriter.create(loc, arith::CmpIPredicate::slt, + bnd, idx); + Value sel = rewriter.create(loc, val, trueVal, falseVal); + auto pos = rewriter.getI64ArrayAttr(d); + result = + rewriter.create(loc, dstType, sel, result, pos); + } + rewriter.replaceOp(op, result); + return success(); + } +}; + +/// Progressive lowering of ConstantMaskOp. +/// One: +/// %x = vector.constant_mask [a,b] +/// is replaced by: +/// %z = zero-result +/// %l = vector.constant_mask [b] +/// %4 = vector.insert %l, %z[0] +/// .. +/// %x = vector.insert %l, %..[a-1] +/// until a one-dimensional vector is reached. All these operations +/// will be folded at LLVM IR level. +class ConstantMaskOpLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::ConstantMaskOp op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + auto dstType = op.getType(); + auto eltType = dstType.getElementType(); + auto dimSizes = op.getMaskDimSizes(); + int64_t rank = dstType.getRank(); + + if (rank == 0) { + assert(dimSizes.size() == 1 && + "Expected exactly one dim size for a 0-D vector"); + bool value = dimSizes[0].cast().getInt() == 1; + rewriter.replaceOpWithNewOp( + op, dstType, + DenseIntElementsAttr::get( + VectorType::get(ArrayRef{}, rewriter.getI1Type()), + ArrayRef{value})); + return success(); + } + + // Scalable constant masks can only be lowered for the "none set" case. + if (dstType.cast().isScalable()) { + rewriter.replaceOpWithNewOp( + op, DenseElementsAttr::get(dstType, false)); + return success(); + } + + int64_t trueDim = std::min(dstType.getDimSize(0), + dimSizes[0].cast().getInt()); + + if (rank == 1) { + // Express constant 1-D case in explicit vector form: + // [T,..,T,F,..,F]. 
+ SmallVector values(dstType.getDimSize(0)); + for (int64_t d = 0; d < trueDim; d++) + values[d] = true; + rewriter.replaceOpWithNewOp( + op, dstType, rewriter.getBoolVectorAttr(values)); + return success(); + } + + VectorType lowType = + VectorType::get(dstType.getShape().drop_front(), eltType); + SmallVector newDimSizes; + for (int64_t r = 1; r < rank; r++) + newDimSizes.push_back(dimSizes[r].cast().getInt()); + Value trueVal = rewriter.create( + loc, lowType, rewriter.getI64ArrayAttr(newDimSizes)); + Value result = rewriter.create( + loc, dstType, rewriter.getZeroAttr(dstType)); + for (int64_t d = 0; d < trueDim; d++) { + auto pos = rewriter.getI64ArrayAttr(d); + result = + rewriter.create(loc, dstType, trueVal, result, pos); + } + rewriter.replaceOp(op, result); + return success(); + } +}; +} // namespace + +void mlir::vector::populateVectorMaskOpLoweringPatterns( + RewritePatternSet &patterns, PatternBenefit benefit) { + patterns.add( + patterns.getContext(), benefit); +} + +//===----------------------------------------------------------------------===// +// populateVectorMaskLoweringPatternsForSideEffectingOps +//===----------------------------------------------------------------------===// + namespace { /// The `MaskOpRewritePattern` implements a pattern that follows a two-fold diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorMultiDimReductionTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp similarity index 98% rename from mlir/lib/Dialect/Vector/Transforms/VectorMultiDimReductionTransforms.cpp rename to mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp index b790d141415aa..1744c46db5886 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorMultiDimReductionTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorMultiReduction.cpp @@ -1,4 +1,4 @@ -//===- VectorMultiDimReductionTransforms.cpp - Multi-Reduction Transforms -===// +//===- LowerVectorMultiReduction.cpp - Lower `vector.multi_reduction` op --===// // /// Part of the LLVM Project, under the Apache License v2.0 with LLVM /// Exceptions. See https://llvm.org/LICENSE.txt for license information. @@ -6,12 +6,13 @@ // //===----------------------------------------------------------------------===// // -/// This file implements target-independent rewrites of MultiDimReductionOp. +// This file implements target-independent rewrites and utilities to lower the +// 'vector.multi_reduction' operation. // //===----------------------------------------------------------------------===// #include "mlir/Dialect/Arith/IR/Arith.h" -#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/IR/Builders.h" #include "mlir/IR/TypeUtilities.h" @@ -19,6 +20,7 @@ using namespace mlir; +namespace { /// This file implements the following transformations as composable atomic /// patterns. 
@@ -441,6 +443,7 @@ struct OneDimMultiReductionToTwoDim
     return success();
   }
 };
+} // namespace
 
 void mlir::vector::populateVectorMultiReductionLoweringPatterns(
     RewritePatternSet &patterns, VectorMultiReductionLowering options,
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorScan.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorScan.cpp
new file mode 100644
index 0000000000000..eb2deba7bc46b
--- /dev/null
+++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorScan.cpp
@@ -0,0 +1,251 @@
+//===- LowerVectorScan.cpp - Lower 'vector.scan' operation ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements target-independent rewrites and utilities to lower the
+// 'vector.scan' operation.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
+#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
+#include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
+#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/ImplicitLocOpBuilder.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Interfaces/VectorInterfaces.h"
+#include "mlir/Support/LogicalResult.h"
+
+#define DEBUG_TYPE "vector-broadcast-lowering"
+
+using namespace mlir;
+using namespace mlir::vector;
+
+/// This function constructs the appropriate integer or float
+/// operation given the vector combining kind and operands. The
+/// supported int operations are: add, mul, min (signed/unsigned),
+/// max (signed/unsigned), and, or, xor. The supported float
+/// operations are: add, mul, min, and max.
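+/// For example, ADD on float operands produces an arith.addf, while MINSI on
+/// integer operands produces an arith.minsi.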
+static Value genOperator(Location loc, Value x, Value y, + vector::CombiningKind kind, + PatternRewriter &rewriter) { + using vector::CombiningKind; + + auto elType = x.getType().cast().getElementType(); + bool isInt = elType.isIntOrIndex(); + + Value combinedResult{nullptr}; + switch (kind) { + case CombiningKind::ADD: + if (isInt) + combinedResult = rewriter.create(loc, x, y); + else + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MUL: + if (isInt) + combinedResult = rewriter.create(loc, x, y); + else + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MINUI: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MINSI: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MAXUI: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MAXSI: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::AND: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::OR: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::XOR: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MINF: + combinedResult = rewriter.create(loc, x, y); + break; + case CombiningKind::MAXF: + combinedResult = rewriter.create(loc, x, y); + break; + } + return combinedResult; +} + +/// This function checks to see if the vector combining kind +/// is consistent with the integer or float element type. +static bool isValidKind(bool isInt, vector::CombiningKind kind) { + using vector::CombiningKind; + enum class KindType { FLOAT, INT, INVALID }; + KindType type{KindType::INVALID}; + switch (kind) { + case CombiningKind::MINF: + case CombiningKind::MAXF: + type = KindType::FLOAT; + break; + case CombiningKind::MINUI: + case CombiningKind::MINSI: + case CombiningKind::MAXUI: + case CombiningKind::MAXSI: + case CombiningKind::AND: + case CombiningKind::OR: + case CombiningKind::XOR: + type = KindType::INT; + break; + case CombiningKind::ADD: + case CombiningKind::MUL: + type = isInt ? KindType::INT : KindType::FLOAT; + break; + } + bool isValidIntKind = (type == KindType::INT) && isInt; + bool isValidFloatKind = (type == KindType::FLOAT) && (!isInt); + return (isValidIntKind || isValidFloatKind); +} + +namespace { +/// Convert vector.scan op into arith ops and vector.insert_strided_slice / +/// vector.extract_strided_slice. 
+/// +/// Example: +/// +/// ``` +/// %0:2 = vector.scan , %arg0, %arg1 +/// {inclusive = true, reduction_dim = 1} : +/// (vector<2x3xi32>, vector<2xi32>) to (vector<2x3xi32>, vector<2xi32>) +/// ``` +/// +/// is converted to: +/// +/// ``` +/// %cst = arith.constant dense<0> : vector<2x3xi32> +/// %0 = vector.extract_strided_slice %arg0 +/// {offsets = [0, 0], sizes = [2, 1], strides = [1, 1]} +/// : vector<2x3xi32> to vector<2x1xi32> +/// %1 = vector.insert_strided_slice %0, %cst +/// {offsets = [0, 0], strides = [1, 1]} +/// : vector<2x1xi32> into vector<2x3xi32> +/// %2 = vector.extract_strided_slice %arg0 +/// {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]} +/// : vector<2x3xi32> to vector<2x1xi32> +/// %3 = arith.muli %0, %2 : vector<2x1xi32> +/// %4 = vector.insert_strided_slice %3, %1 +/// {offsets = [0, 1], strides = [1, 1]} +/// : vector<2x1xi32> into vector<2x3xi32> +/// %5 = vector.extract_strided_slice %arg0 +/// {offsets = [0, 2], sizes = [2, 1], strides = [1, 1]} +/// : vector<2x3xi32> to vector<2x1xi32> +/// %6 = arith.muli %3, %5 : vector<2x1xi32> +/// %7 = vector.insert_strided_slice %6, %4 +/// {offsets = [0, 2], strides = [1, 1]} +/// : vector<2x1xi32> into vector<2x3xi32> +/// %8 = vector.shape_cast %6 : vector<2x1xi32> to vector<2xi32> +/// return %7, %8 : vector<2x3xi32>, vector<2xi32> +/// ``` +struct ScanToArithOps : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::ScanOp scanOp, + PatternRewriter &rewriter) const override { + auto loc = scanOp.getLoc(); + VectorType destType = scanOp.getDestType(); + ArrayRef destShape = destType.getShape(); + auto elType = destType.getElementType(); + bool isInt = elType.isIntOrIndex(); + if (!isValidKind(isInt, scanOp.getKind())) + return failure(); + + VectorType resType = VectorType::get(destShape, elType); + Value result = rewriter.create( + loc, resType, rewriter.getZeroAttr(resType)); + int64_t reductionDim = scanOp.getReductionDim(); + bool inclusive = scanOp.getInclusive(); + int64_t destRank = destType.getRank(); + VectorType initialValueType = scanOp.getInitialValueType(); + int64_t initialValueRank = initialValueType.getRank(); + + SmallVector reductionShape(destShape.begin(), destShape.end()); + reductionShape[reductionDim] = 1; + VectorType reductionType = VectorType::get(reductionShape, elType); + SmallVector offsets(destRank, 0); + SmallVector strides(destRank, 1); + SmallVector sizes(destShape.begin(), destShape.end()); + sizes[reductionDim] = 1; + ArrayAttr scanSizes = rewriter.getI64ArrayAttr(sizes); + ArrayAttr scanStrides = rewriter.getI64ArrayAttr(strides); + + Value lastOutput, lastInput; + for (int i = 0; i < destShape[reductionDim]; i++) { + offsets[reductionDim] = i; + ArrayAttr scanOffsets = rewriter.getI64ArrayAttr(offsets); + Value input = rewriter.create( + loc, reductionType, scanOp.getSource(), scanOffsets, scanSizes, + scanStrides); + Value output; + if (i == 0) { + if (inclusive) { + output = input; + } else { + if (initialValueRank == 0) { + // ShapeCastOp cannot handle 0-D vectors + output = rewriter.create( + loc, input.getType(), scanOp.getInitialValue()); + } else { + output = rewriter.create( + loc, input.getType(), scanOp.getInitialValue()); + } + } + } else { + Value y = inclusive ? 
input : lastInput; + output = genOperator(loc, lastOutput, y, scanOp.getKind(), rewriter); + assert(output != nullptr); + } + result = rewriter.create( + loc, output, result, offsets, strides); + lastOutput = output; + lastInput = input; + } + + Value reduction; + if (initialValueRank == 0) { + Value v = rewriter.create(loc, lastOutput, 0); + reduction = + rewriter.create(loc, initialValueType, v); + } else { + reduction = rewriter.create(loc, initialValueType, + lastOutput); + } + + rewriter.replaceOp(scanOp, {result, reduction}); + return success(); + } +}; +} // namespace + +void mlir::vector::populateVectorScanLoweringPatterns( + RewritePatternSet &patterns, PatternBenefit benefit) { + patterns.add(patterns.getContext(), benefit); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp new file mode 100644 index 0000000000000..bd9716cbca94c --- /dev/null +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp @@ -0,0 +1,177 @@ +//===- LowerVectorShapeCast.cpp - Lower 'vector.shape_cast' operation -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements target-independent rewrites and utilities to lower the +// 'vector.shape_cast' operation. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Dialect/Tensor/IR/Tensor.h" +#include "mlir/Dialect/Utils/IndexingUtils.h" +#include "mlir/Dialect/Utils/StructuredOpsUtils.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" +#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" +#include "mlir/Dialect/Vector/Utils/VectorUtils.h" +#include "mlir/IR/BuiltinAttributeInterfaces.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/ImplicitLocOpBuilder.h" +#include "mlir/IR/Location.h" +#include "mlir/IR/Matchers.h" +#include "mlir/IR/PatternMatch.h" +#include "mlir/IR/TypeUtilities.h" +#include "mlir/Interfaces/VectorInterfaces.h" +#include "mlir/Support/LogicalResult.h" + +#define DEBUG_TYPE "vector-shape-cast-lowering" + +using namespace mlir; +using namespace mlir::vector; + +namespace { +/// ShapeOp 2D -> 1D downcast serves the purpose of flattening 2-D to 1-D +/// vectors progressively on the way to target llvm.matrix intrinsics. 
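+/// For illustration (shapes hypothetical), casting vector<2x3xf32> to
+/// vector<6xf32> extracts the two rows and inserts them at offsets 0 and 3 of
+/// the 1-D result.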
+/// This iterates over the most major dimension of the 2-D vector and performs
+/// rewrites into:
+///   vector.extract from 2-D + vector.insert_strided_slice offset into 1-D
+class ShapeCastOp2DDownCastRewritePattern
+    : public OpRewritePattern {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(vector::ShapeCastOp op,
+                                PatternRewriter &rewriter) const override {
+    auto sourceVectorType = op.getSourceVectorType();
+    auto resultVectorType = op.getResultVectorType();
+    if (sourceVectorType.getRank() != 2 || resultVectorType.getRank() != 1)
+      return failure();
+
+    auto loc = op.getLoc();
+    Value desc = rewriter.create(
+        loc, resultVectorType, rewriter.getZeroAttr(resultVectorType));
+    unsigned mostMinorVectorSize = sourceVectorType.getShape()[1];
+    for (int64_t i = 0, e = sourceVectorType.getShape().front(); i != e; ++i) {
+      Value vec = rewriter.create(loc, op.getSource(), i);
+      desc = rewriter.create(
+          loc, vec, desc,
+          /*offsets=*/i * mostMinorVectorSize, /*strides=*/1);
+    }
+    rewriter.replaceOp(op, desc);
+    return success();
+  }
+};
+
+/// ShapeOp 1D -> 2D upcast serves the purpose of unflattening 2-D from 1-D
+/// vectors progressively.
+/// This iterates over the most major dimension of the 2-D vector and performs
+/// rewrites into:
+///   vector.extract_strided_slice from 1-D + vector.insert into 2-D
+/// Note that 1-D extract_strided_slice ops are lowered to efficient
+/// vector.shuffle ops.
+class ShapeCastOp2DUpCastRewritePattern
+    : public OpRewritePattern {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(vector::ShapeCastOp op,
+                                PatternRewriter &rewriter) const override {
+    auto sourceVectorType = op.getSourceVectorType();
+    auto resultVectorType = op.getResultVectorType();
+    if (sourceVectorType.getRank() != 1 || resultVectorType.getRank() != 2)
+      return failure();
+
+    auto loc = op.getLoc();
+    Value desc = rewriter.create(
+        loc, resultVectorType, rewriter.getZeroAttr(resultVectorType));
+    unsigned mostMinorVectorSize = resultVectorType.getShape()[1];
+    for (int64_t i = 0, e = resultVectorType.getShape().front(); i != e; ++i) {
+      Value vec = rewriter.create(
+          loc, op.getSource(), /*offsets=*/i * mostMinorVectorSize,
+          /*sizes=*/mostMinorVectorSize,
+          /*strides=*/1);
+      desc = rewriter.create(loc, vec, desc, i);
+    }
+    rewriter.replaceOp(op, desc);
+    return success();
+  }
+};
+
+// We typically should not lower general shape cast operations into data
+// movement instructions, since the assumption is that these casts are
+// optimized away during progressive lowering. For completeness, however,
+// we fall back to a reference implementation that moves all elements
+// into the right place if we get here.
+class ShapeCastOpRewritePattern : public OpRewritePattern {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(vector::ShapeCastOp op,
+                                PatternRewriter &rewriter) const override {
+    Location loc = op.getLoc();
+    auto sourceVectorType = op.getSourceVectorType();
+    auto resultVectorType = op.getResultVectorType();
+
+    // Special case 2D / 1D lowerings with better implementations.
+    // TODO: make it ND / 1D to allow generic ND -> 1D -> MD.
+    int64_t srcRank = sourceVectorType.getRank();
+    int64_t resRank = resultVectorType.getRank();
+    if ((srcRank == 2 && resRank == 1) || (srcRank == 1 && resRank == 2))
+      return failure();
+
+    // Generic ShapeCast lowering path goes all the way down to unrolled scalar
+    // extract/insert chains.
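+    // E.g. (shapes hypothetical) a vector<2x1x2xf32> -> vector<4xf32> cast is
+    // unrolled into four scalar vector.extract / vector.insert pairs.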
+ // TODO: consider evolving the semantics to only allow 1D source or dest and + // drop this potentially very expensive lowering. + // Compute number of elements involved in the reshape. + int64_t numElts = 1; + for (int64_t r = 0; r < srcRank; r++) + numElts *= sourceVectorType.getDimSize(r); + // Replace with data movement operations: + // x[0,0,0] = y[0,0] + // x[0,0,1] = y[0,1] + // x[0,1,0] = y[0,2] + // etc., incrementing the two index vectors "row-major" + // within the source and result shape. + SmallVector srcIdx(srcRank); + SmallVector resIdx(resRank); + Value result = rewriter.create( + loc, resultVectorType, rewriter.getZeroAttr(resultVectorType)); + for (int64_t i = 0; i < numElts; i++) { + if (i != 0) { + incIdx(srcIdx, sourceVectorType, srcRank - 1); + incIdx(resIdx, resultVectorType, resRank - 1); + } + Value e = rewriter.create(loc, op.getSource(), srcIdx); + result = rewriter.create(loc, e, result, resIdx); + } + rewriter.replaceOp(op, result); + return success(); + } + +private: + static void incIdx(SmallVector &idx, VectorType tp, int64_t r) { + assert(0 <= r && r < tp.getRank()); + if (++idx[r] == tp.getDimSize(r)) { + idx[r] = 0; + incIdx(idx, tp, r - 1); + } + } +}; +} // namespace + +void mlir::vector::populateVectorShapeCastLoweringPatterns( + RewritePatternSet &patterns, PatternBenefit benefit) { + patterns.add( + patterns.getContext(), benefit); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferPermutationMapRewritePatterns.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp similarity index 57% rename from mlir/lib/Dialect/Vector/Transforms/VectorTransferPermutationMapRewritePatterns.cpp rename to mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp index 68d9a349478bf..c2ce9aa10a850 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferPermutationMapRewritePatterns.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTransfer.cpp @@ -14,7 +14,7 @@ #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" -#include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Interfaces/VectorInterfaces.h" using namespace mlir; @@ -46,6 +46,11 @@ static Value extendVectorRank(OpBuilder &builder, Location loc, Value vec, return builder.create(loc, newVecType, vec); } +//===----------------------------------------------------------------------===// +// populateVectorTransferPermutationMapLoweringPatterns +//===----------------------------------------------------------------------===// + +namespace { /// Lower transfer_read op with permutation into a transfer_read with a /// permutation map composed of leading zeros followed by a minor identiy + /// vector.transpose op. @@ -332,6 +337,8 @@ struct TransferOpReduceRank : public OpRewritePattern { } }; +} // namespace + void mlir::vector::populateVectorTransferPermutationMapLoweringPatterns( RewritePatternSet &patterns, PatternBenefit benefit) { patterns @@ -339,3 +346,239 @@ void mlir::vector::populateVectorTransferPermutationMapLoweringPatterns( TransferOpReduceRank, TransferWriteNonPermutationLowering>( patterns.getContext(), benefit); } + +//===----------------------------------------------------------------------===// +// populateVectorTransferLoweringPatterns +//===----------------------------------------------------------------------===// + +namespace { +/// Progressive lowering of transfer_read. 
This pattern supports lowering of +/// `vector.transfer_read` to a combination of `vector.load` and +/// `vector.broadcast` if all of the following hold: +/// - Stride of most minor memref dimension must be 1. +/// - Out-of-bounds masking is not required. +/// - If the memref's element type is a vector type then it coincides with the +/// result type. +/// - The permutation map doesn't perform permutation (broadcasting is allowed). +struct TransferReadToVectorLoadLowering + : public OpRewritePattern { + TransferReadToVectorLoadLowering(MLIRContext *context, + std::optional maxRank, + PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit), + maxTransferRank(maxRank) {} + + LogicalResult matchAndRewrite(vector::TransferReadOp read, + PatternRewriter &rewriter) const override { + if (maxTransferRank && read.getVectorType().getRank() > *maxTransferRank) + return failure(); + + SmallVector broadcastedDims; + // Permutations are handled by VectorToSCF or + // populateVectorTransferPermutationMapLoweringPatterns. + // We let the 0-d corner case pass-through as it is supported. + if (!read.getPermutationMap().isMinorIdentityWithBroadcasting( + &broadcastedDims)) + return failure(); + + auto memRefType = read.getShapedType().dyn_cast(); + if (!memRefType) + return failure(); + + // Non-unit strides are handled by VectorToSCF. + if (!vector::isLastMemrefDimUnitStride(memRefType)) + return failure(); + + // If there is broadcasting involved then we first load the unbroadcasted + // vector, and then broadcast it with `vector.broadcast`. + ArrayRef vectorShape = read.getVectorType().getShape(); + SmallVector unbroadcastedVectorShape(vectorShape.begin(), + vectorShape.end()); + for (unsigned i : broadcastedDims) + unbroadcastedVectorShape[i] = 1; + VectorType unbroadcastedVectorType = VectorType::get( + unbroadcastedVectorShape, read.getVectorType().getElementType()); + + // `vector.load` supports vector types as memref's elements only when the + // resulting vector type is the same as the element type. + auto memrefElTy = memRefType.getElementType(); + if (memrefElTy.isa() && memrefElTy != unbroadcastedVectorType) + return failure(); + + // Otherwise, element types of the memref and the vector must match. + if (!memrefElTy.isa() && + memrefElTy != read.getVectorType().getElementType()) + return failure(); + + // Out-of-bounds dims are handled by MaterializeTransferMask. + if (read.hasOutOfBoundsDim()) + return failure(); + + // Create vector load op. + Operation *loadOp; + if (read.getMask()) { + Value fill = rewriter.create( + read.getLoc(), unbroadcastedVectorType, read.getPadding()); + loadOp = rewriter.create( + read.getLoc(), unbroadcastedVectorType, read.getSource(), + read.getIndices(), read.getMask(), fill); + } else { + loadOp = rewriter.create( + read.getLoc(), unbroadcastedVectorType, read.getSource(), + read.getIndices()); + } + + // Insert a broadcasting op if required. + if (!broadcastedDims.empty()) { + rewriter.replaceOpWithNewOp( + read, read.getVectorType(), loadOp->getResult(0)); + } else { + rewriter.replaceOp(read, loadOp->getResult(0)); + } + + return success(); + } + + std::optional maxTransferRank; +}; + +/// Replace a 0-d vector.load with a memref.load + vector.broadcast. +// TODO: we shouldn't cross the vector/scalar domains just for this +// but atm we lack the infra to avoid it. 
Possible solutions include:
+// - go directly to LLVM + bitcast
+// - introduce a bitcast op and likely a new pointer dialect
+// - let memref.load/store additionally support the 0-d vector case
+// There are still deeper data layout issues lingering even in this
+// trivial case (for architectures for which this matters).
+struct VectorLoadToMemrefLoadLowering
+    : public OpRewritePattern {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(vector::LoadOp loadOp,
+                                PatternRewriter &rewriter) const override {
+    auto vecType = loadOp.getVectorType();
+    if (vecType.getNumElements() != 1)
+      return failure();
+    auto memrefLoad = rewriter.create(
+        loadOp.getLoc(), loadOp.getBase(), loadOp.getIndices());
+    rewriter.replaceOpWithNewOp(loadOp, vecType,
+                                memrefLoad);
+    return success();
+  }
+};
+
+/// Replace a 0-d vector.store with a vector.extractelement + memref.store.
+struct VectorStoreToMemrefStoreLowering
+    : public OpRewritePattern {
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(vector::StoreOp storeOp,
+                                PatternRewriter &rewriter) const override {
+    auto vecType = storeOp.getVectorType();
+    if (vecType.getNumElements() != 1)
+      return failure();
+    Value extracted;
+    if (vecType.getRank() == 0) {
+      // TODO: Unify once ExtractOp supports 0-d vectors.
+      extracted = rewriter.create(
+          storeOp.getLoc(), storeOp.getValueToStore());
+    } else {
+      SmallVector indices(vecType.getRank(), 0);
+      extracted = rewriter.create(
+          storeOp.getLoc(), storeOp.getValueToStore(), indices);
+    }
+
+    rewriter.replaceOpWithNewOp(
+        storeOp, extracted, storeOp.getBase(), storeOp.getIndices());
+    return success();
+  }
+};
+
+/// Progressive lowering of transfer_write. This pattern supports lowering of
+/// `vector.transfer_write` to `vector.store` if all of the following hold:
+/// - Stride of most minor memref dimension must be 1.
+/// - Out-of-bounds masking is not required.
+/// - If the memref's element type is a vector type then it coincides with the
+///   type of the written value.
+/// - The permutation map is the minor identity map (neither permutation nor
+///   broadcasting is allowed).
+struct TransferWriteToVectorStoreLowering
+    : public OpRewritePattern {
+  TransferWriteToVectorStoreLowering(MLIRContext *context,
+                                     std::optional maxRank,
+                                     PatternBenefit benefit = 1)
+      : OpRewritePattern(context, benefit),
+        maxTransferRank(maxRank) {}
+
+  LogicalResult matchAndRewrite(vector::TransferWriteOp write,
+                                PatternRewriter &rewriter) const override {
+    if (maxTransferRank && write.getVectorType().getRank() > *maxTransferRank)
+      return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) {
+        diag << "rank exceeds maxTransferRank: " << write;
+      });
+
+    // Permutations are handled by VectorToSCF or
+    // populateVectorTransferPermutationMapLoweringPatterns.
+    if ( // pass-through for the 0-d corner case.
+        !write.getPermutationMap().isMinorIdentity())
+      return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) {
+        diag << "permutation map is not minor identity: " << write;
+      });
+
+    auto memRefType = write.getShapedType().dyn_cast();
+    if (!memRefType)
+      return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) {
+        diag << "not a memref type: " << write;
+      });
+
+    // Non-unit strides are handled by VectorToSCF.
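+    // For instance (an illustrative case, not from the original source), a
+    // write into memref<4x8xf32, strided<[8, 1]>> has a unit minor stride and
+    // qualifies, while memref<4x8xf32, strided<[1, 4]>> does not.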
+ if (!vector::isLastMemrefDimUnitStride(memRefType)) + return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { + diag << "most minor stride is not 1: " << write; + }); + + // `vector.store` supports vector types as memref's elements only when the + // type of the vector value being written is the same as the element type. + auto memrefElTy = memRefType.getElementType(); + if (memrefElTy.isa() && memrefElTy != write.getVectorType()) + return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { + diag << "elemental type mismatch: " << write; + }); + + // Otherwise, element types of the memref and the vector must match. + if (!memrefElTy.isa() && + memrefElTy != write.getVectorType().getElementType()) + return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { + diag << "elemental type mismatch: " << write; + }); + + // Out-of-bounds dims are handled by MaterializeTransferMask. + if (write.hasOutOfBoundsDim()) + return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { + diag << "out of bounds dim: " << write; + }); + if (write.getMask()) { + rewriter.replaceOpWithNewOp( + write, write.getSource(), write.getIndices(), write.getMask(), + write.getVector()); + } else { + rewriter.replaceOpWithNewOp( + write, write.getVector(), write.getSource(), write.getIndices()); + } + return success(); + } + + std::optional maxTransferRank; +}; +} // namespace + +void mlir::vector::populateVectorTransferLoweringPatterns( + RewritePatternSet &patterns, std::optional maxTransferRank, + PatternBenefit benefit) { + patterns.add(patterns.getContext(), + maxTransferRank, benefit); + patterns + .add( + patterns.getContext(), benefit); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorTranspose.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTranspose.cpp new file mode 100644 index 0000000000000..f6e8b0c445c99 --- /dev/null +++ b/mlir/lib/Dialect/Vector/Transforms/LowerVectorTranspose.cpp @@ -0,0 +1,210 @@ +//===- LowerVectorTranspose.cpp - Lower 'vector.transpose' operation ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements target-independent rewrites and utilities to lower the +// 'vector.transpose' operation. 
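+// The strategy is selected via VectorTransformsOptions. As a rough sketch
+// (illustrative IR, assuming the Shuffle strategy on a vector<2x3xf32>):
+//   %0 = vector.shape_cast %src : vector<2x3xf32> to vector<6xf32>
+//   %1 = vector.shuffle %0, %0 [0, 3, 1, 4, 2, 5]
+//       : vector<6xf32>, vector<6xf32>
+//   %2 = vector.shape_cast %1 : vector<6xf32> to vector<3x2xf32>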
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/IR/Arith.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Utils/IndexingUtils.h"
+#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
+#include "mlir/Dialect/Vector/IR/VectorOps.h"
+#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h"
+#include "mlir/Dialect/Vector/Utils/VectorUtils.h"
+#include "mlir/IR/BuiltinAttributeInterfaces.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/ImplicitLocOpBuilder.h"
+#include "mlir/IR/Location.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/IR/TypeUtilities.h"
+#include "mlir/Interfaces/VectorInterfaces.h"
+#include "mlir/Support/LogicalResult.h"
+
+#define DEBUG_TYPE "lower-vector-transpose"
+
+using namespace mlir;
+using namespace mlir::vector;
+
+/// Given a 'transpose' pattern, prune the rightmost dimensions that are not
+/// transposed.
+static void pruneNonTransposedDims(ArrayRef transpose,
+                                   SmallVectorImpl &result) {
+  size_t numTransposedDims = transpose.size();
+  for (size_t transpDim : llvm::reverse(transpose)) {
+    if (transpDim != numTransposedDims - 1)
+      break;
+    numTransposedDims--;
+  }
+
+  result.append(transpose.begin(), transpose.begin() + numTransposedDims);
+}
+
+namespace {
+/// Progressive lowering of TransposeOp.
+/// One:
+///   %x = vector.transpose %y, [1, 0]
+/// is replaced by:
+///   %z = arith.constant dense<0.000000e+00>
+///   %0 = vector.extract %y[0, 0]
+///   %1 = vector.insert %0, %z [0, 0]
+///   ..
+///   %x = vector.insert .., .. [.., ..]
+class TransposeOpLowering : public OpRewritePattern {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  TransposeOpLowering(vector::VectorTransformsOptions vectorTransformOptions,
+                      MLIRContext *context, PatternBenefit benefit = 1)
+      : OpRewritePattern(context, benefit),
+        vectorTransformOptions(vectorTransformOptions) {}
+
+  LogicalResult matchAndRewrite(vector::TransposeOp op,
+                                PatternRewriter &rewriter) const override {
+    auto loc = op.getLoc();
+
+    Value input = op.getVector();
+    VectorType inputType = op.getSourceVectorType();
+    VectorType resType = op.getResultVectorType();
+
+    // Set up convenience transposition table.
+    SmallVector transp;
+    for (auto attr : op.getTransp())
+      transp.push_back(attr.cast().getInt());
+
+    if (vectorTransformOptions.vectorTransposeLowering ==
+            vector::VectorTransposeLowering::Shuffle &&
+        resType.getRank() == 2 && transp[0] == 1 && transp[1] == 0)
+      return rewriter.notifyMatchFailure(
+          op, "Options specify lowering to shuffle");
+
+    // Handle a true 2-D matrix transpose differently when requested.
+    if (vectorTransformOptions.vectorTransposeLowering ==
+            vector::VectorTransposeLowering::Flat &&
+        resType.getRank() == 2 && transp[0] == 1 && transp[1] == 0) {
+      Type flattenedType =
+          VectorType::get(resType.getNumElements(), resType.getElementType());
+      auto matrix =
+          rewriter.create(loc, flattenedType, input);
+      auto rows = rewriter.getI32IntegerAttr(resType.getShape()[0]);
+      auto columns = rewriter.getI32IntegerAttr(resType.getShape()[1]);
+      Value trans = rewriter.create(
+          loc, flattenedType, matrix, rows, columns);
+      rewriter.replaceOpWithNewOp(op, resType, trans);
+      return success();
+    }
+
+    // Generate unrolled extract/insert ops.
We do not unroll the rightmost + // (i.e., highest-order) dimensions that are not transposed and leave them + // in vector form to improve performance. Therefore, we prune those + // dimensions from the shape/transpose data structures used to generate the + // extract/insert ops. + SmallVector prunedTransp; + pruneNonTransposedDims(transp, prunedTransp); + size_t numPrunedDims = transp.size() - prunedTransp.size(); + auto prunedInShape = inputType.getShape().drop_back(numPrunedDims); + auto prunedInStrides = computeStrides(prunedInShape); + + // Generates the extract/insert operations for every scalar/vector element + // of the leftmost transposed dimensions. We traverse every transpose + // element using a linearized index that we delinearize to generate the + // appropriate indices for the extract/insert operations. + Value result = rewriter.create( + loc, resType, rewriter.getZeroAttr(resType)); + int64_t numTransposedElements = ShapedType::getNumElements(prunedInShape); + + for (int64_t linearIdx = 0; linearIdx < numTransposedElements; + ++linearIdx) { + auto extractIdxs = delinearize(linearIdx, prunedInStrides); + SmallVector insertIdxs(extractIdxs); + applyPermutationToVector(insertIdxs, prunedTransp); + Value extractOp = + rewriter.create(loc, input, extractIdxs); + result = + rewriter.create(loc, extractOp, result, insertIdxs); + } + + rewriter.replaceOp(op, result); + return success(); + } + +private: + /// Options to control the vector patterns. + vector::VectorTransformsOptions vectorTransformOptions; +}; + +/// Rewrite a 2-D vector.transpose as a sequence of: +/// vector.shape_cast 2D -> 1D +/// vector.shuffle +/// vector.shape_cast 1D -> 2D +class TransposeOp2DToShuffleLowering + : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + TransposeOp2DToShuffleLowering( + vector::VectorTransformsOptions vectorTransformOptions, + MLIRContext *context, PatternBenefit benefit = 1) + : OpRewritePattern(context, benefit), + vectorTransformOptions(vectorTransformOptions) {} + + LogicalResult matchAndRewrite(vector::TransposeOp op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + + VectorType srcType = op.getSourceVectorType(); + if (srcType.getRank() != 2) + return rewriter.notifyMatchFailure(op, "Not a 2D transpose"); + + SmallVector transp; + for (auto attr : op.getTransp()) + transp.push_back(attr.cast().getInt()); + if (transp[0] != 1 && transp[1] != 0) + return rewriter.notifyMatchFailure(op, "Not a 2D transpose permutation"); + + if (vectorTransformOptions.vectorTransposeLowering != + VectorTransposeLowering::Shuffle) + return rewriter.notifyMatchFailure(op, "Options do not ask for Shuffle"); + + int64_t m = srcType.getShape().front(), n = srcType.getShape().back(); + Value casted = rewriter.create( + loc, VectorType::get({m * n}, srcType.getElementType()), + op.getVector()); + SmallVector mask; + mask.reserve(m * n); + for (int64_t j = 0; j < n; ++j) + for (int64_t i = 0; i < m; ++i) + mask.push_back(i * n + j); + + Value shuffled = + rewriter.create(loc, casted, casted, mask); + rewriter.replaceOpWithNewOp( + op, op.getResultVectorType(), shuffled); + + return success(); + } + +private: + /// Options to control the vector patterns. 
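+  /// (Here, whether 2-D transposes should be lowered to vector.shuffle; see
+  /// VectorTransposeLowering::Shuffle.)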
+ vector::VectorTransformsOptions vectorTransformOptions; +}; +} // namespace + +void mlir::vector::populateVectorTransposeLoweringPatterns( + RewritePatternSet &patterns, VectorTransformsOptions options, + PatternBenefit benefit) { + patterns.add( + options, patterns.getContext(), benefit); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp index 38062b9893f1a..b0690f63422d9 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp @@ -16,6 +16,7 @@ #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" #include "mlir/Dialect/Vector/Utils/VectorUtils.h" #include "mlir/IR/BuiltinOps.h" diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp index ee23b5494f707..caf5822256bc6 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include #include +#include #include "mlir/Dialect/Affine/IR/AffineOps.h" #include "mlir/Dialect/Arith/IR/Arith.h" @@ -92,11 +92,11 @@ static Value createInBoundsCond(RewriterBase &b, } /// Split a vector.transfer operation into an in-bounds (i.e., no out-of-bounds -/// masking) fastpath and a slowpath. +/// masking) fast path and a slow path. /// If `ifOp` is not null and the result is `success, the `ifOp` points to the /// newly created conditional upon function return. -/// To accomodate for the fact that the original vector.transfer indexing may be -/// arbitrary and the slow path indexes @[0...0] in the temporary buffer, the +/// To accommodate for the fact that the original vector.transfer indexing may +/// be arbitrary and the slow path indexes @[0...0] in the temporary buffer, the /// scf.if op returns a view and values of type index. /// At this time, only vector.transfer_read case is implemented. /// @@ -107,11 +107,11 @@ static Value createInBoundsCond(RewriterBase &b, /// is transformed into: /// ``` /// %1:3 = scf.if (%inBounds) { -/// // fastpath, direct cast +/// // fast path, direct cast /// memref.cast %A: memref to compatibleMemRefType /// scf.yield %view : compatibleMemRefType, index, index /// } else { -/// // slowpath, not in-bounds vector.transfer or linalg.copy. +/// // slow path, not in-bounds vector.transfer or linalg.copy. /// memref.cast %alloc: memref to compatibleMemRefType /// scf.yield %4 : compatibleMemRefType, index, index // } @@ -172,12 +172,10 @@ static MemRefType getCastCompatibleMemRefType(MemRefType aT, MemRefType bT) { for (int64_t idx = 0, e = aT.getRank(); idx < e; ++idx) { resShape[idx] = (aShape[idx] == bShape[idx]) ? aShape[idx] : ShapedType::kDynamic; - resStrides[idx] = (aStrides[idx] == bStrides[idx]) - ? aStrides[idx] - : ShapedType::kDynamic; + resStrides[idx] = + (aStrides[idx] == bStrides[idx]) ? aStrides[idx] : ShapedType::kDynamic; } - resOffset = - (aOffset == bOffset) ? aOffset : ShapedType::kDynamic; + resOffset = (aOffset == bOffset) ? 
aOffset : ShapedType::kDynamic; return MemRefType::get( resShape, aT.getElementType(), StridedLayoutAttr::get(aT.getContext(), resOffset, resStrides)); @@ -634,7 +632,34 @@ LogicalResult mlir::vector::splitFullAndPartialTransfer( return success(); } -LogicalResult mlir::vector::VectorTransferFullPartialRewriter::matchAndRewrite( +namespace { +/// Apply `splitFullAndPartialTransfer` selectively via a pattern. This pattern +/// may take an extra filter to perform selection at a finer granularity. +struct VectorTransferFullPartialRewriter : public RewritePattern { + using FilterConstraintType = + std::function; + + explicit VectorTransferFullPartialRewriter( + MLIRContext *context, + VectorTransformsOptions options = VectorTransformsOptions(), + FilterConstraintType filter = + [](VectorTransferOpInterface op) { return success(); }, + PatternBenefit benefit = 1) + : RewritePattern(MatchAnyOpTypeTag(), benefit, context), options(options), + filter(std::move(filter)) {} + + /// Performs the rewrite. + LogicalResult matchAndRewrite(Operation *op, + PatternRewriter &rewriter) const override; + +private: + VectorTransformsOptions options; + FilterConstraintType filter; +}; + +} // namespace + +LogicalResult VectorTransferFullPartialRewriter::matchAndRewrite( Operation *op, PatternRewriter &rewriter) const { auto xferOp = dyn_cast(op); if (!xferOp || failed(splitFullAndPartialTransferPrecondition(xferOp)) || @@ -642,3 +667,9 @@ LogicalResult mlir::vector::VectorTransferFullPartialRewriter::matchAndRewrite( return failure(); return splitFullAndPartialTransfer(rewriter, xferOp, options); } + +void mlir::vector::populateVectorTransferFullPartialPatterns( + RewritePatternSet &patterns, const VectorTransformsOptions &options) { + patterns.add(patterns.getContext(), + options); +} diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp index fe59143ebd55f..20fc59e874ab6 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp @@ -51,102 +51,6 @@ using namespace mlir; using namespace mlir::vector; -// Helper to find an index in an affine map. -static std::optional getResultIndex(AffineMap map, int64_t index) { - for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { - int64_t idx = map.getDimPosition(i); - if (idx == index) - return i; - } - return std::nullopt; -} - -// Helper to construct iterator types with one index removed. -static SmallVector adjustIter(ArrayAttr iteratorTypes, - int64_t index) { - SmallVector results; - for (const auto &it : llvm::enumerate(iteratorTypes)) { - int64_t idx = it.index(); - if (idx == index) - continue; - results.push_back(it.value()); - } - return results; -} - -// Helper to construct an affine map with one index removed. -static AffineMap adjustMap(AffineMap map, int64_t index, - PatternRewriter &rewriter) { - auto *ctx = rewriter.getContext(); - SmallVector results; - for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { - int64_t idx = map.getDimPosition(i); - if (idx == index) - continue; - // Re-insert remaining indices, but renamed when occurring - // after the removed index. - auto targetExpr = getAffineDimExpr(idx < index ? idx : idx - 1, ctx); - results.push_back(targetExpr); - } - return AffineMap::get(map.getNumDims() - 1, 0, results, ctx); -} - -// Helper method to possibly drop a dimension in a load. 
-// TODO -static Value reshapeLoad(Location loc, Value val, VectorType type, - int64_t index, int64_t pos, - PatternRewriter &rewriter) { - if (index == -1) - return val; - Type lowType = VectorType::Builder(type).dropDim(0); - // At extraction dimension? - if (index == 0) { - auto posAttr = rewriter.getI64ArrayAttr(pos); - return rewriter.create(loc, lowType, val, posAttr); - } - // Unroll leading dimensions. - VectorType vType = lowType.cast(); - Type resType = VectorType::Builder(type).dropDim(index); - auto resVectorType = resType.cast(); - Value result = rewriter.create( - loc, resVectorType, rewriter.getZeroAttr(resVectorType)); - for (int64_t d = 0, e = resVectorType.getDimSize(0); d < e; d++) { - auto posAttr = rewriter.getI64ArrayAttr(d); - Value ext = rewriter.create(loc, vType, val, posAttr); - Value load = reshapeLoad(loc, ext, vType, index - 1, pos, rewriter); - result = rewriter.create(loc, resVectorType, load, result, - posAttr); - } - return result; -} - -// Helper method to possibly drop a dimension in a store. -// TODO -static Value reshapeStore(Location loc, Value val, Value result, - VectorType type, int64_t index, int64_t pos, - PatternRewriter &rewriter) { - // Unmodified? - if (index == -1) - return val; - // At insertion dimension? - if (index == 0) { - auto posAttr = rewriter.getI64ArrayAttr(pos); - return rewriter.create(loc, type, val, result, posAttr); - } - // Unroll leading dimensions. - Type lowType = VectorType::Builder(type).dropDim(0); - VectorType vType = lowType.cast(); - Type insType = VectorType::Builder(vType).dropDim(0); - for (int64_t d = 0, e = type.getDimSize(0); d < e; d++) { - auto posAttr = rewriter.getI64ArrayAttr(d); - Value ext = rewriter.create(loc, vType, result, posAttr); - Value ins = rewriter.create(loc, insType, val, posAttr); - Value sto = reshapeStore(loc, ins, ext, vType, index - 1, pos, rewriter); - result = rewriter.create(loc, type, sto, result, posAttr); - } - return result; -} - template static SmallVector extractVector(ArrayAttr arrayAttr) { return llvm::to_vector<4>(llvm::map_range( @@ -154,61 +58,11 @@ static SmallVector extractVector(ArrayAttr arrayAttr) { [](IntegerAttr attr) { return static_cast(attr.getInt()); })); } -/// Helper to create arithmetic operation associated with a kind of contraction. -static std::optional -createContractArithOp(Location loc, Value x, Value y, Value acc, - vector::CombiningKind kind, PatternRewriter &rewriter, - bool isInt, Value mask = Value()) { - using vector::CombiningKind; - Value mul; - - if (isInt) { - if (kind == CombiningKind::MINF || kind == CombiningKind::MAXF) - // Only valid for floating point types. - return std::nullopt; - mul = rewriter.create(loc, x, y); - } else { - // Float case. - if (kind == CombiningKind::AND || kind == CombiningKind::MINUI || - kind == CombiningKind::MINSI || kind == CombiningKind::MAXUI || - kind == CombiningKind::MAXSI || kind == CombiningKind::OR || - kind == CombiningKind::XOR) - // Only valid for integer types. - return std::nullopt; - // Special case for fused multiply-add. - if (acc && acc.getType().isa() && kind == CombiningKind::ADD) { - Value fma = rewriter.create(loc, x, y, acc); - if (mask) - // The fma op doesn't need explicit masking. However, fma ops used in - // reductions must preserve previous 'acc' values for masked-out lanes. 
- fma = selectPassthru(rewriter, mask, fma, acc); - return fma; - } - mul = rewriter.create(loc, x, y); - } - - if (!acc) - return std::optional(mul); - - return makeArithReduction(rewriter, loc, kind, mul, acc, mask); -} - -/// Return the positions of the reductions in the given map. -static SmallVector getReductionIndex(AffineMap map, - ArrayAttr iteratorTypes) { - SmallVector dimsIdx; - for (unsigned i = 0, e = map.getNumResults(); i < e; i++) { - if (isReductionIterator(iteratorTypes[map.getDimPosition(i)])) - dimsIdx.push_back(i); - } - return dimsIdx; -} - -/// Look for a given dimension in an affine map and return its position. Return -/// std::nullopt if the dimension is not in the map results. -static std::optional getDimPosition(AffineMap map, unsigned dim) { - for (unsigned i = 0, e = map.getNumResults(); i < e; i++) { - if (map.getDimPosition(i) == dim) +// Helper to find an index in an affine map. +static std::optional getResultIndex(AffineMap map, int64_t index) { + for (int64_t i = 0, e = map.getNumResults(); i < e; ++i) { + int64_t idx = map.getDimPosition(i); + if (idx == index) return i; } return std::nullopt; @@ -264,735 +118,6 @@ struct ShapeCastOpFolder : public OpRewritePattern { } }; -/// Progressive lowering of BroadcastOp. -class BroadcastOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::BroadcastOp op, - PatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - VectorType dstType = op.getResultVectorType(); - VectorType srcType = op.getSourceType().dyn_cast(); - Type eltType = dstType.getElementType(); - - // Scalar to any vector can use splat. - if (!srcType) { - rewriter.replaceOpWithNewOp(op, dstType, op.getSource()); - return success(); - } - - // Determine rank of source and destination. - int64_t srcRank = srcType.getRank(); - int64_t dstRank = dstType.getRank(); - - // Stretching scalar inside vector (e.g. vector<1xf32>) can use splat. - if (srcRank <= 1 && dstRank == 1) { - Value ext; - if (srcRank == 0) - ext = rewriter.create(loc, op.getSource()); - else - ext = rewriter.create(loc, op.getSource(), 0); - rewriter.replaceOpWithNewOp(op, dstType, ext); - return success(); - } - - // Duplicate this rank. - // For example: - // %x = broadcast %y : k-D to n-D, k < n - // becomes: - // %b = broadcast %y : k-D to (n-1)-D - // %x = [%b,%b,%b,%b] : n-D - // becomes: - // %b = [%y,%y] : (n-1)-D - // %x = [%b,%b,%b,%b] : n-D - if (srcRank < dstRank) { - // Duplication. - VectorType resType = - VectorType::get(dstType.getShape().drop_front(), eltType); - Value bcst = - rewriter.create(loc, resType, op.getSource()); - Value result = rewriter.create( - loc, dstType, rewriter.getZeroAttr(dstType)); - for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) - result = rewriter.create(loc, bcst, result, d); - rewriter.replaceOp(op, result); - return success(); - } - - // Find non-matching dimension, if any. - assert(srcRank == dstRank); - int64_t m = -1; - for (int64_t r = 0; r < dstRank; r++) - if (srcType.getDimSize(r) != dstType.getDimSize(r)) { - m = r; - break; - } - - // All trailing dimensions are the same. Simply pass through. - if (m == -1) { - rewriter.replaceOp(op, op.getSource()); - return success(); - } - - // Any non-matching dimension forces a stretch along this rank. 
- // For example: - // %x = broadcast %y : vector<4x1x2xf32> to vector<4x2x2xf32> - // becomes: - // %a = broadcast %y[0] : vector<1x2xf32> to vector<2x2xf32> - // %b = broadcast %y[1] : vector<1x2xf32> to vector<2x2xf32> - // %c = broadcast %y[2] : vector<1x2xf32> to vector<2x2xf32> - // %d = broadcast %y[3] : vector<1x2xf32> to vector<2x2xf32> - // %x = [%a,%b,%c,%d] - // becomes: - // %u = broadcast %y[0][0] : vector<2xf32> to vector <2x2xf32> - // %v = broadcast %y[1][0] : vector<2xf32> to vector <2x2xf32> - // %a = [%u, %v] - // .. - // %x = [%a,%b,%c,%d] - VectorType resType = - VectorType::get(dstType.getShape().drop_front(), eltType); - Value result = rewriter.create( - loc, dstType, rewriter.getZeroAttr(dstType)); - if (m == 0) { - // Stetch at start. - Value ext = rewriter.create(loc, op.getSource(), 0); - Value bcst = rewriter.create(loc, resType, ext); - for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) - result = rewriter.create(loc, bcst, result, d); - } else { - // Stetch not at start. - for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) { - Value ext = rewriter.create(loc, op.getSource(), d); - Value bcst = rewriter.create(loc, resType, ext); - result = rewriter.create(loc, bcst, result, d); - } - } - rewriter.replaceOp(op, result); - return success(); - } -}; - -/// Given a 'transpose' pattern, prune the rightmost dimensions that are not -/// transposed. -void pruneNonTransposedDims(ArrayRef transpose, - SmallVectorImpl &result) { - size_t numTransposedDims = transpose.size(); - for (size_t transpDim : llvm::reverse(transpose)) { - if (transpDim != numTransposedDims - 1) - break; - numTransposedDims--; - } - - result.append(transpose.begin(), transpose.begin() + numTransposedDims); -} - -/// Progressive lowering of TransposeOp. -/// One: -/// %x = vector.transpose %y, [1, 0] -/// is replaced by: -/// %z = arith.constant dense<0.000000e+00> -/// %0 = vector.extract %y[0, 0] -/// %1 = vector.insert %0, %z [0, 0] -/// .. -/// %x = vector.insert .., .. [.., ..] -class TransposeOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - TransposeOpLowering(vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions) {} - - LogicalResult matchAndRewrite(vector::TransposeOp op, - PatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - - Value input = op.getVector(); - VectorType inputType = op.getSourceVectorType(); - VectorType resType = op.getResultVectorType(); - - // Set up convenience transposition table. - SmallVector transp; - for (auto attr : op.getTransp()) - transp.push_back(attr.cast().getInt()); - - if (vectorTransformOptions.vectorTransposeLowering == - vector::VectorTransposeLowering::Shuffle && - resType.getRank() == 2 && transp[0] == 1 && transp[1] == 0) - return rewriter.notifyMatchFailure( - op, "Options specifies lowering to shuffle"); - - // Handle a true 2-D matrix transpose differently when requested. 
- if (vectorTransformOptions.vectorTransposeLowering == - vector::VectorTransposeLowering::Flat && - resType.getRank() == 2 && transp[0] == 1 && transp[1] == 0) { - Type flattenedType = - VectorType::get(resType.getNumElements(), resType.getElementType()); - auto matrix = - rewriter.create(loc, flattenedType, input); - auto rows = rewriter.getI32IntegerAttr(resType.getShape()[0]); - auto columns = rewriter.getI32IntegerAttr(resType.getShape()[1]); - Value trans = rewriter.create( - loc, flattenedType, matrix, rows, columns); - rewriter.replaceOpWithNewOp(op, resType, trans); - return success(); - } - - // Generate unrolled extract/insert ops. We do not unroll the rightmost - // (i.e., highest-order) dimensions that are not transposed and leave them - // in vector form to improve performance. Therefore, we prune those - // dimensions from the shape/transpose data structures used to generate the - // extract/insert ops. - SmallVector prunedTransp; - pruneNonTransposedDims(transp, prunedTransp); - size_t numPrunedDims = transp.size() - prunedTransp.size(); - auto prunedInShape = inputType.getShape().drop_back(numPrunedDims); - auto prunedInStrides = computeStrides(prunedInShape); - - // Generates the extract/insert operations for every scalar/vector element - // of the leftmost transposed dimensions. We traverse every transpose - // element using a linearized index that we delinearize to generate the - // appropriate indices for the extract/insert operations. - Value result = rewriter.create( - loc, resType, rewriter.getZeroAttr(resType)); - int64_t numTransposedElements = ShapedType::getNumElements(prunedInShape); - - for (int64_t linearIdx = 0; linearIdx < numTransposedElements; - ++linearIdx) { - auto extractIdxs = delinearize(linearIdx, prunedInStrides); - SmallVector insertIdxs(extractIdxs); - applyPermutationToVector(insertIdxs, prunedTransp); - Value extractOp = - rewriter.create(loc, input, extractIdxs); - result = - rewriter.create(loc, extractOp, result, insertIdxs); - } - - rewriter.replaceOp(op, result); - return success(); - } - -private: - /// Options to control the vector patterns. 
- vector::VectorTransformsOptions vectorTransformOptions; -}; - -/// Rewrite a 2-D vector.transpose as a sequence of: -/// vector.shape_cast 2D -> 1D -/// vector.shuffle -/// vector.shape_cast 1D -> 2D -class TransposeOp2DToShuffleLowering - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - TransposeOp2DToShuffleLowering( - vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions) {} - - LogicalResult matchAndRewrite(vector::TransposeOp op, - PatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - - VectorType srcType = op.getSourceVectorType(); - if (srcType.getRank() != 2) - return rewriter.notifyMatchFailure(op, "Not a 2D transpose"); - - SmallVector transp; - for (auto attr : op.getTransp()) - transp.push_back(attr.cast().getInt()); - if (transp[0] != 1 && transp[1] != 0) - return rewriter.notifyMatchFailure(op, "Not a 2D transpose permutation"); - - if (vectorTransformOptions.vectorTransposeLowering != - VectorTransposeLowering::Shuffle) - return rewriter.notifyMatchFailure(op, "Options do not ask for Shuffle"); - - int64_t m = srcType.getShape().front(), n = srcType.getShape().back(); - Value casted = rewriter.create( - loc, VectorType::get({m * n}, srcType.getElementType()), - op.getVector()); - SmallVector mask; - mask.reserve(m * n); - for (int64_t j = 0; j < n; ++j) - for (int64_t i = 0; i < m; ++i) - mask.push_back(i * n + j); - - Value shuffled = - rewriter.create(loc, casted, casted, mask); - rewriter.replaceOpWithNewOp( - op, op.getResultVectorType(), shuffled); - - return success(); - } - -private: - /// Options to control the vector patterns. - vector::VectorTransformsOptions vectorTransformOptions; -}; - -/// Progressive lowering of OuterProductOp. -/// One: -/// %x = vector.outerproduct %lhs, %rhs, %acc -/// is replaced by: -/// %z = zero-result -/// %0 = vector.extract %lhs[0] -/// %1 = vector.broadcast %0 -/// %2 = vector.extract %acc[0] -/// %3 = vector.fma %1, %rhs, %2 -/// %4 = vector.insert %3, %z[0] -/// .. -/// %x = vector.insert %.., %..[N-1] -/// -class OuterProductOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::OuterProductOp op, - PatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - - VectorType lhsType = op.getOperandVectorTypeLHS(); - VectorType rhsType = op.getOperandTypeRHS().dyn_cast(); - VectorType resType = op.getResultVectorType(); - Type eltType = resType.getElementType(); - bool isInt = eltType.isa(); - Value acc = (op.getAcc().empty()) ? nullptr : op.getAcc()[0]; - vector::CombiningKind kind = op.getKind(); - - // Vector mask setup. - OpBuilder::InsertionGuard guard(rewriter); - auto maskableOp = cast(op.getOperation()); - Operation *rootOp; - Value mask; - if (maskableOp.isMasked()) { - rewriter.setInsertionPoint(maskableOp.getMaskingOp()); - rootOp = maskableOp.getMaskingOp(); - mask = maskableOp.getMaskingOp().getMask(); - } else { - rootOp = op; - } - - if (!rhsType) { - // Special case: AXPY operation. 
- Value b = rewriter.create(loc, lhsType, op.getRhs()); - std::optional mult = createContractArithOp( - loc, op.getLhs(), b, acc, kind, rewriter, isInt, mask); - if (!mult.has_value()) - return failure(); - rewriter.replaceOp(rootOp, *mult); - return success(); - } - - Value result = rewriter.create( - loc, resType, rewriter.getZeroAttr(resType)); - for (int64_t d = 0, e = resType.getDimSize(0); d < e; ++d) { - auto pos = rewriter.getI64ArrayAttr(d); - Value x = rewriter.create(loc, op.getLhs(), pos); - Value a = rewriter.create(loc, rhsType, x); - Value r = nullptr; - if (acc) - r = rewriter.create(loc, acc, pos); - Value extrMask; - if (mask) - extrMask = rewriter.create(loc, mask, pos); - - std::optional m = createContractArithOp( - loc, a, op.getRhs(), r, kind, rewriter, isInt, extrMask); - if (!m.has_value()) - return failure(); - result = rewriter.create(loc, resType, *m, result, pos); - } - - rewriter.replaceOp(rootOp, result); - return success(); - } -}; - -/// Lower vector.contract with all size one reduction dimensions to -/// elementwise ops when possible. -struct ContractOpToElementwise - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - using FilterConstraintType = - std::function; - static LogicalResult defaultFilter(vector::ContractionOp op) { - return success(); - } - ContractOpToElementwise( - vector::VectorTransformsOptions vectorTransformOptions, - MLIRContext *context, PatternBenefit benefit = 1, - const FilterConstraintType &constraint = defaultFilter) - : OpRewritePattern(context, benefit), - vectorTransformOptions(vectorTransformOptions), filter(defaultFilter) {} - - LogicalResult matchAndRewrite(vector::ContractionOp contractOp, - PatternRewriter &rewriter) const override { - // TODO: Support vector.mask. - auto maskableOp = cast(contractOp.getOperation()); - if (maskableOp.isMasked()) - return failure(); - - // TODO: Remove native masks from contraction op? - if (!contractOp.getMasks().empty()) - return failure(); - - if (failed(filter(contractOp))) - return failure(); - - if (vectorTransformOptions.vectorContractLowering != - vector::VectorContractLowering::ParallelArith) - return failure(); - - ArrayRef lhsShape = contractOp.getLhsType().getShape(); - ArrayRef rhsShape = contractOp.getRhsType().getShape(); - AffineMap lhsMap = contractOp.getIndexingMapsArray()[0]; - AffineMap rhsMap = contractOp.getIndexingMapsArray()[1]; - SmallVector lhsReductionDims = - getReductionIndex(lhsMap, contractOp.getIteratorTypes()); - SmallVector rhsReductionDims = - getReductionIndex(rhsMap, contractOp.getIteratorTypes()); - // All the reduction dimensions must be a size 1. 
- for (int64_t dim : lhsReductionDims) { - if (lhsShape[dim] != 1) - return failure(); - } - for (int64_t dim : rhsReductionDims) { - if (rhsShape[dim] != 1) - return failure(); - } - AffineMap accMap = contractOp.getIndexingMapsArray()[2]; - unsigned numParallelDims = accMap.getNumResults(); - unsigned numLhsDimToBroadcast = - numParallelDims - (lhsMap.getNumResults() - lhsReductionDims.size()); - unsigned numRhsDimToBroadcast = - numParallelDims - (rhsMap.getNumResults() - rhsReductionDims.size()); - SmallVector lhsDims; - SmallVector lhsTranspose; - SmallVector rhsDims; - SmallVector rhsTranspose; - for (int64_t dim : lhsReductionDims) - lhsTranspose.push_back(numLhsDimToBroadcast + dim); - for (int64_t dim : rhsReductionDims) - rhsTranspose.push_back(numRhsDimToBroadcast + dim); - // Loop through the parallel dimensions to calculate the dimensions to - // broadcast and to permute in order to extract only parallel dimensions. - for (unsigned i = 0; i < numParallelDims; i++) { - std::optional lhsDim = - getDimPosition(lhsMap, accMap.getDimPosition(i)); - if (lhsDim) { - lhsTranspose.push_back(numLhsDimToBroadcast + *lhsDim); - } else { - // If the parallel dimension doesn't exist we will have to broadcast it. - lhsDims.push_back( - contractOp.getResultType().cast().getDimSize(i)); - lhsTranspose.push_back(lhsDims.size() - 1); - } - std::optional rhsDim = - getDimPosition(rhsMap, accMap.getDimPosition(i)); - if (rhsDim) { - rhsTranspose.push_back(numRhsDimToBroadcast + *rhsDim); - } else { - // If the parallel dimension doesn't exist we will have to broadcast it. - rhsDims.push_back( - contractOp.getResultType().cast().getDimSize(i)); - rhsTranspose.push_back(rhsDims.size() - 1); - } - } - Value newLhs = contractOp.getLhs(); - Value newRhs = contractOp.getRhs(); - Location loc = contractOp.getLoc(); - if (!lhsDims.empty()) { - lhsDims.append(lhsShape.begin(), lhsShape.end()); - auto expandedType = - VectorType::get(lhsDims, contractOp.getLhsType().getElementType()); - newLhs = rewriter.create(loc, expandedType, newLhs); - } - if (!rhsDims.empty()) { - rhsDims.append(rhsShape.begin(), rhsShape.end()); - auto expandedType = - VectorType::get(rhsDims, contractOp.getRhsType().getElementType()); - newRhs = rewriter.create(loc, expandedType, newRhs); - } - bool isInt = contractOp.getLhsType().getElementType().isIntOrIndex(); - newLhs = rewriter.create(loc, newLhs, lhsTranspose); - newRhs = rewriter.create(loc, newRhs, rhsTranspose); - SmallVector lhsOffsets(lhsReductionDims.size(), 0); - SmallVector rhsOffsets(rhsReductionDims.size(), 0); - newLhs = rewriter.create( - loc, newLhs, rewriter.getI64ArrayAttr(lhsOffsets)); - newRhs = rewriter.create( - loc, newRhs, rewriter.getI64ArrayAttr(rhsOffsets)); - std::optional result = - createContractArithOp(loc, newLhs, newRhs, contractOp.getAcc(), - contractOp.getKind(), rewriter, isInt); - rewriter.replaceOp(contractOp, {*result}); - return success(); - } - -private: - /// Options to control the vector patterns. - vector::VectorTransformsOptions vectorTransformOptions; - FilterConstraintType filter; -}; - -/// Progressive lowering of ConstantMaskOp. -/// One: -/// %x = vector.constant_mask [a,b] -/// is replaced by: -/// %z = zero-result -/// %l = vector.constant_mask [b] -/// %4 = vector.insert %l, %z[0] -/// .. -/// %x = vector.insert %l, %..[a-1] -/// until a one-dimensional vector is reached. All these operations -/// will be folded at LLVM IR level. 
-class ConstantMaskOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::ConstantMaskOp op, - PatternRewriter &rewriter) const override { - auto loc = op.getLoc(); - auto dstType = op.getType(); - auto eltType = dstType.getElementType(); - auto dimSizes = op.getMaskDimSizes(); - int64_t rank = dstType.getRank(); - - if (rank == 0) { - assert(dimSizes.size() == 1 && - "Expected exactly one dim size for a 0-D vector"); - bool value = dimSizes[0].cast().getInt() == 1; - rewriter.replaceOpWithNewOp( - op, dstType, - DenseIntElementsAttr::get( - VectorType::get(ArrayRef{}, rewriter.getI1Type()), - ArrayRef{value})); - return success(); - } - - // Scalable constant masks can only be lowered for the "none set" case. - if (dstType.cast().isScalable()) { - rewriter.replaceOpWithNewOp( - op, DenseElementsAttr::get(dstType, false)); - return success(); - } - - int64_t trueDim = std::min(dstType.getDimSize(0), - dimSizes[0].cast().getInt()); - - if (rank == 1) { - // Express constant 1-D case in explicit vector form: - // [T,..,T,F,..,F]. - SmallVector values(dstType.getDimSize(0)); - for (int64_t d = 0; d < trueDim; d++) - values[d] = true; - rewriter.replaceOpWithNewOp( - op, dstType, rewriter.getBoolVectorAttr(values)); - return success(); - } - - VectorType lowType = - VectorType::get(dstType.getShape().drop_front(), eltType); - SmallVector newDimSizes; - for (int64_t r = 1; r < rank; r++) - newDimSizes.push_back(dimSizes[r].cast().getInt()); - Value trueVal = rewriter.create( - loc, lowType, rewriter.getI64ArrayAttr(newDimSizes)); - Value result = rewriter.create( - loc, dstType, rewriter.getZeroAttr(dstType)); - for (int64_t d = 0; d < trueDim; d++) { - auto pos = rewriter.getI64ArrayAttr(d); - result = - rewriter.create(loc, dstType, trueVal, result, pos); - } - rewriter.replaceOp(op, result); - return success(); - } -}; - -/// Progressive lowering of CreateMaskOp. -/// One: -/// %x = vector.create_mask %a, ... : vector -/// is replaced by: -/// %l = vector.create_mask ... : vector<...> ; one lower rank -/// %0 = arith.cmpi "slt", %ci, %a | -/// %1 = select %0, %l, %zeroes | -/// %r = vector.insert %1, %pr [i] | d-times -/// %x = .... -/// until a one-dimensional vector is reached. 
-class CreateMaskOpLowering : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::CreateMaskOp op, - PatternRewriter &rewriter) const override { - auto dstType = op.getResult().getType().cast(); - int64_t rank = dstType.getRank(); - if (rank <= 1) - return rewriter.notifyMatchFailure( - op, "0-D and 1-D vectors are handled separately"); - - auto loc = op.getLoc(); - auto eltType = dstType.getElementType(); - int64_t dim = dstType.getDimSize(0); - Value idx = op.getOperand(0); - - VectorType lowType = - VectorType::get(dstType.getShape().drop_front(), eltType); - Value trueVal = rewriter.create( - loc, lowType, op.getOperands().drop_front()); - Value falseVal = rewriter.create( - loc, lowType, rewriter.getZeroAttr(lowType)); - Value result = rewriter.create( - loc, dstType, rewriter.getZeroAttr(dstType)); - for (int64_t d = 0; d < dim; d++) { - Value bnd = - rewriter.create(loc, rewriter.getIndexAttr(d)); - Value val = rewriter.create(loc, arith::CmpIPredicate::slt, - bnd, idx); - Value sel = rewriter.create(loc, val, trueVal, falseVal); - auto pos = rewriter.getI64ArrayAttr(d); - result = - rewriter.create(loc, dstType, sel, result, pos); - } - rewriter.replaceOp(op, result); - return success(); - } -}; - -/// ShapeOp 2D -> 1D downcast serves the purpose of flattening 2-D to 1-D -/// vectors progressively on the way to target llvm.matrix intrinsics. -/// This iterates over the most major dimension of the 2-D vector and performs -/// rewrites into: -/// vector.extract from 2-D + vector.insert_strided_slice offset into 1-D -class ShapeCastOp2DDownCastRewritePattern - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::ShapeCastOp op, - PatternRewriter &rewriter) const override { - auto sourceVectorType = op.getSourceVectorType(); - auto resultVectorType = op.getResultVectorType(); - if (sourceVectorType.getRank() != 2 || resultVectorType.getRank() != 1) - return failure(); - - auto loc = op.getLoc(); - Value desc = rewriter.create( - loc, resultVectorType, rewriter.getZeroAttr(resultVectorType)); - unsigned mostMinorVectorSize = sourceVectorType.getShape()[1]; - for (int64_t i = 0, e = sourceVectorType.getShape().front(); i != e; ++i) { - Value vec = rewriter.create(loc, op.getSource(), i); - desc = rewriter.create( - loc, vec, desc, - /*offsets=*/i * mostMinorVectorSize, /*strides=*/1); - } - rewriter.replaceOp(op, desc); - return success(); - } -}; - -/// ShapeOp 1D -> 2D upcast serves the purpose of unflattening 2-D from 1-D -/// vectors progressively. -/// This iterates over the most major dimension of the 2-D vector and performs -/// rewrites into: -/// vector.extract_strided_slice from 1-D + vector.insert into 2-D -/// Note that 1-D extract_strided_slice are lowered to efficient vector.shuffle. 
-class ShapeCastOp2DUpCastRewritePattern - : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::ShapeCastOp op, - PatternRewriter &rewriter) const override { - auto sourceVectorType = op.getSourceVectorType(); - auto resultVectorType = op.getResultVectorType(); - if (sourceVectorType.getRank() != 1 || resultVectorType.getRank() != 2) - return failure(); - - auto loc = op.getLoc(); - Value desc = rewriter.create( - loc, resultVectorType, rewriter.getZeroAttr(resultVectorType)); - unsigned mostMinorVectorSize = resultVectorType.getShape()[1]; - for (int64_t i = 0, e = resultVectorType.getShape().front(); i != e; ++i) { - Value vec = rewriter.create( - loc, op.getSource(), /*offsets=*/i * mostMinorVectorSize, - /*sizes=*/mostMinorVectorSize, - /*strides=*/1); - desc = rewriter.create(loc, vec, desc, i); - } - rewriter.replaceOp(op, desc); - return success(); - } -}; - -// We typically should not lower general shape cast operations into data -// movement instructions, since the assumption is that these casts are -// optimized away during progressive lowering. For completeness, however, -// we fall back to a reference implementation that moves all elements -// into the right place if we get here. -class ShapeCastOpRewritePattern : public OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::ShapeCastOp op, - PatternRewriter &rewriter) const override { - Location loc = op.getLoc(); - auto sourceVectorType = op.getSourceVectorType(); - auto resultVectorType = op.getResultVectorType(); - - // Special case 2D/1D lowerings with better implementations. - // TODO: make is ND/1D to allow generic ND->1D->MD. - int64_t srcRank = sourceVectorType.getRank(); - int64_t resRank = resultVectorType.getRank(); - if ((srcRank == 2 && resRank == 1) || (srcRank == 1 && resRank == 2)) - return failure(); - - // Generic ShapeCast lowering path goes all the way down to unrolled scalar - // extract/insert chains. - // TODO: consider evolving the semantics to only allow 1D source or dest and - // drop this potentially very expensive lowering. - // Compute number of elements involved in the reshape. - int64_t numElts = 1; - for (int64_t r = 0; r < srcRank; r++) - numElts *= sourceVectorType.getDimSize(r); - // Replace with data movement operations: - // x[0,0,0] = y[0,0] - // x[0,0,1] = y[0,1] - // x[0,1,0] = y[0,2] - // etc., incrementing the two index vectors "row-major" - // within the source and result shape. - SmallVector srcIdx(srcRank); - SmallVector resIdx(resRank); - Value result = rewriter.create( - loc, resultVectorType, rewriter.getZeroAttr(resultVectorType)); - for (int64_t i = 0; i < numElts; i++) { - if (i != 0) { - incIdx(srcIdx, sourceVectorType, srcRank - 1); - incIdx(resIdx, resultVectorType, resRank - 1); - } - Value e = rewriter.create(loc, op.getSource(), srcIdx); - result = rewriter.create(loc, e, result, resIdx); - } - rewriter.replaceOp(op, result); - return success(); - } - -private: - static void incIdx(SmallVector &idx, VectorType tp, int64_t r) { - assert(0 <= r && r < tp.getRank()); - if (++idx[r] == tp.getDimSize(r)) { - idx[r] = 0; - incIdx(idx, tp, r - 1); - } - } -}; - /// Convert MulIOp/MulFOp + MultiDimReductionOp into ContractionOp. /// Ex: /// ``` @@ -1425,967 +550,6 @@ struct ReorderElementwiseOpsOnTranspose final } }; -} // namespace - -/// Creates an AddIOp if `isInt` is true otherwise create an arith::AddFOp using -/// operands `x` and `y`. 
-static Value createAdd(Location loc, Value x, Value y, bool isInt, - PatternRewriter &rewriter) { - if (isInt) - return rewriter.create(loc, x, y); - return rewriter.create(loc, x, y); -} - -/// Creates a MulIOp if `isInt` is true otherwise create an MulFOp using -/// operands `x and `y`. -static Value createMul(Location loc, Value x, Value y, bool isInt, - PatternRewriter &rewriter) { - if (isInt) - return rewriter.create(loc, x, y); - return rewriter.create(loc, x, y); -} - -namespace mlir { - -/// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul -/// semantics to: -/// ``` -/// %mta = maybe_transpose -/// %mtb = maybe_transpose -/// %flattened_a = vector.shape_cast %mta -/// %flattened_b = vector.shape_cast %mtb -/// %flattened_d = vector.matmul %flattened_a, %flattened_b -/// %mtd = vector.shape_cast %flattened_d -/// %d = maybe_untranspose %mtd -/// %e = add %c, %d -/// ``` -/// `vector.matmul` later lowers to `llvm.matrix.multiply`. -// -/// This only kicks in when VectorTransformsOptions is set to `Matmul`. -/// vector.transpose operations are inserted if the vector.contract op is not a -/// row-major matrix multiply. -LogicalResult -ContractionOpToMatmulOpLowering::matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rew) const { - // TODO: Support vector.mask. - auto maskableOp = cast(op.getOperation()); - if (maskableOp.isMasked()) - return failure(); - - // TODO: Remove native masks from contraction op? - if (!op.getMasks().empty()) - return failure(); - if (vectorTransformOptions.vectorContractLowering != - vector::VectorContractLowering::Matmul) - return failure(); - if (failed(filter(op))) - return failure(); - - auto iteratorTypes = op.getIteratorTypes().getValue(); - if (!isParallelIterator(iteratorTypes[0]) || - !isParallelIterator(iteratorTypes[1]) || - !isReductionIterator(iteratorTypes[2])) - return failure(); - - Type elementType = op.getLhsType().getElementType(); - if (!elementType.isIntOrFloat()) - return failure(); - - Type dstElementType = op.getType(); - if (auto vecType = dstElementType.dyn_cast()) - dstElementType = vecType.getElementType(); - if (elementType != dstElementType) - return failure(); - - // Perform lhs + rhs transpositions to conform to matmul row-major semantics. - // Bail out if the contraction cannot be put in this form. - MLIRContext *ctx = op.getContext(); - Location loc = op.getLoc(); - AffineExpr m, n, k; - bindDims(rew.getContext(), m, n, k); - // LHS must be A(m, k) or A(k, m). - Value lhs = op.getLhs(); - auto lhsMap = op.getIndexingMapsArray()[0]; - if (lhsMap == AffineMap::get(3, 0, {k, m}, ctx)) - lhs = rew.create(loc, lhs, ArrayRef{1, 0}); - else if (lhsMap != AffineMap::get(3, 0, {m, k}, ctx)) - return failure(); - - // RHS must be B(k, n) or B(n, k). - Value rhs = op.getRhs(); - auto rhsMap = op.getIndexingMapsArray()[1]; - if (rhsMap == AffineMap::get(3, 0, {n, k}, ctx)) - rhs = rew.create(loc, rhs, ArrayRef{1, 0}); - else if (rhsMap != AffineMap::get(3, 0, {k, n}, ctx)) - return failure(); - - // At this point lhs and rhs are in row-major. 
- VectorType lhsType = lhs.getType().cast(); - VectorType rhsType = rhs.getType().cast(); - int64_t lhsRows = lhsType.getDimSize(0); - int64_t lhsColumns = lhsType.getDimSize(1); - int64_t rhsColumns = rhsType.getDimSize(1); - - Type flattenedLHSType = - VectorType::get(lhsType.getNumElements(), lhsType.getElementType()); - lhs = rew.create(loc, flattenedLHSType, lhs); - - Type flattenedRHSType = - VectorType::get(rhsType.getNumElements(), rhsType.getElementType()); - rhs = rew.create(loc, flattenedRHSType, rhs); - - Value mul = rew.create(loc, lhs, rhs, lhsRows, lhsColumns, - rhsColumns); - mul = rew.create( - loc, - VectorType::get({lhsRows, rhsColumns}, - getElementTypeOrSelf(op.getAcc().getType())), - mul); - - // ACC must be C(m, n) or C(n, m). - auto accMap = op.getIndexingMapsArray()[2]; - if (accMap == AffineMap::get(3, 0, {n, m}, ctx)) - mul = rew.create(loc, mul, ArrayRef{1, 0}); - else if (accMap != AffineMap::get(3, 0, {m, n}, ctx)) - llvm_unreachable("invalid contraction semantics"); - - Value res = - elementType.isa() - ? static_cast(rew.create(loc, op.getAcc(), mul)) - : static_cast( - rew.create(loc, op.getAcc(), mul)); - - rew.replaceOp(op, res); - return success(); -} - -namespace { - -/// Generate a vector implementation for matmat, matvec and tmatvec. -/// This unrolls outer-products along the reduction dimension. -struct UnrolledOuterProductGenerator - : public StructuredGenerator { - UnrolledOuterProductGenerator(RewriterBase &b, vector::ContractionOp op) - : StructuredGenerator(b, op), - kind(op.getKind()), lhs(op.getLhs()), rhs(op.getRhs()), - res(op.getAcc()), lhsType(op.getLhsType()) { - auto maskableOp = cast(op.getOperation()); - if (maskableOp.isMasked()) - mask = maskableOp.getMaskingOp().getMask(); - } - - Value t(Value v, ArrayRef perm = {1, 0}) { - if (!v) - return v; - return rewriter.create(loc, v, perm); - } - - Value promote(Value v, Type dstElementType) { - Type elementType = v.getType(); - auto vecType = elementType.dyn_cast(); - if (vecType) - elementType = vecType.getElementType(); - if (elementType == dstElementType) - return v; - Type promotedType = dstElementType; - if (vecType) - promotedType = VectorType::get(vecType.getShape(), promotedType); - if (dstElementType.isa()) - return rewriter.create(loc, promotedType, v); - return rewriter.create(loc, promotedType, v); - } - - FailureOr outerProd(Value lhs, Value rhs, Value res, int reductionSize, - std::optional maybeMask = std::nullopt) { - assert(reductionSize > 0); - // Incremental support for masking. - if (mask && !maybeMask.has_value()) - return failure(); - - Type resElementType = res.getType().cast().getElementType(); - for (int64_t k = 0; k < reductionSize; ++k) { - Value extractA = rewriter.create(loc, lhs, k); - Value extractB = rewriter.create(loc, rhs, k); - extractA = promote(extractA, resElementType); - extractB = promote(extractB, resElementType); - Value extractMask; - if (maybeMask.has_value() && maybeMask.value()) - extractMask = - rewriter.create(loc, maybeMask.value(), k); - - Operation *outerProdOp = rewriter.create( - loc, res.getType(), extractA, extractB, res, kind); - res = maskOperation(rewriter, outerProdOp, extractMask)->getResult(0); - } - return res; - } - - /// Two outer parallel, one inner reduction (matmat flavor). - FailureOr matmat() { - if (!iters({Par(), Par(), Red()})) - return failure(); - // Set up the parallel/reduction structure in the right form. 
- AffineExpr m, n, k; - bindDims(rewriter.getContext(), m, n, k); - // Classical row-major matmul: Just permute the lhs. - if (layout({{m, k}, {k, n}, {m, n}})) - return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1), - t(mask, {2, 0, 1})); - // TODO: may be better to fail and use some vector -> scalar reduction. - if (layout({{m, k}, {n, k}, {m, n}})) { - Value tlhs = t(lhs); - return outerProd(tlhs, t(rhs), res, lhsType.getDimSize(1)); - } - // No need to permute anything. - if (layout({{k, m}, {k, n}, {m, n}})) - return outerProd(lhs, rhs, res, lhsType.getDimSize(0)); - // Just permute the rhs. - if (layout({{k, m}, {n, k}, {m, n}})) - return outerProd(lhs, t(rhs), res, lhsType.getDimSize(0)); - // Transposed output: swap RHS and LHS. - // Classical row-major matmul: permute the lhs. - if (layout({{m, k}, {k, n}, {n, m}})) - return outerProd(rhs, t(lhs), res, lhsType.getDimSize(1)); - // TODO: may be better to fail and use some vector -> scalar reduction. - if (layout({{m, k}, {n, k}, {n, m}})) { - Value trhs = t(rhs); - return outerProd(trhs, t(lhs), res, lhsType.getDimSize(1)); - } - if (layout({{k, m}, {k, n}, {n, m}})) - return outerProd(rhs, lhs, res, lhsType.getDimSize(0)); - if (layout({{k, m}, {n, k}, {n, m}})) - return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0)); - return failure(); - } - - /// One outer parallel, one inner reduction (matvec flavor) - FailureOr matvec() { - if (!iters({Par(), Red()})) - return failure(); - AffineExpr m, k; - bindDims(rewriter.getContext(), m, k); - - // Case mat-vec: transpose. - if (layout({{m, k}, {k}, {m}})) - return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1), t(mask)); - // Case mat-trans-vec: ready to go. - if (layout({{k, m}, {k}, {m}})) - return outerProd(lhs, rhs, res, lhsType.getDimSize(0)); - // Case vec-mat: swap and transpose. - if (layout({{k}, {m, k}, {m}})) - return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0)); - // Case vec-mat-trans: swap and ready to go. - if (layout({{k}, {k, m}, {m}})) - return outerProd(rhs, lhs, res, lhsType.getDimSize(0)); - return failure(); - } - - // - // One outer reduction, one inner parallel (tmatvec flavor) - // - FailureOr tmatvec() { - if (!iters({Red(), Par()})) - return failure(); - AffineExpr k, m; - bindDims(rewriter.getContext(), k, m); - - // Case mat-vec: transpose. - if (layout({{m, k}, {k}, {m}})) - return outerProd(t(lhs), rhs, res, lhsType.getDimSize(1)); - // Case mat-trans-vec: ready to go. - if (layout({{k, m}, {k}, {m}})) - return outerProd(lhs, rhs, res, lhsType.getDimSize(0)); - // Case vec-mat: swap and transpose. - if (layout({{k}, {m, k}, {m}})) - return outerProd(t(rhs), lhs, res, lhsType.getDimSize(0)); - // Case vec-mat-trans: swap and ready to go. - if (layout({{k}, {k, m}, {m}})) - return outerProd(rhs, lhs, res, lhsType.getDimSize(0)); - return failure(); - } - -private: - vector::CombiningKind kind; - Value lhs, rhs, res, mask; - VectorType lhsType; -}; -} // namespace - -/// Progressively lower a `vector.contract %a, %b, %c` with row-major matmul -/// semantics to a reduction_size-unrolled sequence: -/// ``` -/// %at = vector.transpose %a, [1, 0] -/// %bRow0 = vector.extract %b[0] -/// %atRow0 = vector.extract %at[0] -/// %c0 = vector.outerproduct %atRow0, %bRow0, %c -/// ... 
-/// %bRowK = vector.extract %b[K] -/// %atRowK = vector.extract %at[K] -/// %cK = vector.outerproduct %atRowK, %bRowK, %cK-1 -/// ``` -/// -/// This only kicks in when VectorTransformsOptions is set to OuterProduct but -/// otherwise supports any layout permutation of the matrix-multiply. -LogicalResult ContractionOpToOuterProductOpLowering::matchAndRewrite( - vector::ContractionOp op, PatternRewriter &rewriter) const { - // TODO: Remove native masks from contraction op? - if (!op.getMasks().empty()) - return failure(); - - if (vectorTransformOptions.vectorContractLowering != - vector::VectorContractLowering::OuterProduct) - return failure(); - - if (failed(filter(op))) - return failure(); - - // Vector mask setup. - OpBuilder::InsertionGuard guard(rewriter); - auto maskableOp = cast(op.getOperation()); - Operation *rootOp; - if (maskableOp.isMasked()) { - rewriter.setInsertionPoint(maskableOp.getMaskingOp()); - rootOp = maskableOp.getMaskingOp(); - } else { - rootOp = op; - } - - UnrolledOuterProductGenerator e(rewriter, op); - FailureOr matmatRes = e.matmat(); - if (succeeded(matmatRes)) { - rewriter.replaceOp(rootOp, *matmatRes); - return success(); - } - FailureOr matvecRes = e.matvec(); - if (succeeded(matvecRes)) { - rewriter.replaceOp(rootOp, *matvecRes); - return success(); - } - FailureOr tmatvecRes = e.tmatvec(); - if (succeeded(tmatvecRes)) { - rewriter.replaceOp(rootOp, *tmatvecRes); - return success(); - } - - return failure(); -} - -LogicalResult -ContractionOpToDotLowering::matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const { - // TODO: Support vector.mask. - auto maskableOp = cast(op.getOperation()); - if (maskableOp.isMasked()) - return failure(); - - // TODO: Remove native masks from contraction op? - if (!op.getMasks().empty()) - return failure(); - - if (failed(filter(op))) - return failure(); - - if (vectorTransformOptions.vectorContractLowering != - vector::VectorContractLowering::Dot) - return failure(); - - auto iteratorTypes = op.getIteratorTypes().getValue(); - static constexpr std::array perm = {1, 0}; - Location loc = op.getLoc(); - Value lhs = op.getLhs(), rhs = op.getRhs(); - - using MapList = ArrayRef>; - auto infer = [](MapList m) { return AffineMap::inferFromExprList(m); }; - AffineExpr m, n, k; - bindDims(rewriter.getContext(), m, n, k); - SmallVector maps = op.getIndexingMapsArray(); - // - // In the following we wish to make the reduction dimension innermost so we - // can load vectors and just fmul + reduce into a scalar. - // - if (isParallelIterator(iteratorTypes[0]) && - isParallelIterator(iteratorTypes[1]) && - isReductionIterator(iteratorTypes[2])) { - // - // Two outer parallel, one inner reduction (matmat flavor). - // - if (maps == infer({{m, k}, {k, n}, {m, n}})) { - rhs = rewriter.create(loc, rhs, perm); - } else if (maps == infer({{m, k}, {n, k}, {m, n}})) { - // No need to permute anything. - } else if (maps == infer({{k, m}, {k, n}, {m, n}})) { - lhs = rewriter.create(loc, lhs, perm); - rhs = rewriter.create(loc, rhs, perm); - } else if (maps == infer({{k, m}, {n, k}, {m, n}})) { - lhs = rewriter.create(loc, lhs, perm); - } else if (maps == infer({{m, k}, {k, n}, {n, m}})) { - // This is the classical row-major matmul. Just permute the lhs. 
- Value tmp = lhs; - lhs = rewriter.create(loc, rhs, perm); - rhs = tmp; - } else if (maps == infer({{m, k}, {n, k}, {n, m}})) { - std::swap(lhs, rhs); - } else if (maps == infer({{k, m}, {k, n}, {n, m}})) { - Value tmp = lhs; - lhs = rewriter.create(loc, rhs, perm); - rhs = rewriter.create(loc, tmp, perm); - } else if (maps == infer({{k, m}, {n, k}, {n, m}})) { - Value tmp = rhs; - rhs = rewriter.create(loc, lhs, perm); - lhs = tmp; - } else { - return failure(); - } - } else if (isParallelIterator(iteratorTypes[0]) && - isReductionIterator(iteratorTypes[1])) { - // - // One outer parallel, one inner reduction (matvec flavor) - // - if (maps == infer({{m, n}, {n}, {m}})) { - // No need to permute anything. - } else if (maps == infer({{n, m}, {n}, {m}})) { - lhs = rewriter.create(loc, lhs, perm); - } else if (maps == infer({{n}, {m, n}, {m}})) { - std::swap(lhs, rhs); - } else if (maps == infer({{n}, {n, m}, {m}})) { - std::swap(lhs, rhs); - lhs = rewriter.create(loc, lhs, perm); - } else { - return failure(); - } - } else { - return failure(); - } - - VectorType dstType = op.getResultType().cast(); - assert(dstType.getRank() >= 1 && dstType.getRank() <= 2 && - "Expected dst type of rank 1 or 2"); - - unsigned rank = dstType.getRank(); - unsigned dstRows = dstType.getShape()[0]; - unsigned dstColumns = rank == 1 ? 1 : dstType.getShape()[1]; - - // ExtractOp does not allow dynamic indexing, we must unroll explicitly. - Value res = rewriter.create(loc, dstType, - rewriter.getZeroAttr(dstType)); - bool isInt = dstType.getElementType().isa(); - for (unsigned r = 0; r < dstRows; ++r) { - Value a = rewriter.create(op.getLoc(), lhs, r); - for (unsigned c = 0; c < dstColumns; ++c) { - Value b = rank == 1 - ? rhs - : rewriter.create(op.getLoc(), rhs, c); - Value m = createMul(op.getLoc(), a, b, isInt, rewriter); - Value reduced = rewriter.create( - op.getLoc(), vector::CombiningKind::ADD, m); - - SmallVector pos = rank == 1 ? SmallVector{r} - : SmallVector{r, c}; - res = rewriter.create(op.getLoc(), reduced, res, pos); - } - } - if (auto acc = op.getAcc()) - res = createAdd(op.getLoc(), res, acc, isInt, rewriter); - rewriter.replaceOp(op, res); - return success(); -} - -/// Progressive lowering of ContractionOp. -/// One: -/// %x = vector.contract with at least one free/batch dimension -/// is replaced by: -/// %a = vector.contract with one less free/batch dimension -/// %b = vector.contract with one less free/batch dimension -/// .. -/// %x = combine %a %b .. -/// until a pure contraction is reached (no free/batch dimensions), -/// which is replaced by a dot-product. -/// -/// This only kicks in when either VectorTransformsOptions is set -/// to DOT or when other contraction patterns fail. -// -// TODO: break down into transpose/reshape/cast ops -// when they become available to avoid code dup -// TODO: investigate lowering order impact on performance -LogicalResult -ContractionOpLowering::matchAndRewrite(vector::ContractionOp op, - PatternRewriter &rewriter) const { - // TODO: Remove native masks from contraction op? - if (!op.getMasks().empty()) - return failure(); - - if (failed(filter(op))) - return failure(); - - // TODO: support mixed mode contract lowering. - if (op.getLhsType().getElementType() != - getElementTypeOrSelf(op.getAccType()) || - op.getRhsType().getElementType() != getElementTypeOrSelf(op.getAccType())) - return failure(); - - // TODO: the code below assumes the default contraction, make sure it supports - // other kinds before enabling this lowering. 
- if (op.getKind() != vector::CombiningKind::ADD) { - return rewriter.notifyMatchFailure( - op, "contractions other than 'add' not supported"); - } - - // TODO: implement benefits, cost models. - MLIRContext *ctx = op.getContext(); - ContractionOpToMatmulOpLowering pat1(vectorTransformOptions, ctx); - if (succeeded(pat1.matchAndRewrite(op, rewriter))) - return success(); - ContractionOpToOuterProductOpLowering pat2(vectorTransformOptions, ctx); - if (succeeded(pat2.matchAndRewrite(op, rewriter))) - return success(); - ContractionOpToDotLowering pat3(vectorTransformOptions, ctx); - if (succeeded(pat3.matchAndRewrite(op, rewriter))) - return success(); - ContractOpToElementwise pat4(vectorTransformOptions, ctx); - if (succeeded(pat4.matchAndRewrite(op, rewriter))) - return success(); - - // Vector mask setup. - OpBuilder::InsertionGuard guard(rewriter); - Operation *rootOp = op; - Value mask; - if (op.isMasked()) { - rewriter.setInsertionPoint(op.getMaskingOp()); - rootOp = op.getMaskingOp(); - mask = op.getMaskingOp().getMask(); - } - - // Find first batch dimension in LHS/RHS, and lower when found. - std::vector> batchDimMap = op.getBatchDimMap(); - if (!batchDimMap.empty()) { - int64_t lhsIndex = batchDimMap[0].first; - int64_t rhsIndex = batchDimMap[0].second; - auto newOp = lowerParallel(rewriter, op, lhsIndex, rhsIndex, mask); - if (failed(newOp)) - return failure(); - rewriter.replaceOp(rootOp, *newOp); - return success(); - } - - // Collect contracting dimensions. - std::vector> contractingDimMap = - op.getContractingDimMap(); - DenseSet lhsContractingDimSet; - DenseSet rhsContractingDimSet; - for (auto &dimPair : contractingDimMap) { - lhsContractingDimSet.insert(dimPair.first); - rhsContractingDimSet.insert(dimPair.second); - } - - // Find first free dimension in LHS, and lower when found. - VectorType lhsType = op.getLhsType(); - for (int64_t lhsIndex = 0, e = lhsType.getRank(); lhsIndex < e; ++lhsIndex) { - if (lhsContractingDimSet.count(lhsIndex) == 0) { - auto newOp = lowerParallel(rewriter, op, lhsIndex, /*rhsIndex=*/-1, mask); - if (failed(newOp)) - return failure(); - rewriter.replaceOp(rootOp, *newOp); - return success(); - } - } - - // Find first free dimension in RHS, and lower when found. - VectorType rhsType = op.getRhsType(); - for (int64_t rhsIndex = 0, e = rhsType.getRank(); rhsIndex < e; ++rhsIndex) { - if (rhsContractingDimSet.count(rhsIndex) == 0) { - auto newOp = lowerParallel(rewriter, op, /*lhsIndex=*/-1, rhsIndex, mask); - if (failed(newOp)) - return failure(); - rewriter.replaceOp(rootOp, *newOp); - return success(); - } - } - - // Lower the first remaining reduction dimension. - if (!contractingDimMap.empty()) { - auto newOp = lowerReduction(rewriter, op, mask); - if (failed(newOp)) - return failure(); - rewriter.replaceOp(rootOp, *newOp); - return success(); - } - - return failure(); -} - -// Lower one parallel dimension. -// Incidentally also tolerates unit-size (hence trivial) reduction dimensions. -// TODO: consider reusing existing contract unrolling -FailureOr ContractionOpLowering::lowerParallel(PatternRewriter &rewriter, - vector::ContractionOp op, - int64_t lhsIndex, - int64_t rhsIndex, - Value mask) const { - VectorType lhsType = op.getLhsType(); - VectorType rhsType = op.getRhsType(); - VectorType resType = op.getResultType().cast(); - // Find the iterator type index and result index. 
- SmallVector iMap = op.getIndexingMapsArray(); - int64_t iterIndex = -1; - int64_t dimSize = -1; - if (lhsIndex >= 0) { - iterIndex = iMap[0].getDimPosition(lhsIndex); - if (rhsIndex >= 0 && iterIndex != iMap[1].getDimPosition(rhsIndex)) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expected lhsIndex=" << lhsIndex << " and rhsIndex=" << rhsIndex - << " to map to the same dimension"; - }); - dimSize = lhsType.getDimSize(lhsIndex); - } else if (rhsIndex >= 0) { - iterIndex = iMap[1].getDimPosition(rhsIndex); - dimSize = rhsType.getDimSize(rhsIndex); - } - if (iterIndex < 0) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expected either lhsIndex=" << lhsIndex - << " or rhsIndex=" << rhsIndex << " to be nonnegative"; - }); - // value_or(-1) means that we tolerate a dimension not appearing - // in the result map. That can't happen for actual parallel iterators, but - // the caller ContractionOpLowering::matchAndRewrite is currently calling - // lowerParallel also for the case of unit-size reduction dims appearing only - // on one of LHS or RHS, not both. At the moment, such cases are created by - // CastAwayContractionLeadingOneDim, so we need to either support that or - // modify that pattern. - int64_t resIndex = getResultIndex(iMap[2], iterIndex).value_or(-1); - if (resIndex == -1 && dimSize != 1) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expected the dimension for iterIndex=" << iterIndex - << " to either appear in the result map, or to be a unit dimension"; - }); - - // Construct new iterator types and affine map array attribute. - std::array lowIndexingMaps = { - adjustMap(iMap[0], iterIndex, rewriter), - adjustMap(iMap[1], iterIndex, rewriter), - adjustMap(iMap[2], iterIndex, rewriter)}; - auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps); - auto lowIter = - rewriter.getArrayAttr(adjustIter(op.getIteratorTypes(), iterIndex)); - // Unroll into a series of lower dimensional vector.contract ops. - Location loc = op.getLoc(); - Value result = rewriter.create( - loc, resType, rewriter.getZeroAttr(resType)); - - for (int64_t d = 0; d < dimSize; ++d) { - auto lhs = reshapeLoad(loc, op.getLhs(), lhsType, lhsIndex, d, rewriter); - auto rhs = reshapeLoad(loc, op.getRhs(), rhsType, rhsIndex, d, rewriter); - auto acc = reshapeLoad(loc, op.getAcc(), resType, resIndex, d, rewriter); - - Value lowMask; - if (mask) - lowMask = reshapeLoad(loc, mask, cast(mask.getType()), - iterIndex, d, rewriter); - - Operation *lowContract = rewriter.create( - loc, lhs, rhs, acc, lowAffine, lowIter); - lowContract = maskOperation(rewriter, lowContract, lowMask); - result = reshapeStore(loc, lowContract->getResult(0), result, resType, - resIndex, d, rewriter); - } - return result; -} - -// Lower one reduction dimension. -FailureOr ContractionOpLowering::lowerReduction( - PatternRewriter &rewriter, vector::ContractionOp op, Value mask) const { - auto loc = op.getLoc(); - VectorType lhsType = op.getLhsType(); - VectorType rhsType = op.getRhsType(); - Type resType = op.getResultType(); - if (resType.isa()) - return rewriter.notifyMatchFailure(op, - "did not expect a VectorType result"); - bool isInt = resType.isa(); - // Use iterator index 0. 
- int64_t iterIndex = 0; - SmallVector iMap = op.getIndexingMapsArray(); - std::optional lookupLhs = getResultIndex(iMap[0], iterIndex); - std::optional lookupRhs = getResultIndex(iMap[1], iterIndex); - if (!lookupLhs.has_value()) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expected iterIndex=" << iterIndex << "to map to a LHS dimension"; - }); - if (!lookupRhs.has_value()) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expected iterIndex=" << iterIndex << "to map to a RHS dimension"; - }); - int64_t lhsIndex = *lookupLhs; - int64_t rhsIndex = *lookupRhs; - int64_t dimSize = lhsType.getDimSize(lhsIndex); - if (dimSize != rhsType.getDimSize(rhsIndex)) - return rewriter.notifyMatchFailure(op, [&](Diagnostic &diag) { - diag << "expect LHS dimension " << lhsIndex - << " to have the same size as RHS dimension " << rhsIndex; - }); - // Base case. - if (lhsType.getRank() == 1) { - if (rhsType.getRank() != 1) - return rewriter.notifyMatchFailure( - op, "When LHS has rank 1, expected also RHS to have rank 1"); - Value m = createMul(loc, op.getLhs(), op.getRhs(), isInt, rewriter); - auto kind = vector::CombiningKind::ADD; - - Value acc = op.getAcc(); - Operation *reductionOp = - acc ? rewriter.create(loc, kind, m, acc) - : rewriter.create(loc, kind, m); - return maskOperation(rewriter, reductionOp, mask)->getResult(0); - } - // Construct new iterator types and affine map array attribute. - std::array lowIndexingMaps = { - adjustMap(iMap[0], iterIndex, rewriter), - adjustMap(iMap[1], iterIndex, rewriter), - adjustMap(iMap[2], iterIndex, rewriter)}; - auto lowAffine = rewriter.getAffineMapArrayAttr(lowIndexingMaps); - auto lowIter = - rewriter.getArrayAttr(adjustIter(op.getIteratorTypes(), iterIndex)); - // Unroll into a series of lower dimensional vector.contract ops. - // By feeding the initial accumulator into the first contraction, - // and the result of each contraction into the next, eventually - // the sum of all reductions is computed. - Value result = op.getAcc(); - for (int64_t d = 0; d < dimSize; ++d) { - auto lhs = reshapeLoad(loc, op.getLhs(), lhsType, lhsIndex, d, rewriter); - auto rhs = reshapeLoad(loc, op.getRhs(), rhsType, rhsIndex, d, rewriter); - Value newMask; - if (mask) - newMask = reshapeLoad(loc, mask, cast(mask.getType()), - iterIndex, d, rewriter); - - Operation *newContract = rewriter.create( - loc, lhs, rhs, result, lowAffine, lowIter); - result = maskOperation(rewriter, newContract, newMask)->getResult(0); - } - return result; -} - -} // namespace mlir - -/// Progressive lowering of transfer_read. This pattern supports lowering of -/// `vector.transfer_read` to a combination of `vector.load` and -/// `vector.broadcast` if all of the following hold: -/// - Stride of most minor memref dimension must be 1. -/// - Out-of-bounds masking is not required. -/// - If the memref's element type is a vector type then it coincides with the -/// result type. -/// - The permutation map doesn't perform permutation (broadcasting is allowed). 
-struct TransferReadToVectorLoadLowering - : public OpRewritePattern { - TransferReadToVectorLoadLowering(MLIRContext *context, - std::optional maxRank, - PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), - maxTransferRank(maxRank) {} - - LogicalResult matchAndRewrite(vector::TransferReadOp read, - PatternRewriter &rewriter) const override { - if (maxTransferRank && read.getVectorType().getRank() > *maxTransferRank) - return failure(); - - SmallVector broadcastedDims; - // Permutations are handled by VectorToSCF or - // populateVectorTransferPermutationMapLoweringPatterns. - // We let the 0-d corner case pass-through as it is supported. - if (!read.getPermutationMap().isMinorIdentityWithBroadcasting( - &broadcastedDims)) - return failure(); - - auto memRefType = read.getShapedType().dyn_cast(); - if (!memRefType) - return failure(); - - // Non-unit strides are handled by VectorToSCF. - if (!vector::isLastMemrefDimUnitStride(memRefType)) - return failure(); - - // If there is broadcasting involved then we first load the unbroadcasted - // vector, and then broadcast it with `vector.broadcast`. - ArrayRef vectorShape = read.getVectorType().getShape(); - SmallVector unbroadcastedVectorShape(vectorShape.begin(), - vectorShape.end()); - for (unsigned i : broadcastedDims) - unbroadcastedVectorShape[i] = 1; - VectorType unbroadcastedVectorType = VectorType::get( - unbroadcastedVectorShape, read.getVectorType().getElementType()); - - // `vector.load` supports vector types as memref's elements only when the - // resulting vector type is the same as the element type. - auto memrefElTy = memRefType.getElementType(); - if (memrefElTy.isa() && memrefElTy != unbroadcastedVectorType) - return failure(); - - // Otherwise, element types of the memref and the vector must match. - if (!memrefElTy.isa() && - memrefElTy != read.getVectorType().getElementType()) - return failure(); - - // Out-of-bounds dims are handled by MaterializeTransferMask. - if (read.hasOutOfBoundsDim()) - return failure(); - - // Create vector load op. - Operation *loadOp; - if (read.getMask()) { - Value fill = rewriter.create( - read.getLoc(), unbroadcastedVectorType, read.getPadding()); - loadOp = rewriter.create( - read.getLoc(), unbroadcastedVectorType, read.getSource(), - read.getIndices(), read.getMask(), fill); - } else { - loadOp = rewriter.create( - read.getLoc(), unbroadcastedVectorType, read.getSource(), - read.getIndices()); - } - - // Insert a broadcasting op if required. - if (!broadcastedDims.empty()) { - rewriter.replaceOpWithNewOp( - read, read.getVectorType(), loadOp->getResult(0)); - } else { - rewriter.replaceOp(read, loadOp->getResult(0)); - } - - return success(); - } - - std::optional maxTransferRank; -}; - -/// Replace a 0-d vector.load with a memref.load + vector.broadcast. -// TODO: we shouldn't cross the vector/scalar domains just for this -// but atm we lack the infra to avoid it. Possible solutions include: -// - go directly to LLVM + bitcast -// - introduce a bitcast op and likely a new pointer dialect -// - let memref.load/store additionally support the 0-d vector case -// There are still deeper data layout issues lingering even in this -// trivial case (for architectures for which this matters). 
-struct VectorLoadToMemrefLoadLowering - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::LoadOp loadOp, - PatternRewriter &rewriter) const override { - auto vecType = loadOp.getVectorType(); - if (vecType.getNumElements() != 1) - return failure(); - auto memrefLoad = rewriter.create( - loadOp.getLoc(), loadOp.getBase(), loadOp.getIndices()); - rewriter.replaceOpWithNewOp(loadOp, vecType, - memrefLoad); - return success(); - } -}; - -/// Replace a 0-d vector.store with a vector.extractelement + memref.store. -struct VectorStoreToMemrefStoreLowering - : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::StoreOp storeOp, - PatternRewriter &rewriter) const override { - auto vecType = storeOp.getVectorType(); - if (vecType.getNumElements() != 1) - return failure(); - Value extracted; - if (vecType.getRank() == 0) { - // TODO: Unifiy once ExtractOp supports 0-d vectors. - extracted = rewriter.create( - storeOp.getLoc(), storeOp.getValueToStore()); - } else { - SmallVector indices(vecType.getRank(), 0); - extracted = rewriter.create( - storeOp.getLoc(), storeOp.getValueToStore(), indices); - } - - rewriter.replaceOpWithNewOp( - storeOp, extracted, storeOp.getBase(), storeOp.getIndices()); - return success(); - } -}; - -/// Progressive lowering of transfer_write. This pattern supports lowering of -/// `vector.transfer_write` to `vector.store` if all of the following hold: -/// - Stride of most minor memref dimension must be 1. -/// - Out-of-bounds masking is not required. -/// - If the memref's element type is a vector type then it coincides with the -/// type of the written value. -/// - The permutation map is the minor identity map (neither permutation nor -/// broadcasting is allowed). -struct TransferWriteToVectorStoreLowering - : public OpRewritePattern { - TransferWriteToVectorStoreLowering(MLIRContext *context, - std::optional maxRank, - PatternBenefit benefit = 1) - : OpRewritePattern(context, benefit), - maxTransferRank(maxRank) {} - - LogicalResult matchAndRewrite(vector::TransferWriteOp write, - PatternRewriter &rewriter) const override { - if (maxTransferRank && write.getVectorType().getRank() > *maxTransferRank) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "rank exceeds maxTransferRank: " << write; - }); - - // Permutations are handled by VectorToSCF or - // populateVectorTransferPermutationMapLoweringPatterns. - if ( // pass-through for the 0-d corner case. - !write.getPermutationMap().isMinorIdentity()) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "permutation map is not minor identity: " << write; - }); - - auto memRefType = write.getShapedType().dyn_cast(); - if (!memRefType) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "not a memref type: " << write; - }); - - // Non-unit strides are handled by VectorToSCF. - if (!vector::isLastMemrefDimUnitStride(memRefType)) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "most minor stride is not 1: " << write; - }); - - // `vector.store` supports vector types as memref's elements only when the - // type of the vector value being written is the same as the element type. 
- auto memrefElTy = memRefType.getElementType(); - if (memrefElTy.isa() && memrefElTy != write.getVectorType()) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "elemental type mismatch: " << write; - }); - - // Otherwise, element types of the memref and the vector must match. - if (!memrefElTy.isa() && - memrefElTy != write.getVectorType().getElementType()) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "elemental type mismatch: " << write; - }); - - // Out-of-bounds dims are handled by MaterializeTransferMask. - if (write.hasOutOfBoundsDim()) - return rewriter.notifyMatchFailure(write.getLoc(), [=](Diagnostic &diag) { - diag << "out of bounds dim: " << write; - }); - if (write.getMask()) { - rewriter.replaceOpWithNewOp( - write, write.getSource(), write.getIndices(), write.getMask(), - write.getVector()); - } else { - rewriter.replaceOpWithNewOp( - write, write.getVector(), write.getSource(), write.getIndices()); - } - return success(); - } - - std::optional maxTransferRank; -}; - // Returns the values in `arrayAttr` as an integer vector. static SmallVector getIntValueVector(ArrayAttr arrayAttr) { return llvm::to_vector<4>( @@ -2863,202 +1027,6 @@ class DropInnerMostUnitDims : public OpRewritePattern { } }; -namespace { - -/// This function checks to see if the vector combining kind -/// is consistent with the integer or float element type. -static bool isValidKind(bool isInt, vector::CombiningKind kind) { - using vector::CombiningKind; - enum class KindType { FLOAT, INT, INVALID }; - KindType type{KindType::INVALID}; - switch (kind) { - case CombiningKind::MINF: - case CombiningKind::MAXF: - type = KindType::FLOAT; - break; - case CombiningKind::MINUI: - case CombiningKind::MINSI: - case CombiningKind::MAXUI: - case CombiningKind::MAXSI: - case CombiningKind::AND: - case CombiningKind::OR: - case CombiningKind::XOR: - type = KindType::INT; - break; - case CombiningKind::ADD: - case CombiningKind::MUL: - type = isInt ? KindType::INT : KindType::FLOAT; - break; - } - bool isValidIntKind = (type == KindType::INT) && isInt; - bool isValidFloatKind = (type == KindType::FLOAT) && (!isInt); - return (isValidIntKind || isValidFloatKind); -} - -/// This function constructs the appropriate integer or float -/// operation given the vector combining kind and operands. The -/// supported int operations are : add, mul, min (signed/unsigned), -/// max(signed/unsigned), and, or, xor. The supported float -/// operations are : add, mul, min and max. 
-static Value genOperator(Location loc, Value x, Value y, - vector::CombiningKind kind, - PatternRewriter &rewriter) { - using vector::CombiningKind; - - auto elType = x.getType().cast().getElementType(); - bool isInt = elType.isIntOrIndex(); - - Value combinedResult{nullptr}; - switch (kind) { - case CombiningKind::ADD: - if (isInt) - combinedResult = rewriter.create(loc, x, y); - else - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MUL: - if (isInt) - combinedResult = rewriter.create(loc, x, y); - else - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MINUI: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MINSI: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MAXUI: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MAXSI: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::AND: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::OR: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::XOR: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MINF: - combinedResult = rewriter.create(loc, x, y); - break; - case CombiningKind::MAXF: - combinedResult = rewriter.create(loc, x, y); - break; - } - return combinedResult; -} - -/// Convert vector.scan op into arith ops and -/// vector.insert_strided_slice/extract_strided_slice -/// -/// Ex: -/// ``` -/// %0:2 = vector.scan , %arg0, %arg1 {inclusive = true, reduction_dim = -/// 1} : -/// (vector<2x3xi32>, vector<2xi32>) to (vector<2x3xi32>, vector<2xi32>) -/// ``` -/// Gets converted to: -/// ``` -/// %cst = arith.constant dense<0> : vector<2x3xi32> -/// %0 = vector.extract_strided_slice %arg0 {offsets = [0, 0], sizes = [2, 1], -/// strides = [1, 1]} : vector<2x3xi32> to vector<2x1xi32> %1 = -/// vector.insert_strided_slice %0, %cst {offsets = [0, 0], strides = [1, 1]} -/// : vector<2x1xi32> into vector<2x3xi32> %2 = vector.extract_strided_slice -/// %arg0 {offsets = [0, 1], sizes = [2, 1], strides = [1, 1]} : -/// vector<2x3xi32> to vector<2x1xi32> %3 = arith.muli %0, %2 : -/// vector<2x1xi32> %4 = vector.insert_strided_slice %3, %1 {offsets = [0, 1], -/// strides = [1, 1]} : vector<2x1xi32> into vector<2x3xi32> %5 = -/// vector.extract_strided_slice %arg0 {offsets = [0, 2], sizes = [2, 1], -/// strides = [1, 1]} : vector<2x3xi32> to vector<2x1xi32> %6 = arith.muli %3, -/// %5 : vector<2x1xi32> %7 = vector.insert_strided_slice %6, %4 {offsets = -/// [0, 2], strides = [1, 1]} : vector<2x1xi32> into vector<2x3xi32> %8 = -/// vector.shape_cast %6 : vector<2x1xi32> to vector<2xi32> return %7, %8 : -/// vector<2x3xi32>, vector<2xi32> -/// ``` -struct ScanToArithOps : public OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::ScanOp scanOp, - PatternRewriter &rewriter) const override { - auto loc = scanOp.getLoc(); - VectorType destType = scanOp.getDestType(); - ArrayRef destShape = destType.getShape(); - auto elType = destType.getElementType(); - bool isInt = elType.isIntOrIndex(); - if (!isValidKind(isInt, scanOp.getKind())) - return failure(); - - VectorType resType = VectorType::get(destShape, elType); - Value result = rewriter.create( - loc, resType, rewriter.getZeroAttr(resType)); - int64_t reductionDim = scanOp.getReductionDim(); - bool inclusive = scanOp.getInclusive(); - int64_t destRank = destType.getRank(); - VectorType initialValueType = 
scanOp.getInitialValueType(); - int64_t initialValueRank = initialValueType.getRank(); - - SmallVector reductionShape(destShape.begin(), destShape.end()); - reductionShape[reductionDim] = 1; - VectorType reductionType = VectorType::get(reductionShape, elType); - SmallVector offsets(destRank, 0); - SmallVector strides(destRank, 1); - SmallVector sizes(destShape.begin(), destShape.end()); - sizes[reductionDim] = 1; - ArrayAttr scanSizes = rewriter.getI64ArrayAttr(sizes); - ArrayAttr scanStrides = rewriter.getI64ArrayAttr(strides); - - Value lastOutput, lastInput; - for (int i = 0; i < destShape[reductionDim]; i++) { - offsets[reductionDim] = i; - ArrayAttr scanOffsets = rewriter.getI64ArrayAttr(offsets); - Value input = rewriter.create( - loc, reductionType, scanOp.getSource(), scanOffsets, scanSizes, - scanStrides); - Value output; - if (i == 0) { - if (inclusive) { - output = input; - } else { - if (initialValueRank == 0) { - // ShapeCastOp cannot handle 0-D vectors - output = rewriter.create( - loc, input.getType(), scanOp.getInitialValue()); - } else { - output = rewriter.create( - loc, input.getType(), scanOp.getInitialValue()); - } - } - } else { - Value y = inclusive ? input : lastInput; - output = genOperator(loc, lastOutput, y, scanOp.getKind(), rewriter); - assert(output != nullptr); - } - result = rewriter.create( - loc, output, result, offsets, strides); - lastOutput = output; - lastInput = input; - } - - Value reduction; - if (initialValueRank == 0) { - Value v = rewriter.create(loc, lastOutput, 0); - reduction = - rewriter.create(loc, initialValueType, v); - } else { - reduction = rewriter.create(loc, initialValueType, - lastOutput); - } - - rewriter.replaceOp(scanOp, {result, reduction}); - return success(); - } -}; - /// Canonicalization of a `vector.contraction %a, %b, %c` with row-major matmul /// semantics to a contraction suitable for MMT (matrix matrix multiplication /// with the RHS transposed) lowering. @@ -3157,132 +1125,6 @@ struct CanonicalizeContractMatmulToMMT final FilterConstraintType filter; }; -/// Flattens 2 or more dimensional `vector.gather` ops by unrolling the -/// outermost dimension. For example: -/// ``` -/// %g = vector.gather %base[%c0][%v], %mask, %pass_thru : -/// ... into vector<2x3xf32> -/// -/// ==> -/// -/// %0 = arith.constant dense<0.0> : vector<2x3xf32> -/// %g0 = vector.gather %base[%c0][%v0], %mask0, %pass_thru0 : ... -/// %1 = vector.insert %g0, %0 [0] : vector<3xf32> into vector<2x3xf32> -/// %g1 = vector.gather %base[%c0][%v1], %mask1, %pass_thru1 : ... -/// %g = vector.insert %g1, %1 [1] : vector<3xf32> into vector<2x3xf32> -/// ``` -/// -/// When applied exhaustively, this will produce a sequence of 1-d gather ops. 
-struct FlattenGather : OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::GatherOp op, - PatternRewriter &rewriter) const override { - VectorType resultTy = op.getType(); - if (resultTy.getRank() < 2) - return rewriter.notifyMatchFailure(op, "already flat"); - - Location loc = op.getLoc(); - Value indexVec = op.getIndexVec(); - Value maskVec = op.getMask(); - Value passThruVec = op.getPassThru(); - - Value result = rewriter.create( - loc, resultTy, rewriter.getZeroAttr(resultTy)); - - Type subTy = VectorType::get(resultTy.getShape().drop_front(), - resultTy.getElementType()); - - for (int64_t i = 0, e = resultTy.getShape().front(); i < e; ++i) { - int64_t thisIdx[1] = {i}; - - Value indexSubVec = - rewriter.create(loc, indexVec, thisIdx); - Value maskSubVec = - rewriter.create(loc, maskVec, thisIdx); - Value passThruSubVec = - rewriter.create(loc, passThruVec, thisIdx); - Value subGather = rewriter.create( - loc, subTy, op.getBase(), op.getIndices(), indexSubVec, maskSubVec, - passThruSubVec); - result = - rewriter.create(loc, subGather, result, thisIdx); - } - - rewriter.replaceOp(op, result); - return success(); - } -}; - -/// Turns 1-d `vector.gather` into a scalarized sequence of `vector.loads` or -/// `tensor.extract`s. To avoid out-of-bounds memory accesses, these -/// loads/extracts are made conditional using `scf.if` ops. -struct Gather1DToConditionalLoads : OpRewritePattern { - using OpRewritePattern::OpRewritePattern; - - LogicalResult matchAndRewrite(vector::GatherOp op, - PatternRewriter &rewriter) const override { - VectorType resultTy = op.getType(); - if (resultTy.getRank() != 1) - return rewriter.notifyMatchFailure(op, "unsupported rank"); - - Location loc = op.getLoc(); - Type elemTy = resultTy.getElementType(); - // Vector type with a single element. Used to generate `vector.loads`. - VectorType elemVecTy = VectorType::get({1}, elemTy); - - Value condMask = op.getMask(); - Value base = op.getBase(); - Value indexVec = rewriter.createOrFold( - loc, op.getIndexVectorType().clone(rewriter.getIndexType()), - op.getIndexVec()); - auto baseOffsets = llvm::to_vector(op.getIndices()); - Value lastBaseOffset = baseOffsets.back(); - - Value result = op.getPassThru(); - - // Emit a conditional access for each vector element. - for (int64_t i = 0, e = resultTy.getNumElements(); i < e; ++i) { - int64_t thisIdx[1] = {i}; - Value condition = - rewriter.create(loc, condMask, thisIdx); - Value index = rewriter.create(loc, indexVec, thisIdx); - baseOffsets.back() = - rewriter.createOrFold(loc, lastBaseOffset, index); - - auto loadBuilder = [&](OpBuilder &b, Location loc) { - Value extracted; - if (isa(base.getType())) { - // `vector.load` does not support scalar result; emit a vector load - // and extract the single result instead. 
- Value load = - b.create(loc, elemVecTy, base, baseOffsets); - int64_t zeroIdx[1] = {0}; - extracted = b.create(loc, load, zeroIdx); - } else { - extracted = b.create(loc, base, baseOffsets); - } - - Value newResult = - b.create(loc, extracted, result, thisIdx); - b.create(loc, newResult); - }; - auto passThruBuilder = [result](OpBuilder &b, Location loc) { - b.create(loc, result); - }; - - result = - rewriter - .create(loc, condition, /*thenBuilder=*/loadBuilder, - /*elseBuilder=*/passThruBuilder) - .getResult(0); - } - - rewriter.replaceOp(op, result); - return success(); - } -}; - } // namespace void mlir::vector::populateVectorMaskMaterializationPatterns( @@ -3307,33 +1149,6 @@ void mlir::vector::populateBubbleVectorBitCastOpPatterns( benefit); } -void mlir::vector::populateVectorBroadcastLoweringPatterns( - RewritePatternSet &patterns, PatternBenefit benefit) { - patterns.add(patterns.getContext(), benefit); -} - -void mlir::vector::populateVectorMaskOpLoweringPatterns( - RewritePatternSet &patterns, PatternBenefit benefit) { - patterns.add( - patterns.getContext(), benefit); -} - -void mlir::vector::populateVectorShapeCastLoweringPatterns( - RewritePatternSet &patterns, PatternBenefit benefit) { - patterns.add( - patterns.getContext(), benefit); -} - -void mlir::vector::populateVectorContractLoweringPatterns( - RewritePatternSet &patterns, VectorTransformsOptions options, - PatternBenefit benefit) { - patterns.add(patterns.getContext(), benefit); - patterns.add( - options, patterns.getContext(), benefit); -} - void mlir::vector::populateVectorContractCanonicalizeMatmulToMMT( RewritePatternSet &patterns, std::function constraint, @@ -3342,13 +1157,6 @@ void mlir::vector::populateVectorContractCanonicalizeMatmulToMMT( std::move(constraint)); } -void mlir::vector::populateVectorTransposeLoweringPatterns( - RewritePatternSet &patterns, VectorTransformsOptions options, - PatternBenefit benefit) { - patterns.add( - options, patterns.getContext(), benefit); -} - void mlir::vector::populateVectorReductionToContractPatterns( RewritePatternSet &patterns, PatternBenefit benefit) { patterns.add(patterns.getContext(), benefit); } -void mlir::vector::populateVectorTransferLoweringPatterns( - RewritePatternSet &patterns, std::optional maxTransferRank, - PatternBenefit benefit) { - patterns.add(patterns.getContext(), - maxTransferRank, benefit); - patterns - .add( - patterns.getContext(), benefit); -} - -void mlir::vector::populateVectorScanLoweringPatterns( - RewritePatternSet &patterns, PatternBenefit benefit) { - patterns.add(patterns.getContext(), benefit); -} - -void mlir::vector::populateVectorGatherLoweringPatterns( - RewritePatternSet &patterns, PatternBenefit benefit) { - patterns.add(patterns.getContext(), - benefit); -} - //===----------------------------------------------------------------------===// // TableGen'd enum attribute definitions //===----------------------------------------------------------------------===// diff --git a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp index f79ca2259fa38..7a4f9cf5e5101 100644 --- a/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp +++ b/mlir/test/lib/Dialect/Vector/TestVectorTransforms.cpp @@ -22,6 +22,7 @@ #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" +#include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorDistribution.h" #include 
"mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h" #include "mlir/Dialect/Vector/Transforms/VectorTransforms.h" @@ -148,8 +149,9 @@ struct TestVectorContractionLowering if (lowerToOuterProduct) { VectorContractLowering lowering = VectorContractLowering::OuterProduct; VectorTransformsOptions options{lowering}; - patterns.add(options, - &getContext()); + populateVectorContractLoweringPatterns( + patterns, options, /*benefit=*/1, + /*disableOuterProductlowering=*/true); (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); return; } @@ -469,7 +471,7 @@ struct TestVectorTransferFullPartialSplitPatterns options.setVectorTransferSplit(VectorTransferSplit::LinalgCopy); else options.setVectorTransferSplit(VectorTransferSplit::VectorTransfer); - patterns.add(ctx, options); + populateVectorTransferFullPartialPatterns(patterns, options); (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns)); } }; diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 8538c3db59dcd..f565030d63d9f 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -8539,6 +8539,7 @@ cc_library( ":TransformDialect", ":TransformDialectUtils", ":TransformUtils", + ":VectorTransforms", "//llvm:Support", ], ) From a86cc8341de91c48ff724aa07766bc0dbefaa248 Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Thu, 23 Mar 2023 18:43:09 +0000 Subject: [PATCH 468/691] [libc] Move fma and fmaf into generic dir Differential Revision: https://reviews.llvm.org/D146740 --- libc/src/math/CMakeLists.txt | 27 +++------------------------ libc/src/math/generic/CMakeLists.txt | 24 ++++++++++++++++++++++++ libc/src/math/{ => generic}/fma.cpp | 0 libc/src/math/{ => generic}/fmaf.cpp | 0 4 files changed, 27 insertions(+), 24 deletions(-) rename libc/src/math/{ => generic}/fma.cpp (100%) rename libc/src/math/{ => generic}/fmaf.cpp (100%) diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index 78bab469c28db..bc9a5d7a237f4 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -40,30 +40,6 @@ function(add_math_entrypoint_object name) ) endfunction() -add_entrypoint_object( - fmaf - SRCS - fmaf.cpp - HDRS - fmaf.h - DEPENDS - libc.src.__support.FPUtil.fma - COMPILE_OPTIONS - -O3 -) - -add_entrypoint_object( - fma - SRCS - fma.cpp - HDRS - fma.h - DEPENDS - libc.src.__support.FPUtil.fma - COMPILE_OPTIONS - -O3 -) - add_math_entrypoint_object(acosf) add_math_entrypoint_object(acoshf) @@ -107,6 +83,9 @@ add_math_entrypoint_object(floor) add_math_entrypoint_object(floorf) add_math_entrypoint_object(floorl) +add_math_entrypoint_object(fma) +add_math_entrypoint_object(fmaf) + add_math_entrypoint_object(fmax) add_math_entrypoint_object(fmaxf) add_math_entrypoint_object(fmaxl) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 09aefc67d1b51..9fe0fce1c8a67 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1491,3 +1491,27 @@ add_entrypoint_object( COMPILE_OPTIONS -O3 ) + +add_entrypoint_object( + fmaf + SRCS + fmaf.cpp + HDRS + ../fmaf.h + DEPENDS + libc.src.__support.FPUtil.fma + COMPILE_OPTIONS + -O3 +) + +add_entrypoint_object( + fma + SRCS + fma.cpp + HDRS + ../fma.h + DEPENDS + libc.src.__support.FPUtil.fma + COMPILE_OPTIONS + -O3 +) diff --git a/libc/src/math/fma.cpp b/libc/src/math/generic/fma.cpp similarity index 100% rename from libc/src/math/fma.cpp 
rename to libc/src/math/generic/fma.cpp
diff --git a/libc/src/math/fmaf.cpp b/libc/src/math/generic/fmaf.cpp
similarity index 100%
rename from libc/src/math/fmaf.cpp
rename to libc/src/math/generic/fmaf.cpp

From 30e89166d765cbe676fdc85bc653df9a3c09ce48 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Thu, 23 Mar 2023 19:36:29 +0000
Subject: [PATCH 469/691] [X86] combineVectorSizedSetCCEquality - update
 arguments to use individual SETCC operands. NFC.

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a87dc476a1849..b9ccb5b2c48dc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -53970,7 +53970,7 @@ static bool isOrXorXorTree(SDValue X, bool Root = true) {
 /// Recursive helper for combineVectorSizedSetCCEquality() to emit the memcmp
 /// expansion.
 template <typename F>
-static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG,
+static SDValue emitOrXorXorTree(SDValue X, const SDLoc &DL, SelectionDAG &DAG,
                                 EVT VecVT, EVT CmpVT, bool HasPT, F SToV) {
   SDValue Op0 = X.getOperand(0);
   SDValue Op1 = X.getOperand(1);
@@ -53997,14 +53997,14 @@ static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG,
 /// Try to map a 128-bit or larger integer comparison to vector instructions
 /// before type legalization splits it up into chunks.
-static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
+static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y,
+                                               ISD::CondCode CC,
+                                               const SDLoc &DL,
+                                               SelectionDAG &DAG,
                                                const X86Subtarget &Subtarget) {
-  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
   assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");
   // We're looking for an oversized integer equality comparison.
-  SDValue X = SetCC->getOperand(0);
-  SDValue Y = SetCC->getOperand(1);
   EVT OpVT = X.getValueType();
   unsigned OpSize = OpVT.getSizeInBits();
   if (!OpVT.isScalarInteger() || OpSize < 128)
@@ -54029,9 +54029,6 @@ static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
       !IsOrXorXorTreeCCZero)
     return SDValue();
-  EVT VT = SetCC->getValueType(0);
-  SDLoc DL(SetCC);
-
   // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands.
   // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands.
   // Otherwise use PCMPEQ (plus AND) and mask testing.
@@ -54173,7 +54170,8 @@ static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
   SDLoc DL(N);
   if (CC == ISD::SETNE || CC == ISD::SETEQ) {
-    if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget))
+    if (SDValue V = combineVectorSizedSetCCEquality(VT, LHS, RHS, CC, DL, DAG,
+                                                    Subtarget))
       return V;
     if (VT == MVT::i1 && isNullConstant(RHS)) {

From e0361396c2281a108a36d186161ace1843925431 Mon Sep 17 00:00:00 2001
From: Chia-hung Duan
Date: Thu, 23 Mar 2023 19:38:48 +0000
Subject: [PATCH 470/691] [scudo] Add a Timer class to assist performance
 measurement

Add Timer and TimingManager, which provide a convenient way to measure
the execution time of code snippets. The output looks like,

```
-- Average Operation Time -- -- Name (# of Calls) --
          1747.2(ns)            popBatch (59)
            92.3(ns)            popBatchImpl (73)
           101.6(ns)              EmptyBatchProcess (5)
          2587.0(ns)            pushBlocksImpl (13)
```

Note that `EmptyBatchProcess` is nested under the timer `popBatchImpl`.
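As an illustration, here is a minimal usage sketch (hypothetical caller
code, not part of this patch; only `scudo::TimingManager`,
`scudo::ScopedTimer`, and `printAll()` come from the files added below,
and the function names merely mirror the sample output above):

```cpp
#include "timing.h"

// Hypothetical call site; one manager shared by all timed functions.
scudo::TimingManager Manager;

void popBatch() {
  // Starts timing on construction; stops and reports back to Manager
  // when the scope exits.
  scudo::ScopedTimer ST(Manager, __func__);
  // ... pop a batch of blocks ...
}

void dumpTimingStats() {
  Manager.printAll(); // Prints the averaged per-timer table shown above.
}
```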
Reviewed By: cferris Differential Revision: https://reviews.llvm.org/D143626 --- .../lib/scudo/standalone/CMakeLists.txt | 2 + .../lib/scudo/standalone/tests/CMakeLists.txt | 1 + .../scudo/standalone/tests/timing_test.cpp | 86 +++++++ compiler-rt/lib/scudo/standalone/timing.cpp | 29 +++ compiler-rt/lib/scudo/standalone/timing.h | 215 ++++++++++++++++++ 5 files changed, 333 insertions(+) create mode 100644 compiler-rt/lib/scudo/standalone/tests/timing_test.cpp create mode 100644 compiler-rt/lib/scudo/standalone/timing.cpp create mode 100644 compiler-rt/lib/scudo/standalone/timing.h diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt index eefcffd4cfc56..6fcd4deddf716 100644 --- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt +++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt @@ -85,6 +85,7 @@ set(SCUDO_HEADERS stack_depot.h stats.h string_utils.h + timing.h tsd_exclusive.h tsd_shared.h tsd.h @@ -107,6 +108,7 @@ set(SCUDO_SOURCES report.cpp rss_limit_checker.cpp string_utils.cpp + timing.cpp ) # Enable the necessary instruction set for scudo_crc32.cpp, if available. diff --git a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt index 50468d9c6ddc3..335e4b7dbd899 100644 --- a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt +++ b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt @@ -105,6 +105,7 @@ set(SCUDO_UNIT_TEST_SOURCES size_class_map_test.cpp stats_test.cpp strings_test.cpp + timing_test.cpp tsd_test.cpp vector_test.cpp scudo_unit_test_main.cpp diff --git a/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp b/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp new file mode 100644 index 0000000000000..09a6c31224673 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp @@ -0,0 +1,86 @@ +//===-- timing_test.cpp -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "tests/scudo_unit_test.h" + +#include "timing.h" + +#include + +class ScudoTimingTest : public Test { +public: + void testFunc1() { scudo::ScopedTimer ST(Manager, __func__); } + + void testFunc2() { + scudo::ScopedTimer ST(Manager, __func__); + testFunc1(); + } + + void testChainedCalls() { + scudo::ScopedTimer ST(Manager, __func__); + testFunc2(); + } + + void testIgnoredTimer() { + scudo::ScopedTimer ST(Manager, __func__); + ST.ignore(); + } + + void printAllTimersStats() { Manager.printAll(); } + + scudo::TimingManager &getTimingManager() { return Manager; } + +private: + scudo::TimingManager Manager; +}; + +// Given that the output of statistics of timers are dumped through +// `scudo::Printf` which is platform dependent, so we don't have a reliable way +// to catch the output and verify the details. Now we only verify the number of +// invocations on linux. 
+TEST_F(ScudoTimingTest, SimpleTimer) { +#if SCUDO_LINUX + testing::internal::LogToStderr(); + testing::internal::CaptureStderr(); +#endif + + testIgnoredTimer(); + testChainedCalls(); + printAllTimersStats(); + +#if SCUDO_LINUX + std::string output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(output.find("testIgnoredTimer (1)") == std::string::npos); + EXPECT_TRUE(output.find("testChainedCalls (1)") != std::string::npos); + EXPECT_TRUE(output.find("testFunc2 (1)") != std::string::npos); + EXPECT_TRUE(output.find("testFunc1 (1)") != std::string::npos); +#endif +} + +TEST_F(ScudoTimingTest, NestedTimer) { +#if SCUDO_LINUX + testing::internal::LogToStderr(); + testing::internal::CaptureStderr(); +#endif + + { + scudo::ScopedTimer Outer(getTimingManager(), "Outer"); + { + scudo::ScopedTimer Inner1(getTimingManager(), Outer, "Inner1"); + { scudo::ScopedTimer Inner2(getTimingManager(), Inner1, "Inner2"); } + } + } + printAllTimersStats(); + +#if SCUDO_LINUX + std::string output = testing::internal::GetCapturedStderr(); + EXPECT_TRUE(output.find("Outer (1)") != std::string::npos); + EXPECT_TRUE(output.find("Inner1 (1)") != std::string::npos); + EXPECT_TRUE(output.find("Inner2 (1)") != std::string::npos); +#endif +} diff --git a/compiler-rt/lib/scudo/standalone/timing.cpp b/compiler-rt/lib/scudo/standalone/timing.cpp new file mode 100644 index 0000000000000..59ae21d10f0f6 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/timing.cpp @@ -0,0 +1,29 @@ +//===-- timing.cpp ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "timing.h" + +namespace scudo { + +Timer::~Timer() { + if (Manager) + Manager->report(*this); +} + +ScopedTimer::ScopedTimer(TimingManager &Manager, const char *Name) + : Timer(Manager.getOrCreateTimer(Name)) { + start(); +} + +ScopedTimer::ScopedTimer(TimingManager &Manager, const Timer &Nest, + const char *Name) + : Timer(Manager.nest(Nest, Name)) { + start(); +} + +} // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/timing.h b/compiler-rt/lib/scudo/standalone/timing.h new file mode 100644 index 0000000000000..155111f9f8e52 --- /dev/null +++ b/compiler-rt/lib/scudo/standalone/timing.h @@ -0,0 +1,215 @@ +//===-- timing.h ------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "common.h" +#include "mutex.h" +#include "string_utils.h" +#include "thread_annotations.h" + +#include + +namespace scudo { + +class TimingManager; + +// A simple timer for evaluating execution time of code snippets. It can be used +// along with TimingManager or standalone. +class Timer { +public: + // The use of Timer without binding to a TimingManager is supposed to do the + // timer logging manually. Otherwise, TimingManager will do the logging stuff + // for you. 
+ Timer() = default; + Timer(Timer &&Other) + : StartTime(0), AccTime(Other.AccTime), Manager(Other.Manager), + HandleId(Other.HandleId) { + Other.Manager = nullptr; + } + + Timer(const Timer &) = delete; + + virtual ~Timer(); + + void start() { + CHECK_EQ(StartTime, 0U); + StartTime = getMonotonicTime(); + } + void stop() { + AccTime += getMonotonicTime() - StartTime; + StartTime = 0; + } + u64 getAccumulatedTime() const { return AccTime; } + + // Unset the bound TimingManager so that we don't report the data back. This + // is useful if we only want to track subset of certain scope events. + void ignore() { + StartTime = 0; + AccTime = 0; + Manager = nullptr; + } + +protected: + friend class TimingManager; + Timer(TimingManager &Manager, u32 HandleId) + : Manager(&Manager), HandleId(HandleId) {} + + u64 StartTime = 0; + u64 AccTime = 0; + TimingManager *Manager = nullptr; + u32 HandleId; +}; + +// A RAII-style wrapper for easy scope execution measurement. Note that in order +// not to take additional space for the message like `Name`. It only works with +// TimingManager. +class ScopedTimer : public Timer { +public: + ScopedTimer(TimingManager &Manager, const char *Name); + ScopedTimer(TimingManager &Manager, const Timer &Nest, const char *Name); + ~ScopedTimer() override { stop(); } +}; + +// In Scudo, the execution time of single run of code snippets may not be +// useful, we are more interested in the average time from several runs. +// TimingManager lets the registered timer report their data and reports the +// average execution time for each timer periodically. +class TimingManager { +public: + TimingManager(u32 PrintingInterval = DefaultPrintingInterval) + : PrintingInterval(PrintingInterval) {} + ~TimingManager() { + if (NumAllocatedTimers != 0) + printAll(); + } + + Timer getOrCreateTimer(const char *Name) EXCLUDES(Mutex) { + ScopedLock L(Mutex); + + CHECK_LT(strlen(Name), MaxLenOfTimerName); + for (u32 I = 0; I < NumAllocatedTimers; ++I) { + if (strncmp(Name, Timers[I].Name, MaxLenOfTimerName) == 0) + return Timer(*this, I); + } + + CHECK_LT(NumAllocatedTimers, MaxNumberOfTimers); + strncpy(Timers[NumAllocatedTimers].Name, Name, MaxLenOfTimerName); + TimerRecords[NumAllocatedTimers].AccumulatedTime = 0; + TimerRecords[NumAllocatedTimers].Occurrence = 0; + return Timer(*this, NumAllocatedTimers++); + } + + // Add a sub-Timer associated with another Timer. This is used when we want to + // detail the execution time in the scope of a Timer. + // For example, + // void Foo() { + // // T1 records the time spent in both first and second tasks. + // ScopedTimer T1(getTimingManager(), "Task1"); + // { + // // T2 records the time spent in first task + // ScopedTimer T2(getTimingManager, T1, "Task2"); + // // Do first task. + // } + // // Do second task. 
+  //   }
+  //
+  // The report will show proper indents to indicate the nested relation,
+  // like:
+  //   -- Average Operation Time --    -- Name (# of Calls) --
+  //            10.0(ns)               Task1 (1)
+  //             5.0(ns)                 Task2 (1)
+  Timer nest(const Timer &T, const char *Name) EXCLUDES(Mutex) {
+    CHECK_EQ(T.Manager, this);
+    Timer Nesting = getOrCreateTimer(Name);
+
+    ScopedLock L(Mutex);
+    CHECK_NE(Nesting.HandleId, T.HandleId);
+    Timers[Nesting.HandleId].Nesting = T.HandleId;
+    return Nesting;
+  }
+
+  void report(const Timer &T) EXCLUDES(Mutex) {
+    ScopedLock L(Mutex);
+
+    const u32 HandleId = T.HandleId;
+    CHECK_LT(HandleId, MaxNumberOfTimers);
+    TimerRecords[HandleId].AccumulatedTime += T.getAccumulatedTime();
+    ++TimerRecords[HandleId].Occurrence;
+    ++NumEventsReported;
+    if (NumEventsReported % PrintingInterval == 0)
+      printAllImpl();
+  }
+
+  void printAll() EXCLUDES(Mutex) {
+    ScopedLock L(Mutex);
+    printAllImpl();
+  }
+
+private:
+  void printAllImpl() REQUIRES(Mutex) {
+    static char NameHeader[] = "-- Name (# of Calls) --";
+    static char AvgHeader[] = "-- Average Operation Time --";
+    ScopedString Str;
+    Str.append("%-15s %-15s\n", AvgHeader, NameHeader);
+
+    for (u32 I = 0; I < NumAllocatedTimers; ++I) {
+      if (Timers[I].Nesting != MaxNumberOfTimers)
+        continue;
+      printImpl(Str, I);
+    }
+
+    Str.output();
+  }
+
+  void printImpl(ScopedString &Str, const u32 HandleId,
+                 const u32 ExtraIndent = 0) REQUIRES(Mutex) {
+    const uptr AccumulatedTime = TimerRecords[HandleId].AccumulatedTime;
+    const uptr Occurrence = TimerRecords[HandleId].Occurrence;
+    const uptr Integral = Occurrence == 0 ? 0 : AccumulatedTime / Occurrence;
+    // Keeping a single digit of the fraction is enough, and it makes the
+    // layout easier to maintain.
+    const uptr Fraction =
+        Occurrence == 0 ? 0
+                        : ((AccumulatedTime % Occurrence) * 10) / Occurrence;
+
+    Str.append("%14zu.%zu(ns) %-11s", Integral, Fraction, " ");
+
+    for (u32 I = 0; I < ExtraIndent; ++I)
+      Str.append("%s", " ");
+    Str.append("%s (%zu)\n", Timers[HandleId].Name, Occurrence);
+
+    for (u32 I = 0; I < NumAllocatedTimers; ++I)
+      if (Timers[I].Nesting == HandleId)
+        printImpl(Str, I, ExtraIndent + 1);
+  }
+
+  // Instead of maintaining pages for timer registration, a static buffer is
+  // sufficient for most use cases in Scudo.
+  static constexpr u32 MaxNumberOfTimers = 50;
+  static constexpr u32 MaxLenOfTimerName = 50;
+  static constexpr u32 DefaultPrintingInterval = 100;
+
+  struct Record {
+    uptr AccumulatedTime = 0;
+    uptr Occurrence = 0;
+  };
+
+  struct TimerInfo {
+    char Name[MaxLenOfTimerName + 1];
+    u32 Nesting = MaxNumberOfTimers;
+  };
+
+  HybridMutex Mutex;
+  // The frequency of proactively dumping the timer statistics. For example,
+  // the default setting is to dump the statistics every 100 reported events.
+  u32 PrintingInterval GUARDED_BY(Mutex);
+  uptr NumEventsReported GUARDED_BY(Mutex) = 0;
+  u32 NumAllocatedTimers GUARDED_BY(Mutex) = 0;
+  TimerInfo Timers[MaxNumberOfTimers] GUARDED_BY(Mutex);
+  Record TimerRecords[MaxNumberOfTimers] GUARDED_BY(Mutex);
+};
+
+} // namespace scudo

From d10110a8a60137d430f7a75051d0794293982ef6 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers
Date: Thu, 23 Mar 2023 12:38:57 -0700
Subject: [PATCH 471/691] [StackProtector] attribute __stack_chk_fail as
 NoReturn
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When GCC added support for stack smashing protections, it was defined
that:

> This hook returns a CALL_EXPR that alerts the runtime that the stack
> protect guard variable has been modified.
> This expression should involve a call to a noreturn function. The
> default version of this hook invokes a function called
> ‘__stack_chk_fail’, taking no arguments.

Do so as well for __stack_smash_handler for OpenBSD.

Every libc implementation I could find has __stack_chk_fail marked
noreturn, or the implementation calls abort, exit, or panic (which are
themselves noreturn).

Glibc: https://sourceware.org/git/?p=glibc.git;a=blob;f=debug/stack_chk_fail.c
Musl: https://git.musl-libc.org/cgit/musl/tree/src/env/__stack_chk_fail.c
Bionic: https://android.googlesource.com/platform/bionic/+/refs/heads/master/libc/bionic/__stack_chk_fail.cpp
FreeBSD: https://cgit.freebsd.org/src/tree/lib/libc/secure/stack_protector.c
OpenBSD: https://github.com/openbsd/src/blob/master/lib/libc/sys/stack_protector.c
NetBSD: https://github.com/NetBSD/src/blob/trunk/lib/libc/misc/stack_protector.c
Linux Kernel: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/panic.c
Apple: https://opensource.apple.com/source/Libc/Libc-1439.40.11/sys/OpenBSD/stack_protector.c.auto.html

Link: https://gcc.gnu.org/onlinedocs/gccint/Stack-Smashing-Protection.html#Stack-Smashing-Protection

This will later help us diagnose functions that fall through to other
functions versus functions that end in calls to noreturn functions.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D146339
---
 llvm/lib/CodeGen/StackProtector.cpp            | 18 ++++++++++--------
 .../test/CodeGen/X86/2009-04-14-IllegalRegs.ll |  1 -
 .../test/CodeGen/X86/stack-protector-weight.ll |  2 +-
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index aa92dcb386560..05ac176461a5c 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -15,6 +15,7 @@
 #include "llvm/CodeGen/StackProtector.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/MemoryLocation.h"
@@ -620,18 +621,19 @@ BasicBlock *StackProtector::CreateFailBB() {
   if (F->getSubprogram())
     B.SetCurrentDebugLocation(
         DILocation::get(Context, 0, 0, F->getSubprogram()));
+  FunctionCallee StackChkFail;
+  SmallVector<Value *, 1> Args;
   if (Trip.isOSOpenBSD()) {
-    FunctionCallee StackChkFail = M->getOrInsertFunction(
-        "__stack_smash_handler", Type::getVoidTy(Context),
-        Type::getInt8PtrTy(Context));
-
-    B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
+    StackChkFail = M->getOrInsertFunction("__stack_smash_handler",
+                                          Type::getVoidTy(Context),
+                                          Type::getInt8PtrTy(Context));
+    Args.push_back(B.CreateGlobalStringPtr(F->getName(), "SSH"));
   } else {
-    FunctionCallee StackChkFail =
+    StackChkFail =
         M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));
-
-    B.CreateCall(StackChkFail, {});
   }
+  cast<Function>(StackChkFail.getCallee())->addFnAttr(Attribute::NoReturn);
+  B.CreateCall(StackChkFail, Args);
   B.CreateUnreachable();
   return FailBB;
 }
diff --git a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
index 5e3bea0a83c24..da8e7b16a0cef 100644
--- a/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
+++ b/llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll
@@ -53,7 +53,6 @@ define i32 @z() nounwind ssp {
 ; CHECK-NEXT:    retl
 ; CHECK-NEXT:  LBB0_3: ## %CallStackCheckFailBlk
 ; CHECK-NEXT:    calll ___stack_chk_fail
-; CHECK-NEXT:    ud2
 entry:
   %retval = alloca i32 ; <i32*> [#uses=2]
  %xxx = alloca %struct.X ;
[#uses=6] diff --git a/llvm/test/CodeGen/X86/stack-protector-weight.ll b/llvm/test/CodeGen/X86/stack-protector-weight.ll index 0b7620fdee657..862b130bfa4c6 100644 --- a/llvm/test/CodeGen/X86/stack-protector-weight.ll +++ b/llvm/test/CodeGen/X86/stack-protector-weight.ll @@ -10,7 +10,7 @@ ; DARWIN-SELDAG: bb.[[SUCCESS]]{{[0-9a-zA-Z_.]+}}: ; DARWIN-IR: # Machine code for function test_branch_weights: -; DARWIN-IR: successors: %bb.[[SUCCESS:[0-9]+]](0x7fffffff), %bb.[[FAILURE:[0-9]+]] +; DARWIN-IR: successors: %bb.[[SUCCESS:[0-9]+]](0x7ffff800), %bb.[[FAILURE:[0-9]+]] ; DARWIN-IR: bb.[[SUCCESS]]{{[0-9a-zA-Z_.]+}}: ; DARWIN-IR: bb.[[FAILURE]]{{[0-9a-zA-Z_.]+}}: ; DARWIN-IR: CALL64pcrel32 @__stack_chk_fail From 0c5cee779929f840f4f286c5894a01f583ee7b4a Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Wed, 22 Mar 2023 16:17:49 -0700 Subject: [PATCH 472/691] [lldb-server] Use Platform plugin corresponding to the host In ee232506b870ce5282cc4da5ca493d41d361feb3 I moved UnixSignal initialization from lldbTarget to the various platform plugins. This inadvertently broke lldb-server because lldb-server doesn't use Platform plugins. lldb-server still needs to be able to create a UnixSignals object for the host platform so we can add the relevant platform plugin to lldb-server to make sure we always have a HostPlatform. Differential Revision: https://reviews.llvm.org/D146668 --- .../inferior-crashing/TestInferiorCrashing.py | 4 +++- lldb/tools/lldb-server/CMakeLists.txt | 9 +++++++++ .../lldb-server/SystemInitializerLLGS.cpp | 19 +++++++++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/functionalities/inferior-crashing/TestInferiorCrashing.py b/lldb/test/API/functionalities/inferior-crashing/TestInferiorCrashing.py index b63a09d047024..172c00eb59dc2 100644 --- a/lldb/test/API/functionalities/inferior-crashing/TestInferiorCrashing.py +++ b/lldb/test/API/functionalities/inferior-crashing/TestInferiorCrashing.py @@ -63,7 +63,9 @@ def inferior_crashing(self): # The exact stop reason depends on the platform if self.platformIsDarwin(): stop_reason = 'stop reason = EXC_BAD_ACCESS' - elif self.getPlatform() == "linux" or self.getPlatform() == "freebsd": + elif self.getPlatform() == "linux": + stop_reason = 'stop reason = signal SIGSEGV: address not mapped to object' + elif self.getPlatform() == "freebsd": stop_reason = 'stop reason = signal SIGSEGV' else: stop_reason = 'stop reason = invalid address' diff --git a/lldb/tools/lldb-server/CMakeLists.txt b/lldb/tools/lldb-server/CMakeLists.txt index 67103e87a1d4a..56da4c8b56807 100644 --- a/lldb/tools/lldb-server/CMakeLists.txt +++ b/lldb/tools/lldb-server/CMakeLists.txt @@ -7,20 +7,29 @@ set(LLDB_PLUGINS) if(CMAKE_SYSTEM_NAME MATCHES "Linux|Android") list(APPEND LLDB_PLUGINS lldbPluginProcessLinux) + if (CMAKE_SYSTEM_NAME MATCHES "Linux") + list(APPEND LLDB_PLUGINS lldbPluginPlatformLinux) + else() + list(APPEND LLDB_PLUGINS lldbPluginPlatformAndroid) + endif() endif() if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") list(APPEND LLDB_PLUGINS lldbPluginProcessFreeBSD) + list(APPEND LLDB_PLUGINS lldbPluginPlatformFreeBSD) endif() if(CMAKE_SYSTEM_NAME MATCHES "NetBSD") list(APPEND LLDB_PLUGINS lldbPluginProcessNetBSD) + list(APPEND LLDB_PLUGINS lldbPluginPlatformNetBSD) endif() if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND LLDB_PLUGINS lldbPluginObjectFileMachO) + list(APPEND LLDB_PLUGINS lldbPluginPlatformMacOSX) elseif(CMAKE_SYSTEM_NAME MATCHES "Windows") list(APPEND LLDB_PLUGINS lldbPluginObjectFilePECOFF) + list(APPEND 
LLDB_PLUGINS lldbPluginPlatformWindows) else() list(APPEND LLDB_PLUGINS lldbPluginObjectFileELF) endif() diff --git a/lldb/tools/lldb-server/SystemInitializerLLGS.cpp b/lldb/tools/lldb-server/SystemInitializerLLGS.cpp index 4233252a84dfc..1909ea4dc7984 100644 --- a/lldb/tools/lldb-server/SystemInitializerLLGS.cpp +++ b/lldb/tools/lldb-server/SystemInitializerLLGS.cpp @@ -11,12 +11,29 @@ #if defined(__APPLE__) #include "Plugins/ObjectFile/Mach-O/ObjectFileMachO.h" using HostObjectFile = ObjectFileMachO; +#include "Plugins/Platform/MacOSX/PlatformMacOSX.h" +using HostPlatform = lldb_private::PlatformMacOSX; #elif defined(_WIN32) #include "Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.h" using HostObjectFile = ObjectFilePECOFF; +#include "Plugins/Platform/Windows/PlatformWindows.h" +using HostPlatform = lldb_private::PlatformWindows; #else #include "Plugins/ObjectFile/ELF/ObjectFileELF.h" using HostObjectFile = ObjectFileELF; +#if defined(__ANDROID__) +#include "Plugins/Platform/Android/PlatformAndroid.h" +using HostPlatform = lldb_private::platform_android::PlatformAndroid; +#elif defined(__FreeBSD__) +#include "Plugins/Platform/FreeBSD/PlatformFreeBSD.h" +using HostPlatform = lldb_private::platform_freebsd::PlatformFreeBSD; +#elif defined(__linux__) +#include "Plugins/Platform/Linux/PlatformLinux.h" +using HostPlatform = lldb_private::platform_linux::PlatformLinux; +#elif defined(__NetBSD__) +#include "Plugins/Platform/NetBSD/PlatformNetBSD.h" +using HostPlatform = lldb_private::platform_netbsd::PlatformNetBSD; +#endif #endif #if defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM64) @@ -58,6 +75,7 @@ llvm::Error SystemInitializerLLGS::Initialize() { return e; HostObjectFile::Initialize(); + HostPlatform::Initialize(); #if defined(LLDB_TARGET_ARM) || defined(LLDB_TARGET_ARM64) EmulateInstructionARM::Initialize(); @@ -80,6 +98,7 @@ llvm::Error SystemInitializerLLGS::Initialize() { void SystemInitializerLLGS::Terminate() { HostObjectFile::Terminate(); + HostPlatform::Terminate(); #if defined(LLDB_TARGET_ARM) || defined(LLDB_TARGET_ARM64) EmulateInstructionARM::Terminate(); From 4b398ec456cdba142251918267c605d49ca7c6ef Mon Sep 17 00:00:00 2001 From: Kirill Stoimenov Date: Thu, 23 Mar 2023 20:25:47 +0000 Subject: [PATCH 473/691] [HWASAN] Fix decorate_proc_maps to work with HWASAN --- .../sanitizer_common/TestCases/Linux/decorate_proc_maps.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cpp index 60f32c2d83dbf..d0400afe743b3 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/decorate_proc_maps.cpp @@ -49,6 +49,10 @@ int main(void) { // CHECK-asan: ---p {{.*}} [shadow gap] // CHECK-asan: rw-p {{.*}} [high shadow] +// CHECK-hwasan: rw-p {{.*}} [low shadow] +// CHECK-hwasan: ---p {{.*}} [shadow gap] +// CHECK-hwasan: rw-p {{.*}} [high shadow] + // CHECK-msan: ---p {{.*}} [invalid] // CHECK-msan: rw-p {{.*}} [shadow{{.*}}] // CHECK-msan: ---p {{.*}} [origin{{.*}}] From 1d30afdc2d97d348b971ae48716a0e79c6c5a29e Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Thu, 23 Mar 2023 13:22:32 -0700 Subject: [PATCH 474/691] [PATCH] Enable targeting riscv64-linux-android Reviewers: ccross, asb, phosek, enh, srhines, hiraditya Putting: https://android.googlesource.com/toolchain/llvm_android/+/refs/heads/master/patches/Enable-targeting-riscv64-linux-android.patch for 
review. Differential Revision: https://reviews.llvm.org/D146560 --- clang/lib/Driver/ToolChains/Linux.cpp | 2 ++ compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake | 2 +- compiler-rt/lib/sanitizer_common/sanitizer_linux.h | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 848d7247c20c0..e25895fd0b636 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -126,6 +126,8 @@ std::string Linux::getMultiarchTriple(const Driver &D, case llvm::Triple::ppc64le: return "powerpc64le-linux-gnu"; case llvm::Triple::riscv64: + if (IsAndroid) + return "riscv64-linux-android"; return "riscv64-linux-gnu"; case llvm::Triple::sparc: return "sparc-linux-gnu"; diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index e3fe5570de26a..99d672de4e882 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -42,7 +42,7 @@ if(OS_NAME MATCHES "Linux") elseif (OS_NAME MATCHES "Windows") set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64}) elseif(OS_NAME MATCHES "Android") - set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}) + set(ALL_FUZZER_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${RISCV64}) else() set(ALL_FUZZER_SUPPORTED_ARCH ${X86_64} ${ARM64}) endif() diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h index 2c769dd59aa09..c84c04a877594 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.h @@ -152,6 +152,9 @@ inline void ReleaseMemoryPagesToOSAndZeroFill(uptr beg, uptr end) { "rdhwr %0,$29\n" \ ".set pop\n" : "=r"(__v)); \ __v; }) +#elif defined (__riscv) +# define __get_tls() \ + ({ void** __v; __asm__("mv %0, tp" : "=r"(__v)); __v; }) #elif defined(__i386__) # define __get_tls() \ ({ void** __v; __asm__("movl %%gs:0, %0" : "=r"(__v)); __v; }) From 156d966ec47041a071022460d68d94717460fa5a Mon Sep 17 00:00:00 2001 From: AdityaK <1894981+hiraditya@users.noreply.github.com> Date: Tue, 21 Mar 2023 15:42:25 -0700 Subject: [PATCH 475/691] Remove mips target triple for Android Reviewers: enh, phosek, srhines, MaskRay thanks to @enh for pointing these out. 
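(Context, not stated in the original commit message: the Android NDK itself
dropped MIPS support around r17, so these triples no longer correspond to any
supported Android configuration.)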
Differential Revision: https://reviews.llvm.org/D146565 --- clang/lib/Driver/ToolChains/Gnu.cpp | 19 ------ clang/lib/Driver/ToolChains/Linux.cpp | 4 -- clang/test/Driver/android-ndk-standalone.cpp | 16 ----- clang/test/Driver/android-pie.c | 10 --- clang/test/Driver/android-standalone.cpp | 48 ------------- clang/test/Driver/clang-translation.c | 18 ----- clang/test/Driver/linux-ld.c | 72 -------------------- clang/test/Driver/pic.c | 3 - 8 files changed, 190 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 7e72a1d1433da..0c8868109f7ee 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -2431,9 +2431,6 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( static const char *const AArch64AndroidTriples[] = { "aarch64-linux-android"}; static const char *const ARMAndroidTriples[] = {"arm-linux-androideabi"}; - static const char *const MIPSELAndroidTriples[] = {"mipsel-linux-android"}; - static const char *const MIPS64ELAndroidTriples[] = { - "mips64el-linux-android"}; static const char *const X86AndroidTriples[] = {"i686-linux-android"}; static const char *const X86_64AndroidTriples[] = {"x86_64-linux-android"}; @@ -2448,22 +2445,6 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( LibDirs.append(begin(ARMLibDirs), end(ARMLibDirs)); TripleAliases.append(begin(ARMAndroidTriples), end(ARMAndroidTriples)); break; - case llvm::Triple::mipsel: - LibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs)); - TripleAliases.append(begin(MIPSELAndroidTriples), - end(MIPSELAndroidTriples)); - BiarchLibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs)); - BiarchTripleAliases.append(begin(MIPS64ELAndroidTriples), - end(MIPS64ELAndroidTriples)); - break; - case llvm::Triple::mips64el: - LibDirs.append(begin(MIPS64ELLibDirs), end(MIPS64ELLibDirs)); - TripleAliases.append(begin(MIPS64ELAndroidTriples), - end(MIPS64ELAndroidTriples)); - BiarchLibDirs.append(begin(MIPSELLibDirs), end(MIPSELLibDirs)); - BiarchTripleAliases.append(begin(MIPSELAndroidTriples), - end(MIPSELAndroidTriples)); - break; case llvm::Triple::x86_64: LibDirs.append(begin(X86_64LibDirs), end(X86_64LibDirs)); TripleAliases.append(begin(X86_64AndroidTriples), diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index e25895fd0b636..77ad9605addab 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -92,8 +92,6 @@ std::string Linux::getMultiarchTriple(const Driver &D, case llvm::Triple::mips: return IsMipsR6 ? "mipsisa32r6-linux-gnu" : "mips-linux-gnu"; case llvm::Triple::mipsel: - if (IsAndroid) - return "mipsel-linux-android"; return IsMipsR6 ? "mipsisa32r6el-linux-gnu" : "mipsel-linux-gnu"; case llvm::Triple::mips64: { std::string MT = std::string(IsMipsR6 ? "mipsisa64r6" : "mips64") + @@ -105,8 +103,6 @@ std::string Linux::getMultiarchTriple(const Driver &D, break; } case llvm::Triple::mips64el: { - if (IsAndroid) - return "mips64el-linux-android"; std::string MT = std::string(IsMipsR6 ? "mipsisa64r6el" : "mips64el") + "-linux-" + (IsMipsN32Abi ? 
"gnuabin32" : "gnuabi64"); if (D.getVFS().exists(concat(SysRoot, "/lib", MT))) diff --git a/clang/test/Driver/android-ndk-standalone.cpp b/clang/test/Driver/android-ndk-standalone.cpp index aeb2678c9caae..397460dbd7803 100644 --- a/clang/test/Driver/android-ndk-standalone.cpp +++ b/clang/test/Driver/android-ndk-standalone.cpp @@ -246,22 +246,6 @@ // CHECK-ARM64: "-L{{.*}}/lib/gcc/aarch64-linux-android/4.9/../../../../aarch64-linux-android/lib" // // RUN: %clang -### %s 2>&1 \ -// RUN: --target=mipsel-linux-android21 \ -// RUN: -mips32 \ -// RUN: --gcc-toolchain=%S/Inputs/basic_android_ndk_tree \ -// RUN: --sysroot=%S/Inputs/basic_android_ndk_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-MIPS %s -// CHECK-MIPS: "-cc1" -// CHECK-MIPS: "-internal-isystem" "{{.*}}/include/c++/v1" -// CHECK-MIPS: "-internal-externc-isystem" "{{.*}}/sysroot/include" -// CHECK-MIPS: "-internal-externc-isystem" "{{.*}}/sysroot/usr/include" -// CHECK-MIPS: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-MIPS: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.9" -// CHECK-MIPS: "-L{{.*}}/sysroot/usr/lib/mipsel-linux-android/21" -// CHECK-MIPS: "-L{{.*}}/sysroot/usr/lib/mipsel-linux-android" -// CHECK-MIPS: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.9/../../../../mipsel-linux-android/lib" -// -// RUN: %clang -### %s 2>&1 \ // RUN: --target=i686-linux-android21 \ // RUN: --gcc-toolchain=%S/Inputs/basic_android_ndk_tree \ // RUN: --sysroot=%S/Inputs/basic_android_ndk_tree/sysroot \ diff --git a/clang/test/Driver/android-pie.c b/clang/test/Driver/android-pie.c index c006b90891e89..8620e18565458 100644 --- a/clang/test/Driver/android-pie.c +++ b/clang/test/Driver/android-pie.c @@ -8,11 +8,6 @@ // RUN: %clang %s -### -o %t.o 2>&1 --target=arm-linux-android24 \ // RUN: | FileCheck --check-prefix=PIE %s -// RUN: %clang %s -### -o %t.o 2>&1 --target=mipsel-linux-android \ -// RUN: | FileCheck --check-prefix=PIE %s -// RUN: %clang %s -### -o %t.o 2>&1 --target=mipsel-linux-android24 \ -// RUN: | FileCheck --check-prefix=PIE %s - // RUN: %clang %s -### -o %t.o 2>&1 --target=i686-linux-android \ // RUN: | FileCheck --check-prefix=PIE %s // RUN: %clang %s -### -o %t.o 2>&1 --target=i686-linux-android24 \ @@ -28,11 +23,6 @@ // RUN: %clang %s -### -o %t.o 2>&1 --target=arm64-linux-android24 \ // RUN: | FileCheck --check-prefix=PIE %s -// RUN: %clang %s -### -o %t.o 2>&1 --target=mips64el-linux-android \ -// RUN: | FileCheck --check-prefix=PIE %s -// RUN: %clang %s -### -o %t.o 2>&1 --target=mips64el-linux-android24 \ -// RUN: | FileCheck --check-prefix=PIE %s - // RUN: %clang %s -### -o %t.o 2>&1 --target=x86_64-linux-android \ // RUN: | FileCheck --check-prefix=PIE %s // RUN: %clang %s -### -o %t.o 2>&1 --target=x86_64-linux-android24 \ diff --git a/clang/test/Driver/android-standalone.cpp b/clang/test/Driver/android-standalone.cpp index 7363497c880a5..0246d1371deb9 100644 --- a/clang/test/Driver/android-standalone.cpp +++ b/clang/test/Driver/android-standalone.cpp @@ -45,51 +45,3 @@ // CHECK-ARM64: "-L{{.*}}/lib/gcc/aarch64-linux-android/4.8" // CHECK-ARM64: "-L{{.*}}/lib/gcc/aarch64-linux-android/4.8/../../../../aarch64-linux-android/lib" // CHECK-ARM64: "-L{{.*}}/sysroot/usr/lib" -// -// RUN: %clang -### %s 2>&1 \ -// RUN: --target=mipsel-linux-android \ -// RUN: -mips32 -stdlib=libstdc++ \ -// RUN: --gcc-toolchain=%S/Inputs/basic_android_tree \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-MIPS %s -// CHECK-MIPS: "-cc1" -// CHECK-MIPS: "-internal-isystem" 
"{{.*}}/mipsel-linux-android/include/c++/4.4.3" -// CHECK-MIPS: "-internal-isystem" "{{.*}}/mipsel-linux-android/include/c++/4.4.3/mipsel-linux-android" -// CHECK-MIPS: "-internal-externc-isystem" "{{.*}}/sysroot/include" -// CHECK-MIPS: "-internal-externc-isystem" "{{.*}}/sysroot/usr/include" -// CHECK-MIPS: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-MIPS: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3" -// CHECK-MIPS: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3/../../../../mipsel-linux-android/lib" -// CHECK-MIPS: "-L{{.*}}/sysroot/usr/lib" -// -// RUN: %clang -### %s 2>&1 \ -// RUN: --target=mipsel-linux-android \ -// RUN: -march=mips32 -mips32r2 -stdlib=libstdc++ \ -// RUN: --gcc-toolchain=%S/Inputs/basic_android_tree \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-MIPSR2 %s -// CHECK-MIPSR2: "-cc1" -// CHECK-MIPSR2: "-internal-isystem" "{{.*}}/mipsel-linux-android/include/c++/4.4.3" -// CHECK-MIPSR2: "-internal-isystem" "{{.*}}/mipsel-linux-android/include/c++/4.4.3/mipsel-linux-android" -// CHECK-MIPSR2: "-internal-externc-isystem" "{{.*}}/sysroot/include" -// CHECK-MIPSR2: "-internal-externc-isystem" "{{.*}}/sysroot/usr/include" -// CHECK-MIPSR2: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-MIPSR2: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3/mips-r2" -// CHECK-MIPSR2: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3/../../../../mipsel-linux-android/lib" -// CHECK-MIPSR2: "-L{{.*}}/sysroot/usr/lib" -// -// RUN: %clang -### %s 2>&1 \ -// RUN: --target=mipsel-linux-android \ -// RUN: -mips32 -march=mips32r2 -stdlib=libstdc++ \ -// RUN: --gcc-toolchain=%S/Inputs/basic_android_tree \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-MIPSR2-A %s -// CHECK-MIPSR2-A: "-cc1" -// CHECK-MIPSR2-A: "-internal-isystem" "{{.*}}/mipsel-linux-android/include/c++/4.4.3" -// CHECK-MIPSR2-A: "-internal-isystem" "{{.*}}/mipsel-linux-android/include/c++/4.4.3/mipsel-linux-android" -// CHECK-MIPSR2-A: "-internal-externc-isystem" "{{.*}}/sysroot/include" -// CHECK-MIPSR2-A: "-internal-externc-isystem" "{{.*}}/sysroot/usr/include" -// CHECK-MIPSR2-A: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-MIPSR2-A: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3/mips-r2" -// CHECK-MIPSR2-A: "-L{{.*}}/lib/gcc/mipsel-linux-android/4.4.3/../../../../mipsel-linux-android/lib" -// CHECK-MIPSR2-A: "-L{{.*}}/sysroot/usr/lib" diff --git a/clang/test/Driver/clang-translation.c b/clang/test/Driver/clang-translation.c index ca98ca5e8228d..058ac32bbdb50 100644 --- a/clang/test/Driver/clang-translation.c +++ b/clang/test/Driver/clang-translation.c @@ -392,24 +392,6 @@ // MIPSR6EL: "-target-cpu" "mips32r6" // MIPSR6EL: "-mfloat-abi" "hard" -// RUN: %clang -target mipsel-linux-android -### -S %s 2>&1 | \ -// RUN: FileCheck -check-prefix=MIPSEL-ANDROID %s -// MIPSEL-ANDROID: clang -// MIPSEL-ANDROID: "-cc1" -// MIPSEL-ANDROID: "-target-cpu" "mips32" -// MIPSEL-ANDROID: "-target-feature" "+fpxx" -// MIPSEL-ANDROID: "-target-feature" "+nooddspreg" -// MIPSEL-ANDROID: "-mfloat-abi" "hard" - -// RUN: %clang -target mipsel-linux-android -### -S %s -mcpu=mips32r6 2>&1 | \ -// RUN: FileCheck -check-prefix=MIPSEL-ANDROID-R6 %s -// MIPSEL-ANDROID-R6: clang -// MIPSEL-ANDROID-R6: "-cc1" -// MIPSEL-ANDROID-R6: "-target-cpu" "mips32r6" -// MIPSEL-ANDROID-R6: "-target-feature" "+fp64" -// MIPSEL-ANDROID-R6: "-target-feature" "+nooddspreg" -// MIPSEL-ANDROID-R6: "-mfloat-abi" "hard" - // RUN: %clang 
-target mips64-linux-gnu -### -S %s 2>&1 | \ // RUN: FileCheck -check-prefix=MIPS64 %s // MIPS64: clang diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c index 10e6bee183050..be1230ac0ab63 100644 --- a/clang/test/Driver/linux-ld.c +++ b/clang/test/Driver/linux-ld.c @@ -1046,16 +1046,6 @@ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android -rtlib=platform --unwindlib=platform \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1101,18 +1091,6 @@ // RUN: -shared \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-SO %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -shared \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-SO %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -shared \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-SO %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android -rtlib=platform --unwindlib=platform \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1158,18 +1136,6 @@ // RUN: -static \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -static \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -static \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-STATIC %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android -rtlib=platform --unwindlib=platform \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1216,18 +1182,6 @@ // RUN: -pie \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-PIE %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -pie \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PIE %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -rtlib=platform --unwindlib=platform \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -pie \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PIE %s -// RUN: %clang -### %s -no-pie 2>&1 \ 
// RUN: --target=i686-linux-android -rtlib=platform --unwindlib=platform \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1259,11 +1213,6 @@ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-32 %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-32 %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=aarch64-linux-android \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1313,15 +1262,6 @@ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -pthread \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -pthread \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android -pthread \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ @@ -1356,18 +1296,6 @@ // RUN: -shared \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mipsel-linux-android -pthread \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -shared \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s -// RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android -pthread \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: -shared \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-PTHREAD %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android -pthread \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ diff --git a/clang/test/Driver/pic.c b/clang/test/Driver/pic.c index b05f363603a2a..daa3a55430068 100644 --- a/clang/test/Driver/pic.c +++ b/clang/test/Driver/pic.c @@ -280,9 +280,6 @@ // RUN: %clang -c %s -target arm-linux-androideabi24 -### 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-PIE2 // -// RUN: %clang -c %s -target mipsel-linux-android24 -### 2>&1 \ -// RUN: | FileCheck %s --check-prefix=CHECK-PIE1 -// // 64-bit Android targets are always PIE. // RUN: %clang -c %s -target aarch64-linux-android -### 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-PIE2 From f23dcb2f2a4728b812d70ead630798a205e77d60 Mon Sep 17 00:00:00 2001 From: Gulfem Savrun Yeniceri Date: Thu, 23 Mar 2023 20:54:21 +0000 Subject: [PATCH 476/691] Revert "[JITLink] Initial AArch32 backend" This reverts commit c2de8ff92753acdb1ace7a27cc11cb09f28eb8fa. It caused a segmentation fault while running ExecutionEngine tests on Mac. 
https://luci-milo.appspot.com/ui/p/fuchsia/builders/toolchain.ci/clang-mac-x64/b8785839382041226465/overview --- .../ExecutionEngine/JITLink/ELF_aarch32.h | 38 -- .../llvm/ExecutionEngine/JITLink/aarch32.h | 293 ---------- .../ExecutionEngine/JITLink/CMakeLists.txt | 2 - llvm/lib/ExecutionEngine/JITLink/ELF.cpp | 9 - .../JITLink/ELFLinkGraphBuilder.h | 21 - .../ExecutionEngine/JITLink/ELF_aarch32.cpp | 299 ---------- llvm/lib/ExecutionEngine/JITLink/aarch32.cpp | 514 ------------------ .../Orc/ObjectLinkingLayer.cpp | 6 +- .../JITLink/AArch32/ELF_thumbv7_printf.s | 46 -- .../JITLink/AArch32/lit.local.cfg | 2 - .../ExecutionEngine/JITLink/AArch32Tests.cpp | 200 ------- .../ExecutionEngine/JITLink/CMakeLists.txt | 1 - 12 files changed, 1 insertion(+), 1430 deletions(-) delete mode 100644 llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h delete mode 100644 llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h delete mode 100644 llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp delete mode 100644 llvm/lib/ExecutionEngine/JITLink/aarch32.cpp delete mode 100644 llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s delete mode 100644 llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg delete mode 100644 llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h deleted file mode 100644 index 25d1c3aac2c26..0000000000000 --- a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h +++ /dev/null @@ -1,38 +0,0 @@ -//===---- ELF_aarch32.h - JIT link functions for arm/thumb -----*- C++ -*--===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// jit-link functions for ELF/aarch32. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32 -#define LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32 - -#include "llvm/ExecutionEngine/JITLink/JITLink.h" -#include "llvm/ExecutionEngine/JITLink/aarch32.h" - -namespace llvm { -namespace jitlink { - -/// Create a LinkGraph from an ELF/arm relocatable object -/// -/// Note: The graph does not take ownership of the underlying buffer, nor copy -/// its contents. The caller is responsible for ensuring that the object buffer -/// outlives the graph. -Expected> -createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer); - -/// jit-link the given object buffer, which must be an ELF arm/thumb object -/// file. -void link_ELF_aarch32(std::unique_ptr G, - std::unique_ptr Ctx); - -} // end namespace jitlink -} // end namespace llvm - -#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32 diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h deleted file mode 100644 index 8488b10278771..0000000000000 --- a/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h +++ /dev/null @@ -1,293 +0,0 @@ -//===------ aarch32.h - Generic JITLink arm/thumb utilities -----*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Generic utilities for graphs representing arm/thumb objects. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTIONENGINE_JITLINK_AARCH32 -#define LLVM_EXECUTIONENGINE_JITLINK_AARCH32 - -#include "TableManager.h" -#include "llvm/ExecutionEngine/JITLink/JITLink.h" -#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" -#include "llvm/Support/ARMBuildAttributes.h" -#include "llvm/Support/Error.h" - -namespace llvm { -namespace jitlink { -namespace aarch32 { - -/// JITLink-internal AArch32 fixup kinds -enum EdgeKind_aarch32 : Edge::Kind { - - /// - /// Relocations of class Data - /// - FirstDataRelocation = Edge::FirstRelocation, - - /// Plain 32-bit value relocation in target endianness - Data_Delta32 = FirstDataRelocation, - - LastDataRelocation = Data_Delta32, - - /// - /// Relocations of class Arm (covers fixed-width 4-byte instruction subset) - /// - FirstArmRelocation, - - /// TODO: Arm_Call is here only as a placeholder for now. - Arm_Call = FirstArmRelocation, - - LastArmRelocation = Arm_Call, - - /// - /// Relocations of class Thumb16 and Thumb32 (covers Thumb instruction subset) - /// - FirstThumbRelocation, - - /// Write immediate value for PC-relative branch with link (can bridge between - /// Arm and Thumb). - Thumb_Call = FirstThumbRelocation, - - /// Write immediate value for (unconditional) PC-relative branch without link. - Thumb_Jump24, - - /// Write immediate value to the lower halfword of the destination register - Thumb_MovwAbsNC, - - /// Write immediate value to the top halfword of the destination register - Thumb_MovtAbs, - - LastThumbRelocation = Thumb_MovtAbs, -}; - -/// Flags enum for AArch32-specific symbol properties -enum TargetFlags_aarch32 : TargetFlagsType { - ThumbSymbol = 1 << 0, -}; - -/// Human-readable name for a given CPU architecture kind -const char *getCPUArchName(ARMBuildAttrs::CPUArch K); - -/// Get a human-readable name for the given AArch32 edge kind. -const char *getEdgeKindName(Edge::Kind K); - -/// AArch32 uses stubs for a number of purposes, like branch range extension -/// or interworking between Arm and Thumb instruction subsets. -/// -/// Stub implementations vary depending on CPU architecture (v4, v6, v7), -/// instruction subset and branch type (absolute/PC-relative). -/// -/// For each kind of stub, the StubsFlavor defines one concrete form that is -/// used throughout the LinkGraph. -/// -/// Stubs are often called "veneers" in the official docs and online. -/// -enum StubsFlavor { - Unsupported = 0, - Thumbv7, -}; - -/// JITLink sub-arch configuration for Arm CPU models -struct ArmConfig { - bool J1J2BranchEncoding = false; - StubsFlavor Stubs = Unsupported; -}; - -/// Obtain the sub-arch configuration for a given Arm CPU model. 
-inline ArmConfig getArmConfigForCPUArch(ARMBuildAttrs::CPUArch CPUArch) { - ArmConfig ArmCfg; - switch (CPUArch) { - case ARMBuildAttrs::v7: - case ARMBuildAttrs::v8_A: - ArmCfg.J1J2BranchEncoding = true; - ArmCfg.Stubs = Thumbv7; - break; - default: - DEBUG_WITH_TYPE("jitlink", { - dbgs() << " Warning: ARM config not defined for CPU architecture " - << getCPUArchName(CPUArch); - }); - break; - } - return ArmCfg; -} - -/// Immutable pair of halfwords, Hi and Lo, with overflow check -struct HalfWords { - constexpr HalfWords() : Hi(0), Lo(0) {} - constexpr HalfWords(uint32_t Hi, uint32_t Lo) : Hi(Hi), Lo(Lo) { - assert(isUInt<16>(Hi) && "Overflow in first half-word"); - assert(isUInt<16>(Lo) && "Overflow in second half-word"); - } - const uint16_t Hi; // First halfword - const uint16_t Lo; // Second halfword -}; - -/// Collection of named constants per fixup kind. It may contain but is not -/// limited to the following entries: -/// -/// Opcode - Values of the op-code bits in the instruction, with -/// unaffected bits nulled -/// OpcodeMask - Mask with all bits set that encode the op-code -/// ImmMask - Mask with all bits set that encode the immediate value -/// RegMask - Mask with all bits set that encode the register -/// -template struct FixupInfo {}; - -template <> struct FixupInfo { - static constexpr HalfWords Opcode{0xf000, 0x8000}; - static constexpr HalfWords OpcodeMask{0xf800, 0x8000}; - static constexpr HalfWords ImmMask{0x07ff, 0x2fff}; - static constexpr uint16_t LoBitConditional = 0x1000; -}; - -template <> struct FixupInfo { - static constexpr HalfWords Opcode{0xf000, 0xc000}; - static constexpr HalfWords OpcodeMask{0xf800, 0xc000}; - static constexpr HalfWords ImmMask{0x07ff, 0x2fff}; - static constexpr uint16_t LoBitH = 0x0001; - static constexpr uint16_t LoBitNoBlx = 0x1000; -}; - -template <> struct FixupInfo { - static constexpr HalfWords Opcode{0xf2c0, 0x0000}; - static constexpr HalfWords OpcodeMask{0xfbf0, 0x8000}; - static constexpr HalfWords ImmMask{0x040f, 0x70ff}; - static constexpr HalfWords RegMask{0x0000, 0x0f00}; -}; - -template <> -struct FixupInfo : public FixupInfo { - static constexpr HalfWords Opcode{0xf240, 0x0000}; -}; - -/// Helper function to read the initial addend for Data-class relocations. -Expected readAddendData(LinkGraph &G, Block &B, const Edge &E); - -/// Helper function to read the initial addend for Arm-class relocations. -Expected readAddendArm(LinkGraph &G, Block &B, const Edge &E); - -/// Helper function to read the initial addend for Thumb-class relocations. -Expected readAddendThumb(LinkGraph &G, Block &B, const Edge &E, - const ArmConfig &ArmCfg); - -/// Read the initial addend for a REL-type relocation. It's the value encoded -/// in the immediate field of the fixup location by the compiler. -inline Expected readAddend(LinkGraph &G, Block &B, const Edge &E, - const ArmConfig &ArmCfg) { - Edge::Kind Kind = E.getKind(); - if (Kind <= LastDataRelocation) - return readAddendData(G, B, E); - - if (Kind <= LastArmRelocation) - return readAddendArm(G, B, E); - - if (Kind <= LastThumbRelocation) - return readAddendThumb(G, B, E, ArmCfg); - - llvm_unreachable("Relocation must be of class Data, Arm or Thumb"); -} - -/// Helper function to apply the fixup for Data-class relocations. -Error applyFixupData(LinkGraph &G, Block &B, const Edge &E); - -/// Helper function to apply the fixup for Arm-class relocations. -Error applyFixupArm(LinkGraph &G, Block &B, const Edge &E); - -/// Helper function to apply the fixup for Thumb-class relocations. 
-Error applyFixupThumb(LinkGraph &G, Block &B, const Edge &E, - const ArmConfig &ArmCfg); - -/// Apply fixup expression for edge to block content. -inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, - const ArmConfig &ArmCfg) { - Edge::Kind Kind = E.getKind(); - - if (Kind <= LastDataRelocation) - return applyFixupData(G, B, E); - - if (Kind <= LastArmRelocation) - return applyFixupArm(G, B, E); - - if (Kind <= LastThumbRelocation) - return applyFixupThumb(G, B, E, ArmCfg); - - llvm_unreachable("Relocation must be of class Data, Arm or Thumb"); -} - -/// Stubs builder for a specific StubsFlavor -/// -/// Right now we only have one default stub kind, but we want to extend this -/// and allow creation of specific kinds in the future (e.g. branch range -/// extension or interworking). -/// -/// Let's keep it simple for the moment and not wire this through a GOT. -/// -template -class StubsManager : public TableManager> { -public: - StubsManager() = default; - - /// Name of the object file section that will contain all our stubs. - static StringRef getSectionName() { return "__llvm_jitlink_STUBS"; } - - /// Implements link-graph traversal via visitExistingEdges(). - bool visitEdge(LinkGraph &G, Block *B, Edge &E) { - if (E.getTarget().isDefined()) - return false; - - switch (E.getKind()) { - case Thumb_Call: - case Thumb_Jump24: { - DEBUG_WITH_TYPE("jitlink", { - dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " - << B->getFixupAddress(E) << " (" << B->getAddress() << " + " - << formatv("{0:x}", E.getOffset()) << ")\n"; - }); - E.setTarget(this->getEntryForTarget(G, E.getTarget())); - return true; - } - } - return false; - } - - /// Create a branch range extension stub for the class's flavor. - Symbol &createEntry(LinkGraph &G, Symbol &Target); - -private: - /// Create a new node in the link-graph for the given stub template. - template - Block &addStub(LinkGraph &G, const uint8_t (&Code)[Size], - uint64_t Alignment) { - ArrayRef Template(reinterpret_cast(Code), Size); - return G.createContentBlock(getStubsSection(G), Template, - orc::ExecutorAddr(), Alignment, 0); - } - - /// Get or create the object file section that will contain all our stubs. - Section &getStubsSection(LinkGraph &G) { - if (!StubsSection) - StubsSection = &G.createSection(getSectionName(), - orc::MemProt::Read | orc::MemProt::Exec); - return *StubsSection; - } - - Section *StubsSection = nullptr; -}; - -/// Create a branch range extension stub with Thumb encoding for v7 CPUs. 
-template <> -Symbol &StubsManager::createEntry(LinkGraph &G, Symbol &Target); - -} // namespace aarch32 -} // namespace jitlink -} // namespace llvm - -#endif // LLVM_EXECUTIONENGINE_JITLINK_AARCH32 diff --git a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt index bc86f45d3c185..52ff5e8370031 100644 --- a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt @@ -20,7 +20,6 @@ add_llvm_component_library(LLVMJITLink # ELF ELF.cpp ELFLinkGraphBuilder.cpp - ELF_aarch32.cpp ELF_aarch64.cpp ELF_i386.cpp ELF_loongarch.cpp @@ -34,7 +33,6 @@ add_llvm_component_library(LLVMJITLink COFF_x86_64.cpp # Architectures: - aarch32.cpp aarch64.cpp i386.cpp loongarch.cpp diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp index 340a0ce134475..ef0f19a785712 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp @@ -13,7 +13,6 @@ #include "llvm/ExecutionEngine/JITLink/ELF.h" #include "llvm/BinaryFormat/ELF.h" -#include "llvm/ExecutionEngine/JITLink/ELF_aarch32.h" #include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h" #include "llvm/ExecutionEngine/JITLink/ELF_i386.h" #include "llvm/ExecutionEngine/JITLink/ELF_loongarch.h" @@ -70,8 +69,6 @@ createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer) { switch (*TargetMachineArch) { case ELF::EM_AARCH64: return createLinkGraphFromELFObject_aarch64(ObjectBuffer); - case ELF::EM_ARM: - return createLinkGraphFromELFObject_aarch32(ObjectBuffer); case ELF::EM_LOONGARCH: return createLinkGraphFromELFObject_loongarch(ObjectBuffer); case ELF::EM_RISCV: @@ -93,12 +90,6 @@ void link_ELF(std::unique_ptr G, case Triple::aarch64: link_ELF_aarch64(std::move(G), std::move(Ctx)); return; - case Triple::arm: - case Triple::armeb: - case Triple::thumb: - case Triple::thumbeb: - link_ELF_aarch32(std::move(G), std::move(Ctx)); - return; case Triple::loongarch32: case Triple::loongarch64: link_ELF_loongarch(std::move(G), std::move(Ctx)); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index 1d98acf868695..9d2d4958dcf6c 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -127,12 +127,6 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase { Error graphifySections(); Error graphifySymbols(); - /// Override in derived classes to suppress certain sections in the link - /// graph. - virtual bool excludeSection(const typename ELFT::Shdr &Sect) const { - return false; - } - /// Traverse all matching ELFT::Rela relocation records in the given section. /// The handler function Func should be callable with this signature: /// Error(const typename ELFT::Rela &, @@ -327,13 +321,6 @@ template Error ELFLinkGraphBuilder::graphifySections() { auto Name = Obj.getSectionName(Sec, SectionStringTab); if (!Name) return Name.takeError(); - if (excludeSection(Sec)) { - LLVM_DEBUG({ - dbgs() << " " << SecIndex << ": Skipping section \"" << *Name - << "\" explicitly\n"; - }); - continue; - } // Skip null sections. 
if (Sec.sh_type == ELF::SHT_NULL) { @@ -577,10 +564,6 @@ Error ELFLinkGraphBuilder::forEachRelaRelocation( LLVM_DEBUG(dbgs() << " skipped (dwarf section)\n\n"); return Error::success(); } - if (excludeSection(**FixupSection)) { - LLVM_DEBUG(dbgs() << " skipped (fixup section excluded explicitly)\n\n"); - return Error::success(); - } // Lookup the link-graph node corresponding to the target section name. auto *BlockToFix = getGraphBlock(RelSect.sh_info); @@ -627,10 +610,6 @@ Error ELFLinkGraphBuilder::forEachRelRelocation( LLVM_DEBUG(dbgs() << " skipped (dwarf section)\n\n"); return Error::success(); } - if (excludeSection(**FixupSection)) { - LLVM_DEBUG(dbgs() << " skipped (fixup section excluded explicitly)\n\n"); - return Error::success(); - } // Lookup the link-graph node corresponding to the target section name. auto *BlockToFix = getGraphBlock(RelSect.sh_info); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp deleted file mode 100644 index 0010088fef1e7..0000000000000 --- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp +++ /dev/null @@ -1,299 +0,0 @@ -//===----- ELF_aarch32.cpp - JIT linker implementation for arm/thumb ------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// ELF/aarch32 jit-link implementation. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ExecutionEngine/JITLink/ELF_aarch32.h" - -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/ExecutionEngine/JITLink/JITLink.h" -#include "llvm/ExecutionEngine/JITLink/aarch32.h" -#include "llvm/Object/ELF.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/TargetParser/ARMTargetParser.h" - -#include "ELFLinkGraphBuilder.h" -#include "JITLinkGeneric.h" - -#define DEBUG_TYPE "jitlink" - -using namespace llvm::object; - -namespace llvm { -namespace jitlink { - -/// Translate from ELF relocation type to JITLink-internal edge kind. -Expected getJITLinkEdgeKind(uint32_t ELFType) { - switch (ELFType) { - case ELF::R_ARM_REL32: - return aarch32::Data_Delta32; - case ELF::R_ARM_CALL: - return aarch32::Arm_Call; - case ELF::R_ARM_THM_CALL: - return aarch32::Thumb_Call; - case ELF::R_ARM_THM_JUMP24: - return aarch32::Thumb_Jump24; - case ELF::R_ARM_THM_MOVW_ABS_NC: - return aarch32::Thumb_MovwAbsNC; - case ELF::R_ARM_THM_MOVT_ABS: - return aarch32::Thumb_MovtAbs; - } - - return make_error( - "Unsupported aarch32 relocation " + formatv("{0:d}: ", ELFType) + - object::getELFRelocationTypeName(ELF::EM_ARM, ELFType)); -} - -/// Translate from JITLink-internal edge kind back to ELF relocation type. -Expected getELFRelocationType(Edge::Kind Kind) { - switch (static_cast(Kind)) { - case aarch32::Data_Delta32: - return ELF::R_ARM_REL32; - case aarch32::Arm_Call: - return ELF::R_ARM_CALL; - case aarch32::Thumb_Call: - return ELF::R_ARM_THM_CALL; - case aarch32::Thumb_Jump24: - return ELF::R_ARM_THM_JUMP24; - case aarch32::Thumb_MovwAbsNC: - return ELF::R_ARM_THM_MOVW_ABS_NC; - case aarch32::Thumb_MovtAbs: - return ELF::R_ARM_THM_MOVT_ABS; - } - - return make_error(formatv("Invalid aarch32 edge {0:d}: ", - Kind)); -} - -/// Get a human-readable name for the given ELF AArch32 edge kind. 
-const char *getELFAArch32EdgeKindName(Edge::Kind R) { - // No ELF-specific edge kinds yet - return aarch32::getEdgeKindName(R); -} - -class ELFJITLinker_aarch32 : public JITLinker { - friend class JITLinker; - -public: - ELFJITLinker_aarch32(std::unique_ptr Ctx, - std::unique_ptr G, PassConfiguration PassCfg, - aarch32::ArmConfig ArmCfg) - : JITLinker(std::move(Ctx), std::move(G), std::move(PassCfg)), - ArmCfg(std::move(ArmCfg)) {} - -private: - aarch32::ArmConfig ArmCfg; - - Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const { - return aarch32::applyFixup(G, B, E, ArmCfg); - } -}; - -template -class ELFLinkGraphBuilder_aarch32 - : public ELFLinkGraphBuilder> { -private: - using ELFT = ELFType; - using Base = ELFLinkGraphBuilder; - - bool excludeSection(const typename ELFT::Shdr &Sect) const override { - // TODO: An .ARM.exidx (Exception Index table) entry is 8-bytes in size and - // consists of 2 words. It might be sufficient to process only relocations - // in the the second word (offset 4). Please find more details in: Exception - // Handling ABI for the Arm® Architecture -> Index table entries - if (Sect.sh_type == ELF::SHT_ARM_EXIDX) - return true; - return false; - } - - Error addRelocations() override { - LLVM_DEBUG(dbgs() << "Processing relocations:\n"); - using Self = ELFLinkGraphBuilder_aarch32; - for (const auto &RelSect : Base::Sections) { - if (Error Err = Base::forEachRelRelocation(RelSect, this, - &Self::addSingleRelRelocation)) - return Err; - } - return Error::success(); - } - - Error addSingleRelRelocation(const typename ELFT::Rel &Rel, - const typename ELFT::Shdr &FixupSect, - Block &BlockToFix) { - uint32_t SymbolIndex = Rel.getSymbol(false); - auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec); - if (!ObjSymbol) - return ObjSymbol.takeError(); - - Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex); - if (!GraphSymbol) - return make_error( - formatv("Could not find symbol at given index, did you add it to " - "JITSymbolTable? 
index: {0}, shndx: {1} Size of table: {2}", - SymbolIndex, (*ObjSymbol)->st_shndx, - Base::GraphSymbols.size()), - inconvertibleErrorCode()); - - uint32_t Type = Rel.getType(false); - Expected Kind = getJITLinkEdgeKind(Type); - if (!Kind) - return Kind.takeError(); - - auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset; - Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress(); - Edge E(*Kind, Offset, *GraphSymbol, 0); - - Expected Addend = - aarch32::readAddend(*Base::G, BlockToFix, E, ArmCfg); - if (!Addend) - return Addend.takeError(); - - E.setAddend(*Addend); - LLVM_DEBUG({ - dbgs() << " "; - printEdge(dbgs(), BlockToFix, E, getELFAArch32EdgeKindName(*Kind)); - dbgs() << "\n"; - }); - - BlockToFix.addEdge(std::move(E)); - return Error::success(); - } - - aarch32::ArmConfig ArmCfg; - -protected: - TargetFlagsType makeTargetFlags(const typename ELFT::Sym &Sym) override { - if (Sym.getValue() & 0x01) - return aarch32::ThumbSymbol; - return TargetFlagsType{}; - } - - orc::ExecutorAddrDiff getRawOffset(const typename ELFT::Sym &Sym, - TargetFlagsType Flags) override { - assert((makeTargetFlags(Sym) & Flags) == Flags); - static constexpr uint64_t ThumbBit = 0x01; - return Sym.getValue() & ~ThumbBit; - } - -public: - ELFLinkGraphBuilder_aarch32(StringRef FileName, const ELFFile &Obj, - Triple TT, aarch32::ArmConfig ArmCfg) - : ELFLinkGraphBuilder(Obj, std::move(TT), FileName, - getELFAArch32EdgeKindName), - ArmCfg(std::move(ArmCfg)) {} -}; - -template -Error buildTables_ELF_aarch32(LinkGraph &G) { - LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n"); - - aarch32::StubsManager PLT; - visitExistingEdges(G, PLT); - return Error::success(); -} - -Expected> -createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer) { - LLVM_DEBUG({ - dbgs() << "Building jitlink graph for new input " - << ObjectBuffer.getBufferIdentifier() << "...\n"; - }); - - auto ELFObj = ObjectFile::createELFObjectFile(ObjectBuffer); - if (!ELFObj) - return ELFObj.takeError(); - - // Find out what exact AArch32 instruction set and features we target. - auto TT = (*ELFObj)->makeTriple(); - ARM::ArchKind AK = ARM::parseArch(TT.getArchName()); - if (AK == ARM::ArchKind::INVALID) - return make_error( - "Failed to build ELF link graph: Invalid ARM ArchKind"); - - // Resolve our internal configuration for the target. If at some point the - // CPUArch alone becomes too unprecise, we can find more details in the - // Tag_CPU_arch_profile. - aarch32::ArmConfig ArmCfg; - using namespace ARMBuildAttrs; - auto Arch = static_cast(ARM::getArchAttr(AK)); - switch (Arch) { - case v7: - case v8_A: - ArmCfg = aarch32::getArmConfigForCPUArch(Arch); - assert(ArmCfg.Stubs != aarch32::Unsupported && - "Provide a config for each supported CPU"); - break; - default: - return make_error( - "Failed to build ELF link graph: Unsupported CPU arch " + - StringRef(aarch32::getCPUArchName(Arch))); - } - - // Populate the link-graph. 
- switch (TT.getArch()) { - case Triple::arm: - case Triple::thumb: { - auto &ELFFile = cast>(**ELFObj).getELFFile(); - return ELFLinkGraphBuilder_aarch32( - (*ELFObj)->getFileName(), ELFFile, TT, ArmCfg) - .buildGraph(); - } - case Triple::armeb: - case Triple::thumbeb: { - auto &ELFFile = cast>(**ELFObj).getELFFile(); - return ELFLinkGraphBuilder_aarch32((*ELFObj)->getFileName(), - ELFFile, TT, ArmCfg) - .buildGraph(); - } - default: - return make_error( - "Failed to build ELF/aarch32 link graph: Invalid target triple " + - TT.getTriple()); - } -} - -void link_ELF_aarch32(std::unique_ptr G, - std::unique_ptr Ctx) { - const Triple &TT = G->getTargetTriple(); - - using namespace ARMBuildAttrs; - ARM::ArchKind AK = ARM::parseArch(TT.getArchName()); - auto CPU = static_cast(ARM::getArchAttr(AK)); - aarch32::ArmConfig ArmCfg = aarch32::getArmConfigForCPUArch(CPU); - - PassConfiguration PassCfg; - if (Ctx->shouldAddDefaultTargetPasses(TT)) { - // Add a mark-live pass. - if (auto MarkLive = Ctx->getMarkLivePass(TT)) - PassCfg.PrePrunePasses.push_back(std::move(MarkLive)); - else - PassCfg.PrePrunePasses.push_back(markAllSymbolsLive); - - switch (ArmCfg.Stubs) { - case aarch32::Thumbv7: - PassCfg.PostPrunePasses.push_back( - buildTables_ELF_aarch32); - break; - case aarch32::Unsupported: - llvm_unreachable("Check before building graph"); - } - } - - if (auto Err = Ctx->modifyPassConfig(*G, PassCfg)) - return Ctx->notifyFailed(std::move(Err)); - - ELFJITLinker_aarch32::link(std::move(Ctx), std::move(G), std::move(PassCfg), - std::move(ArmCfg)); -} - -} // namespace jitlink -} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp deleted file mode 100644 index 6f49a4578cf7c..0000000000000 --- a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp +++ /dev/null @@ -1,514 +0,0 @@ -//===--------- aarch32.cpp - Generic JITLink arm/thumb utilities ----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// Generic utilities for graphs representing arm/thumb objects. -// -//===----------------------------------------------------------------------===// - -#include "llvm/ExecutionEngine/JITLink/aarch32.h" - -#include "llvm/ADT/StringExtras.h" -#include "llvm/BinaryFormat/ELF.h" -#include "llvm/ExecutionEngine/JITLink/JITLink.h" -#include "llvm/Object/ELFObjectFile.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/MathExtras.h" - -#define DEBUG_TYPE "jitlink" - -namespace llvm { -namespace jitlink { -namespace aarch32 { - -using namespace support; -using namespace support::endian; - -/// Encode 22-bit immediate value for branch instructions without J1J2 range -/// extension (formats B T4, BL T1 and BLX T2). -/// -/// 00000:Imm11H:Imm11L:0 -> [ 00000:Imm11H, 00000:Imm11L ] -/// J1^ ^J2 will always be 1 -/// -HalfWords encodeImmBT4BlT1BlxT2(int64_t Value) { - constexpr uint32_t J1J2 = 0x2800; - uint32_t Imm11H = (Value >> 12) & 0x07ff; - uint32_t Imm11L = (Value >> 1) & 0x07ff; - return HalfWords{Imm11H, Imm11L | J1J2}; -} - -/// Decode 22-bit immediate value for branch instructions without J1J2 range -/// extension (formats B T4, BL T1 and BLX T2). 
-/// -/// [ 00000:Imm11H, 00000:Imm11L ] -> 00000:Imm11H:Imm11L:0 -/// J1^ ^J2 will always be 1 -/// -int64_t decodeImmBT4BlT1BlxT2(uint32_t Hi, uint32_t Lo) { - uint32_t Imm11H = Hi & 0x07ff; - uint32_t Imm11L = Lo & 0x07ff; - return SignExtend64<22>(Imm11H << 12 | Imm11L << 1); -} - -/// Encode 25-bit immediate value for branch instructions with J1J2 range -/// extension (formats B T4, BL T1 and BLX T2). -/// -/// S:I1:I2:Imm10:Imm11:0 -> [ 00000:S:Imm10, 00:J1:0:J2:Imm11 ] -/// -HalfWords encodeImmBT4BlT1BlxT2_J1J2(int64_t Value) { - uint32_t S = (Value >> 14) & 0x0400; - uint32_t J1 = (((~(Value >> 10)) ^ (Value >> 11)) & 0x2000); - uint32_t J2 = (((~(Value >> 11)) ^ (Value >> 13)) & 0x0800); - uint32_t Imm10 = (Value >> 12) & 0x03ff; - uint32_t Imm11 = (Value >> 1) & 0x07ff; - return HalfWords{S | Imm10, J1 | J2 | Imm11}; -} - -/// Decode 25-bit immediate value for branch instructions with J1J2 range -/// extension (formats B T4, BL T1 and BLX T2). -/// -/// [ 00000:S:Imm10, 00:J1:0:J2:Imm11] -> S:I1:I2:Imm10:Imm11:0 -/// -int64_t decodeImmBT4BlT1BlxT2_J1J2(uint32_t Hi, uint32_t Lo) { - uint32_t S = Hi & 0x0400; - uint32_t I1 = ~((Lo ^ (Hi << 3)) << 10) & 0x00800000; - uint32_t I2 = ~((Lo ^ (Hi << 1)) << 11) & 0x00400000; - uint32_t Imm10 = Hi & 0x03ff; - uint32_t Imm11 = Lo & 0x07ff; - return SignExtend64<25>(S << 14 | I1 | I2 | Imm10 << 12 | Imm11 << 1); -} - -/// Encode 16-bit immediate value for move instruction formats MOVT T1 and -/// MOVW T3. -/// -/// Imm4:Imm1:Imm3:Imm8 -> [ 00000:i:000000:Imm4, 0:Imm3:0000:Imm8 ] -/// -HalfWords encodeImmMovtT1MovwT3(uint16_t Value) { - uint32_t Imm4 = (Value >> 12) & 0x0f; - uint32_t Imm1 = (Value >> 11) & 0x01; - uint32_t Imm3 = (Value >> 8) & 0x07; - uint32_t Imm8 = Value & 0xff; - return HalfWords{Imm1 << 10 | Imm4, Imm3 << 12 | Imm8}; -} - -/// Decode 16-bit immediate value from move instruction formats MOVT T1 and -/// MOVW T3. -/// -/// [ 00000:i:000000:Imm4, 0:Imm3:0000:Imm8 ] -> Imm4:Imm1:Imm3:Imm8 -/// -uint16_t decodeImmMovtT1MovwT3(uint32_t Hi, uint32_t Lo) { - uint32_t Imm4 = Hi & 0x0f; - uint32_t Imm1 = (Hi >> 10) & 0x01; - uint32_t Imm3 = (Lo >> 12) & 0x07; - uint32_t Imm8 = Lo & 0xff; - uint32_t Imm16 = Imm4 << 12 | Imm1 << 11 | Imm3 << 8 | Imm8; - assert(Imm16 <= 0xffff && "Decoded value out-of-range"); - return Imm16; -} - -/// Encode register ID for instruction formats MOVT T1 and MOVW T3. -/// -/// Rd4 -> [0000000000000000, 0000:Rd4:00000000] -/// -HalfWords encodeRegMovtT1MovwT3(int64_t Value) { - uint32_t Rd4 = (Value & 0x0f) << 8; - return HalfWords{0, Rd4}; -} - -/// Decode register ID from instruction formats MOVT T1 and MOVW T3. -/// -/// [0000000000000000, 0000:Rd4:00000000] -> Rd4 -/// -int64_t decodeRegMovtT1MovwT3(uint32_t Hi, uint32_t Lo) { - uint32_t Rd4 = (Lo >> 8) & 0x0f; - return Rd4; -} - -/// 32-bit Thumb instructions are stored as two little-endian halfwords. -/// An instruction at address A encodes bytes A+1, A in the first halfword (Hi), -/// followed by bytes A+3, A+2 in the second halfword (Lo). -struct WritableThumbRelocation { - /// Create a writable reference to a Thumb32 fixup. - WritableThumbRelocation(char *FixupPtr) - : Hi{*reinterpret_cast(FixupPtr)}, - Lo{*reinterpret_cast(FixupPtr + 2)} {} - - support::ulittle16_t &Hi; // First halfword - support::ulittle16_t &Lo; // Second halfword -}; - -struct ThumbRelocation { - /// Create a read-only reference to a Thumb32 fixup. 
- ThumbRelocation(const char *FixupPtr) - : Hi{*reinterpret_cast(FixupPtr)}, - Lo{*reinterpret_cast(FixupPtr + 2)} {} - - /// Create a read-only Thumb32 fixup from a writeable one. - ThumbRelocation(WritableThumbRelocation &Writable) - : Hi{Writable.Hi}, Lo(Writable.Lo) {} - - const support::ulittle16_t &Hi; // First halfword - const support::ulittle16_t &Lo; // Second halfword -}; - -Error makeUnexpectedOpcodeError(const LinkGraph &G, const ThumbRelocation &R, - Edge::Kind Kind) { - return make_error( - formatv("Invalid opcode [ 0x{0:x4}, 0x{1:x4} ] for relocation: {2}", R.Hi, - R.Lo, G.getEdgeKindName(Kind))); -} - -template bool checkOpcode(const ThumbRelocation &R) { - uint16_t Hi = R.Hi & FixupInfo::OpcodeMask.Hi; - uint16_t Lo = R.Lo & FixupInfo::OpcodeMask.Lo; - return Hi == FixupInfo::Opcode.Hi && Lo == FixupInfo::Opcode.Lo; -} - -template -bool checkRegister(const ThumbRelocation &R, HalfWords Reg) { - uint16_t Hi = R.Hi & FixupInfo::RegMask.Hi; - uint16_t Lo = R.Lo & FixupInfo::RegMask.Lo; - return Hi == Reg.Hi && Lo == Reg.Lo; -} - -template -bool writeRegister(WritableThumbRelocation &R, HalfWords Reg) { - static constexpr HalfWords Mask = FixupInfo::RegMask; - assert((Mask.Hi & Reg.Hi) == Reg.Hi && (Mask.Hi & Reg.Hi) == Reg.Hi && - "Value bits exceed bit range of given mask"); - R.Hi = (R.Hi & ~Mask.Hi) | Reg.Hi; - R.Lo = (R.Lo & ~Mask.Lo) | Reg.Lo; -} - -template -void writeImmediate(WritableThumbRelocation &R, HalfWords Imm) { - static constexpr HalfWords Mask = FixupInfo::ImmMask; - assert((Mask.Hi & Imm.Hi) == Imm.Hi && (Mask.Hi & Imm.Hi) == Imm.Hi && - "Value bits exceed bit range of given mask"); - R.Hi = (R.Hi & ~Mask.Hi) | Imm.Hi; - R.Lo = (R.Lo & ~Mask.Lo) | Imm.Lo; -} - -Expected readAddendData(LinkGraph &G, Block &B, const Edge &E) { - endianness Endian = G.getEndianness(); - assert(Endian != native && "Declare as little or big explicitly"); - - Edge::Kind Kind = E.getKind(); - const char *BlockWorkingMem = B.getContent().data(); - const char *FixupPtr = BlockWorkingMem + E.getOffset(); - - switch (Kind) { - case Data_Delta32: - return SignExtend64<32>((Endian == little) ? read32(FixupPtr) - : read32(FixupPtr)); - default: - return make_error( - "In graph " + G.getName() + ", section " + B.getSection().getName() + - " can not read implicit addend for aarch32 edge kind " + - G.getEdgeKindName(E.getKind())); - } -} - -Expected readAddendArm(LinkGraph &G, Block &B, const Edge &E) { - Edge::Kind Kind = E.getKind(); - - switch (Kind) { - case Arm_Call: - return make_error( - "Addend extraction for relocation type not yet implemented: " + - StringRef(G.getEdgeKindName(Kind))); - default: - return make_error( - "In graph " + G.getName() + ", section " + B.getSection().getName() + - " can not read implicit addend for aarch32 edge kind " + - G.getEdgeKindName(E.getKind())); - } -} - -Expected readAddendThumb(LinkGraph &G, Block &B, const Edge &E, - const ArmConfig &ArmCfg) { - ThumbRelocation R(B.getContent().data() + E.getOffset()); - Edge::Kind Kind = E.getKind(); - - switch (Kind) { - case Thumb_Call: - if (!checkOpcode(R)) - return makeUnexpectedOpcodeError(G, R, Kind); - return LLVM_LIKELY(ArmCfg.J1J2BranchEncoding) - ? 
decodeImmBT4BlT1BlxT2_J1J2(R.Hi, R.Lo) - : decodeImmBT4BlT1BlxT2(R.Hi, R.Lo); - - case Thumb_Jump24: - if (!checkOpcode(R)) - return makeUnexpectedOpcodeError(G, R, Kind); - if (R.Lo & FixupInfo::LoBitConditional) - return make_error("Relocation expects an unconditional " - "B.W branch instruction: " + - StringRef(G.getEdgeKindName(Kind))); - return LLVM_LIKELY(ArmCfg.J1J2BranchEncoding) - ? decodeImmBT4BlT1BlxT2_J1J2(R.Hi, R.Lo) - : decodeImmBT4BlT1BlxT2(R.Hi, R.Lo); - - case Thumb_MovwAbsNC: - if (!checkOpcode(R)) - return makeUnexpectedOpcodeError(G, R, Kind); - // Initial addend is interpreted as a signed value - return SignExtend64<16>(decodeImmMovtT1MovwT3(R.Hi, R.Lo)); - - case Thumb_MovtAbs: - if (!checkOpcode(R)) - return makeUnexpectedOpcodeError(G, R, Kind); - // Initial addend is interpreted as a signed value - return SignExtend64<16>(decodeImmMovtT1MovwT3(R.Hi, R.Lo)); - - default: - return make_error( - "In graph " + G.getName() + ", section " + B.getSection().getName() + - " can not read implicit addend for aarch32 edge kind " + - G.getEdgeKindName(E.getKind())); - } -} - -Error applyFixupData(LinkGraph &G, Block &B, const Edge &E) { - using namespace support; - - char *BlockWorkingMem = B.getAlreadyMutableContent().data(); - char *FixupPtr = BlockWorkingMem + E.getOffset(); - - auto Write32 = [FixupPtr, Endian = G.getEndianness()](int64_t Value) { - assert(Endian != native && "Must be explicit: little or big"); - assert(isInt<32>(Value) && "Must be in signed 32-bit range"); - uint32_t Imm = static_cast(Value); - if (LLVM_LIKELY(Endian == little)) - endian::write32(FixupPtr, Imm); - else - endian::write32(FixupPtr, Imm); - }; - - Edge::Kind Kind = E.getKind(); - uint64_t FixupAddress = (B.getAddress() + E.getOffset()).getValue(); - int64_t Addend = E.getAddend(); - Symbol &TargetSymbol = E.getTarget(); - uint64_t TargetAddress = TargetSymbol.getAddress().getValue(); - assert(!TargetSymbol.hasTargetFlags(ThumbSymbol)); - - // Regular data relocations have size 4, alignment 1 and write the full 32-bit - // result to the place; no need for overflow checking. 
There are three - // exceptions: R_ARM_ABS8, R_ARM_ABS16, R_ARM_PREL31 - switch (Kind) { - case Data_Delta32: { - int64_t Value = TargetAddress - FixupAddress + Addend; - if (!isInt<32>(Value)) - return makeTargetOutOfRangeError(G, B, E); - Write32(Value); - return Error::success(); - } - default: - return make_error( - "In graph " + G.getName() + ", section " + B.getSection().getName() + - " encountered unfixable aarch32 edge kind " + - G.getEdgeKindName(E.getKind())); - } -} - -Error applyFixupArm(LinkGraph &G, Block &B, const Edge &E) { - Edge::Kind Kind = E.getKind(); - - switch (Kind) { - case Arm_Call: - return make_error( - "Fix-up for relocation type not yet implemented: " + - StringRef(G.getEdgeKindName(Kind))); - default: - return make_error( - "In graph " + G.getName() + ", section " + B.getSection().getName() + - " encountered unfixable aarch32 edge kind " + - G.getEdgeKindName(E.getKind())); - } -} - -Error applyFixupThumb(LinkGraph &G, Block &B, const Edge &E, - const ArmConfig &ArmCfg) { - WritableThumbRelocation R(B.getAlreadyMutableContent().data() + - E.getOffset()); - - Edge::Kind Kind = E.getKind(); - uint64_t FixupAddress = (B.getAddress() + E.getOffset()).getValue(); - int64_t Addend = E.getAddend(); - Symbol &TargetSymbol = E.getTarget(); - uint64_t TargetAddress = TargetSymbol.getAddress().getValue(); - if (TargetSymbol.hasTargetFlags(ThumbSymbol)) - TargetAddress |= 0x01; - - switch (Kind) { - case Thumb_Jump24: { - if (!checkOpcode(R)) - return makeUnexpectedOpcodeError(G, R, Kind); - if (R.Lo & FixupInfo::LoBitConditional) - return make_error("Relocation expects an unconditional " - "B.W branch instruction: " + - StringRef(G.getEdgeKindName(Kind))); - if (!(TargetSymbol.hasTargetFlags(ThumbSymbol))) - return make_error("Branch relocation needs interworking " - "stub when bridging to ARM: " + - StringRef(G.getEdgeKindName(Kind))); - - int64_t Value = TargetAddress - FixupAddress + Addend; - if (LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)) { - if (!isInt<25>(Value)) - return makeTargetOutOfRangeError(G, B, E); - writeImmediate(R, encodeImmBT4BlT1BlxT2_J1J2(Value)); - } else { - if (!isInt<22>(Value)) - return makeTargetOutOfRangeError(G, B, E); - writeImmediate(R, encodeImmBT4BlT1BlxT2(Value)); - } - - return Error::success(); - } - - case Thumb_Call: { - if (!checkOpcode(R)) - return makeUnexpectedOpcodeError(G, R, Kind); - - int64_t Value = TargetAddress - FixupAddress + Addend; - - // The call instruction itself is Thumb. The call destination can either be - // Thumb or Arm. We use BL to stay in Thumb and BLX to change to Arm. - bool TargetIsArm = !TargetSymbol.hasTargetFlags(ThumbSymbol); - bool InstrIsBlx = (R.Lo & FixupInfo::LoBitNoBlx) == 0; - if (TargetIsArm != InstrIsBlx) { - if (LLVM_LIKELY(TargetIsArm)) { - // Change opcode BL -> BLX and fix range value (account for 4-byte - // aligned destination while instruction may only be 2-byte aligned - // and clear Thumb bit). 
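-        // Arm destinations are 4-byte aligned, hence alignTo(Value, 4) below;
-        // clearing LoBitH keeps the H bit zero, which the assert at the end
-        // of this case relies on to avoid UB in the BLX T2 encoding.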
- R.Lo = R.Lo & ~FixupInfo::LoBitNoBlx; - R.Lo = R.Lo & ~FixupInfo::LoBitH; - Value = alignTo(Value, 4); - } else { - // Change opcode BLX -> BL and set Thumb bit - R.Lo = R.Lo & ~FixupInfo::LoBitNoBlx; - Value |= 0x01; - } - } - - if (LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)) { - if (!isInt<25>(Value)) - return makeTargetOutOfRangeError(G, B, E); - writeImmediate(R, encodeImmBT4BlT1BlxT2_J1J2(Value)); - } else { - if (!isInt<22>(Value)) - return makeTargetOutOfRangeError(G, B, E); - writeImmediate(R, encodeImmBT4BlT1BlxT2(Value)); - } - - assert(((R.Lo & FixupInfo::LoBitNoBlx) || - (R.Lo & FixupInfo::LoBitH) == 0) && - "Opcode BLX implies H bit is clear (avoid UB in BLX T2)"); - return Error::success(); - } - - case Thumb_MovwAbsNC: { - if (!checkOpcode(R)) - return makeUnexpectedOpcodeError(G, R, Kind); - uint16_t Value = (TargetAddress + Addend) & 0xffff; - writeImmediate(R, encodeImmMovtT1MovwT3(Value)); - return Error::success(); - } - - case Thumb_MovtAbs: { - if (!checkOpcode(R)) - return makeUnexpectedOpcodeError(G, R, Kind); - uint16_t Value = ((TargetAddress + Addend) >> 16) & 0xffff; - writeImmediate(R, encodeImmMovtT1MovwT3(Value)); - return Error::success(); - } - - default: - return make_error( - "In graph " + G.getName() + ", section " + B.getSection().getName() + - " encountered unfixable aarch32 edge kind " + - G.getEdgeKindName(E.getKind())); - } -} - -const uint8_t Thumbv7ABS[] = { - 0x40, 0xf2, 0x00, 0x0c, // movw r12, #0x0000 ; lower 16-bit - 0xc0, 0xf2, 0x00, 0x0c, // movt r12, #0x0000 ; upper 16-bit - 0x60, 0x47 // bx r12 -}; - -template <> -Symbol &StubsManager::createEntry(LinkGraph &G, Symbol &Target) { - constexpr uint64_t Alignment = 4; - Block &B = addStub(G, Thumbv7ABS, Alignment); - LLVM_DEBUG({ - const char *StubPtr = B.getContent().data(); - HalfWords Reg12 = encodeRegMovtT1MovwT3(12); - assert(checkRegister(StubPtr, Reg12) && - checkRegister(StubPtr + 4, Reg12) && - "Linker generated stubs may only corrupt register r12 (IP)"); - }); - B.addEdge(Thumb_MovwAbsNC, 0, Target, 0); - B.addEdge(Thumb_MovtAbs, 4, Target, 0); - Symbol &Stub = G.addAnonymousSymbol(B, 0, B.getSize(), true, false); - Stub.setTargetFlags(ThumbSymbol); - return Stub; -} - -const char *getEdgeKindName(Edge::Kind K) { -#define KIND_NAME_CASE(K) \ - case K: \ - return #K; - - switch (K) { - KIND_NAME_CASE(Data_Delta32) - KIND_NAME_CASE(Arm_Call) - KIND_NAME_CASE(Thumb_Call) - KIND_NAME_CASE(Thumb_Jump24) - KIND_NAME_CASE(Thumb_MovwAbsNC) - KIND_NAME_CASE(Thumb_MovtAbs) - default: - return getGenericEdgeKindName(K); - } -#undef KIND_NAME_CASE -} - -const char *getCPUArchName(ARMBuildAttrs::CPUArch K) { -#define CPUARCH_NAME_CASE(K) \ - case K: \ - return #K; - - using namespace ARMBuildAttrs; - switch (K) { - CPUARCH_NAME_CASE(Pre_v4) - CPUARCH_NAME_CASE(v4) - CPUARCH_NAME_CASE(v4T) - CPUARCH_NAME_CASE(v5T) - CPUARCH_NAME_CASE(v5TE) - CPUARCH_NAME_CASE(v5TEJ) - CPUARCH_NAME_CASE(v6) - CPUARCH_NAME_CASE(v6KZ) - CPUARCH_NAME_CASE(v6T2) - CPUARCH_NAME_CASE(v6K) - CPUARCH_NAME_CASE(v7) - CPUARCH_NAME_CASE(v6_M) - CPUARCH_NAME_CASE(v6S_M) - CPUARCH_NAME_CASE(v7E_M) - CPUARCH_NAME_CASE(v8_A) - CPUARCH_NAME_CASE(v8_R) - CPUARCH_NAME_CASE(v8_M_Base) - CPUARCH_NAME_CASE(v8_M_Main) - CPUARCH_NAME_CASE(v8_1_M_Main) - CPUARCH_NAME_CASE(v9_A) - } - llvm_unreachable("Missing CPUArch in switch?"); -#undef CPUARCH_NAME_CASE -} - -} // namespace aarch32 -} // namespace jitlink -} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp 
b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 83a09b8d41e91..2c270cd66285d 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -8,7 +8,6 @@ #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h" -#include "llvm/ExecutionEngine/JITLink/aarch32.h" #include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h" #include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h" #include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h" @@ -41,10 +40,7 @@ bool hasInitializerSection(jitlink::LinkGraph &G) { } JITTargetAddress getJITSymbolPtrForSymbol(Symbol &Sym) { - uint64_t CallableAddr = Sym.getAddress().getValue(); - if (Sym.isCallable() && Sym.hasTargetFlags(aarch32::ThumbSymbol)) - CallableAddr |= 0x01; // thumb bit - return CallableAddr; + return Sym.getAddress().getValue(); } JITSymbolFlags getJITSymbolFlagsForSymbol(Symbol &Sym) { diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s deleted file mode 100644 index 11a77c95cfa8f..0000000000000 --- a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s +++ /dev/null @@ -1,46 +0,0 @@ -// RUN: llvm-mc -triple=thumbv7-none-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t.o %s -// RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb -slab-page-size 4096 -abs printf=0x76bbe880 -show-entry-es %t.o | FileCheck %s - -// Check that main is a thumb symbol (with LSB set) and printf is arm (with LSB clear) -// -// CHECK-LABEL: Symbol table: -// CHECK-NEXT: "main": 0x{{[0-9a-f]+[13579bdf]}} [Callable] Ready -// CHECK-NEXT: "printf": 0x76bbe880 [Data] Ready - - .globl main - .p2align 2 - .type main,%function - .code 16 - .thumb_func -main: - .fnstart - .save {r7, lr} - push {r7, lr} - .setfp r7, sp - mov r7, sp - .pad #8 - sub sp, #8 - movs r0, #0 - str r0, [sp] - str r0, [sp, #4] - ldr r0, .LCPI0_0 -.LPC0_0: - add r0, pc - bl printf - ldr r0, [sp] - add sp, #8 - pop {r7, pc} - - .p2align 2 -.LCPI0_0: - .long .L.str-(.LPC0_0+4) - - .size main, .-main - .cantunwind - .fnend - - .type .L.str,%object - .section .rodata.str1.1,"aMS",%progbits,1 -.L.str: - .asciz "Hello AArch32!\n" - .size .L.str, 12 diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg b/llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg deleted file mode 100644 index 20e19aeb06f9d..0000000000000 --- a/llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'ARM' in config.root.targets: - config.unsupported = True diff --git a/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp b/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp deleted file mode 100644 index 0e41174040b68..0000000000000 --- a/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp +++ /dev/null @@ -1,200 +0,0 @@ -//===------- AArch32Tests.cpp - Unit tests for the AArch32 backend --------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include <llvm/BinaryFormat/ELF.h>
-#include <llvm/ExecutionEngine/JITLink/aarch32.h>
-
-#include "gtest/gtest.h"
-
-using namespace llvm;
-using namespace llvm::jitlink;
-using namespace llvm::jitlink::aarch32;
-using namespace llvm::support;
-using namespace llvm::support::endian;
-
-struct MutableHalfWords {
-  MutableHalfWords(HalfWords Preset) : Hi(Preset.Hi), Lo(Preset.Lo) {}
-
-  void patch(HalfWords Value, HalfWords Mask) {
-    Hi = (Hi & ~Mask.Hi) | Value.Hi;
-    Lo = (Lo & ~Mask.Lo) | Value.Lo;
-  }
-
-  uint16_t Hi; // First halfword
-  uint16_t Lo; // Second halfword
-};
-
-namespace llvm {
-namespace jitlink {
-
-Expected<aarch32::EdgeKind_aarch32> getJITLinkEdgeKind(uint32_t ELFType);
-Expected<uint32_t> getELFRelocationType(Edge::Kind Kind);
-
-} // namespace jitlink
-} // namespace llvm
-
-TEST(AArch32_ELF, EdgeKinds) {
-  // Fails: Invalid ELF type -> JITLink kind
-  Expected<aarch32::EdgeKind_aarch32> ErrKind =
-      getJITLinkEdgeKind(ELF::R_ARM_NONE);
-  EXPECT_TRUE(errorToBool(ErrKind.takeError()));
-
-  // Fails: Invalid JITLink kind -> ELF type
-  Expected<uint32_t> ErrType = getELFRelocationType(Edge::Invalid);
-  EXPECT_TRUE(errorToBool(ErrType.takeError()));
-
-  for (Edge::Kind K = FirstDataRelocation; K < LastThumbRelocation; K += 1) {
-    Expected<uint32_t> ELFType = getELFRelocationType(K);
-    EXPECT_FALSE(errorToBool(ELFType.takeError()))
-        << "Failed to translate JITLink kind -> ELF type";
-
-    Expected<aarch32::EdgeKind_aarch32> JITLinkKind =
-        getJITLinkEdgeKind(*ELFType);
-    EXPECT_FALSE(errorToBool(JITLinkKind.takeError()))
-        << "Failed to translate ELF type -> JITLink kind";
-
-    EXPECT_EQ(*JITLinkKind, K) << "Round-trip value inconsistent?";
-  }
-}
-
-namespace llvm {
-namespace jitlink {
-namespace aarch32 {
-
-HalfWords encodeImmBT4BlT1BlxT2(int64_t Value);
-HalfWords encodeImmBT4BlT1BlxT2_J1J2(int64_t Value);
-HalfWords encodeImmMovtT1MovwT3(uint16_t Value);
-HalfWords encodeRegMovtT1MovwT3(int64_t Value);
-
-int64_t decodeImmBT4BlT1BlxT2(uint32_t Hi, uint32_t Lo);
-int64_t decodeImmBT4BlT1BlxT2_J1J2(uint32_t Hi, uint32_t Lo);
-uint16_t decodeImmMovtT1MovwT3(uint32_t Hi, uint32_t Lo);
-int64_t decodeRegMovtT1MovwT3(uint32_t Hi, uint32_t Lo);
-
-} // namespace aarch32
-} // namespace jitlink
-} // namespace llvm
-
-// Big-endian targets for v7 and v8 (and for v6, unless running in the legacy
-// backwards-compatible mode be32) use little-endian instructions and
-// big-endian data. In ELF relocatable objects big-endian instructions may
-// still be encountered. A be8-supporting linker is expected to endian-reverse
-// instructions for the executable.
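-// A minimal sketch of that convention (assuming the llvm::support::endian
-// helpers): the two halfwords of a 32-bit Thumb instruction at address P are
-// read as
-//   uint16_t Hi = support::endian::read16<support::little>(P);
-//   uint16_t Lo = support::endian::read16<support::little>(P + 2);
-// which is the layout the makeHalfWords helper below reproduces.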
-template <endianness Endian>
-static HalfWords makeHalfWords(std::array<uint8_t, 4> Mem) {
-  return HalfWords{read16<Endian>(Mem.data()), read16<Endian>(Mem.data() + 2)};
-}
-
-/// 25-bit branch with link (with J1J2 range extension)
-TEST(AArch32_Relocations, Thumb_Call_J1J2) {
-  static_assert(isInt<25>(16777215), "Max value");
-  static_assert(isInt<25>(-16777215), "Min value");
-  static_assert(!isInt<25>(16777217), "First overflow");
-  static_assert(!isInt<25>(-16777217), "First underflow");
-
-  constexpr HalfWords ImmMask = FixupInfo<Thumb_Call>::ImmMask;
-
-  static std::array<HalfWords, 3> MemPresets{
-      makeHalfWords<little>({0xff, 0xf7, 0xfe, 0xef}), // common
-      makeHalfWords<little>({0x00, 0x00, 0x00, 0x00}), // zeros
-      makeHalfWords<little>({0xff, 0xff, 0xff, 0xff}), // ones
-  };
-
-  auto EncodeDecode = [ImmMask](int64_t In, MutableHalfWords &Mem) {
-    Mem.patch(encodeImmBT4BlT1BlxT2_J1J2(In), ImmMask);
-    return decodeImmBT4BlT1BlxT2_J1J2(Mem.Hi, Mem.Lo);
-  };
-
-  for (MutableHalfWords Mem : MemPresets) {
-    HalfWords UnaffectedBits(Mem.Hi & ~ImmMask.Hi, Mem.Lo & ~ImmMask.Lo);
-
-    EXPECT_EQ(EncodeDecode(1, Mem), 0);                 // Zero value
-    EXPECT_EQ(EncodeDecode(0x41, Mem), 0x40);           // Common value
-    EXPECT_EQ(EncodeDecode(16777215, Mem), 16777214);   // Maximum value
-    EXPECT_EQ(EncodeDecode(-16777215, Mem), -16777216); // Minimum value
-    EXPECT_NE(EncodeDecode(16777217, Mem), 16777217);   // First overflow
-    EXPECT_NE(EncodeDecode(-16777217, Mem), -16777217); // First underflow
-
-    EXPECT_TRUE(UnaffectedBits.Hi == (Mem.Hi & ~ImmMask.Hi) &&
-                UnaffectedBits.Lo == (Mem.Lo & ~ImmMask.Lo))
-        << "Diff outside immediate field";
-  }
-}
-
-/// 22-bit branch with link (without J1J2 range extension)
-TEST(AArch32_Relocations, Thumb_Call_Bare) {
-  static_assert(isInt<22>(2097151), "Max value");
-  static_assert(isInt<22>(-2097151), "Min value");
-  static_assert(!isInt<22>(2097153), "First overflow");
-  static_assert(!isInt<22>(-2097153), "First underflow");
-
-  constexpr HalfWords ImmMask = FixupInfo<Thumb_Call>::ImmMask;
-
-  static std::array<HalfWords, 3> MemPresets{
-      makeHalfWords<little>({0xff, 0xf7, 0xfe, 0xef}), // common
-      makeHalfWords<little>({0x00, 0x00, 0x00, 0x00}), // zeros
-      makeHalfWords<little>({0xff, 0xff, 0xff, 0xff}), // ones
-  };
-
-  auto EncodeDecode = [ImmMask](int64_t In, MutableHalfWords &Mem) {
-    Mem.patch(encodeImmBT4BlT1BlxT2(In), ImmMask);
-    return decodeImmBT4BlT1BlxT2(Mem.Hi, Mem.Lo);
-  };
-
-  for (MutableHalfWords Mem : MemPresets) {
-    HalfWords UnaffectedBits(Mem.Hi & ~ImmMask.Hi, Mem.Lo & ~ImmMask.Lo);
-
-    EXPECT_EQ(EncodeDecode(1, Mem), 0);               // Zero value
-    EXPECT_EQ(EncodeDecode(0x41, Mem), 0x40);         // Common value
-    EXPECT_EQ(EncodeDecode(2097151, Mem), 2097150);   // Maximum value
-    EXPECT_EQ(EncodeDecode(-2097151, Mem), -2097152); // Minimum value
-    EXPECT_NE(EncodeDecode(2097153, Mem), 2097153);   // First overflow
-    EXPECT_NE(EncodeDecode(-2097153, Mem), -2097153); // First underflow
-
-    EXPECT_TRUE(UnaffectedBits.Hi == (Mem.Hi & ~ImmMask.Hi) &&
-                UnaffectedBits.Lo == (Mem.Lo & ~ImmMask.Lo))
-        << "Diff outside immediate field";
-  }
-}
-
-/// Write immediate value to the top halfword of the destination register
-TEST(AArch32_Relocations, Thumb_MovtAbs) {
-  static_assert(isUInt<16>(65535), "Max value");
-  static_assert(!isUInt<16>(65536), "First overflow");
-
-  constexpr HalfWords ImmMask = FixupInfo<Thumb_MovtAbs>::ImmMask;
-  constexpr HalfWords RegMask = FixupInfo<Thumb_MovtAbs>::RegMask;
-
-  static std::array<uint8_t, 3> Registers{0, 5, 12};
-  static std::array<HalfWords, 3> MemPresets{
-      makeHalfWords<little>({0xff, 0xf7, 0xfe, 0xef}), // common
-      makeHalfWords<little>({0x00, 0x00, 0x00, 0x00}), // zeros
-      makeHalfWords<little>({0xff, 0xff, 0xff, 0xff}), // ones
-  };
-
-  auto EncodeDecode = [ImmMask](uint32_t In, MutableHalfWords &Mem) {
-    Mem.patch(encodeImmMovtT1MovwT3(In), ImmMask);
-    return decodeImmMovtT1MovwT3(Mem.Hi, Mem.Lo);
-  };
-
-  for (MutableHalfWords Mem : MemPresets) {
-    for (uint8_t Reg : Registers) {
-      HalfWords UnaffectedBits(Mem.Hi & ~(ImmMask.Hi | RegMask.Hi),
-                               Mem.Lo & ~(ImmMask.Lo | RegMask.Lo));
-
-      Mem.patch(encodeRegMovtT1MovwT3(Reg), RegMask);
-      EXPECT_EQ(EncodeDecode(0x76bb, Mem), 0x76bb);   // Common value
-      EXPECT_EQ(EncodeDecode(0, Mem), 0);             // Minimum value
-      EXPECT_EQ(EncodeDecode(0xffff, Mem), 0xffff);   // Maximum value
-      EXPECT_NE(EncodeDecode(0x10000, Mem), 0x10000); // First overflow
-
-      // Destination register as well as unaffected bits should be intact
-      EXPECT_EQ(decodeRegMovtT1MovwT3(Mem.Hi, Mem.Lo), Reg);
-      EXPECT_TRUE(UnaffectedBits.Hi == (Mem.Hi & ~(ImmMask.Hi | RegMask.Hi)) &&
-                  UnaffectedBits.Lo == (Mem.Lo & ~(ImmMask.Lo | RegMask.Lo)))
-          << "Diff outside immediate/register field";
-    }
-  }
-}
diff --git a/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt
index 978914c748c63..1a71a62d3756d 100644
--- a/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt
+++ b/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt
@@ -8,7 +8,6 @@ set(LLVM_LINK_COMPONENTS
 )
 
 add_llvm_unittest(JITLinkTests
-  AArch32Tests.cpp
   EHFrameSupportTests.cpp
   LinkGraphTests.cpp
   )

From d557384b43d32700ed09b08564a4f7823061d999 Mon Sep 17 00:00:00 2001
From: Alexander Yermolovich
Date: Thu, 23 Mar 2023 13:20:38 -0700
Subject: [PATCH 477/691] [LLDB] Fix for D139955

Summary:
Fixing a small typo.

Reviewed By: clayborg

Differential Revision: https://reviews.llvm.org/D146659
---
 .../SymbolFile/DWARF/SymbolFileDWARF.cpp      |   2 +-
 .../DWARF/range-lower-then-low-pc.s           | 317 ++++++++++++++++++
 2 files changed, 318 insertions(+), 1 deletion(-)
 create mode 100644 lldb/test/Shell/SymbolFile/DWARF/range-lower-then-low-pc.s

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
index 99a0152eaf6e6..c6873a5b7a09a 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp
@@ -1319,7 +1319,7 @@ size_t SymbolFileDWARF::ParseBlocksRecursive(
                                     range.GetByteSize()));
       else {
         GetObjectFile()->GetModule()->ReportError(
-            "{0x:+8}: adding range [{1:x16}-{2:x16}) which has a base "
+            "{0:x8}: adding range [{1:x16}-{2:x16}) which has a base "
             "that is less than the function's low PC {3:x16}. Please file "
            "a bug and attach the file at the "
            "start of this error message",
diff --git a/lldb/test/Shell/SymbolFile/DWARF/range-lower-then-low-pc.s b/lldb/test/Shell/SymbolFile/DWARF/range-lower-then-low-pc.s
new file mode 100644
index 0000000000000..e3cc84db12652
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/range-lower-then-low-pc.s
@@ -0,0 +1,317 @@
+# REQUIRES: x86
+
+# RUN: llvm-mc -triple=x86_64-pc-linux -filetype=obj %s > %t
+# RUN: lldb-test symbols %t &> %t.txt
+# RUN: cat %t.txt | FileCheck %s
+
+# Tests that the error is printed correctly when the DW_AT_low_pc value is
+# greater than a range entry.
+
+# CHECK: 0x0000006e: adding range [0x0000000000000000-0x000000000000001f)
+# CHECK-SAME: which has a base that is less than the function's low PC 0x0000000000000021.
+# CHECK-SAME: Please file a bug and attach the file at the
+# start of this error message
+
+
+
+# Test was manually modified to change DW_TAG_lexical_block
+# to use DW_AT_ranges, with a range value lower than the DW_AT_low_pc value
+# in the enclosing DW_TAG_subprogram.
+# static int foo(bool b) {
+#   if (b) {
+#     int food = 1;
+#     return food;
+#   }
+#   return 0;
+# }
+# int main() {
+#   return foo(true);
+# }
+ .text
+ .file "main.cpp"
+ .section .text.main,"ax",@progbits
+ .globl main                            # -- Begin function main
+ .p2align 4, 0x90
+ .type main,@function
+main:                                   # @main
+.Lfunc_begin0:
+ .file 1 "base-lower-then-range-entry" "main.cpp"
+ .loc 1 8 0                             # main.cpp:8:0
+ .cfi_startproc
+# %bb.0:                                # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ subq $16, %rsp
+ movl $0, -4(%rbp)
+.Ltmp0:
+ .loc 1 9 10 prologue_end               # main.cpp:9:10
+ movl $1, %edi
+ callq _ZL3foob
+ .loc 1 9 3 epilogue_begin is_stmt 0    # main.cpp:9:3
+ addq $16, %rsp
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp1:
+.Lfunc_end0:
+ .size main, .Lfunc_end0-main
+ .cfi_endproc
+                                        # -- End function
+ .section .text._ZL3foob,"ax",@progbits
+ .p2align 4, 0x90                       # -- Begin function _ZL3foob
+ .type _ZL3foob,@function
+_ZL3foob:                               # @_ZL3foob
+.Lfunc_begin1:
+ .loc 1 1 0 is_stmt 1                   # main.cpp:1:0
+ .cfi_startproc
+# %bb.0:                                # %entry
+ pushq %rbp
+ .cfi_def_cfa_offset 16
+ .cfi_offset %rbp, -16
+ movq %rsp, %rbp
+ .cfi_def_cfa_register %rbp
+ movb %dil, %al
+ andb $1, %al
+ movb %al, -5(%rbp)
+.Ltmp2:
+ .loc 1 2 7 prologue_end                # main.cpp:2:7
+ testb $1, -5(%rbp)
+ je .LBB1_2
+# %bb.1:                                # %if.then
+.Ltmp3:
+ .loc 1 3 8                             # main.cpp:3:8
+ movl $1, -12(%rbp)
+ .loc 1 4 12                            # main.cpp:4:12
+ movl -12(%rbp), %eax
+ .loc 1 4 5 is_stmt 0                   # main.cpp:4:5
+ movl %eax, -4(%rbp)
+ jmp .LBB1_3
+.Ltmp4:
+.LBB1_2:                                # %if.end
+ .loc 1 6 3 is_stmt 1                   # main.cpp:6:3
+ movl $0, -4(%rbp)
+.LBB1_3:                                # %return
+ .loc 1 7 1                             # main.cpp:7:1
+ movl -4(%rbp), %eax
+ .loc 1 7 1 epilogue_begin is_stmt 0    # main.cpp:7:1
+ popq %rbp
+ .cfi_def_cfa %rsp, 8
+ retq
+.Ltmp5:
+.Lfunc_end1:
+ .size _ZL3foob, .Lfunc_end1-_ZL3foob
+ .cfi_endproc
+                                        # -- End function
+ .section .debug_abbrev,"",@progbits
+ .byte 1                                # Abbreviation Code
+ .byte 17                               # DW_TAG_compile_unit
+ .byte 1                                # DW_CHILDREN_yes
+ .byte 37                               # DW_AT_producer
+ .byte 14                               # DW_FORM_strp
+ .byte 19                               # DW_AT_language
+ .byte 5                                # DW_FORM_data2
+ .byte 3                                # DW_AT_name
+ .byte 14                               # DW_FORM_strp
+ .byte 16                               # DW_AT_stmt_list
+ .byte 23                               # DW_FORM_sec_offset
+ .byte 27                               # DW_AT_comp_dir
+ .byte 14                               # DW_FORM_strp
+ .byte 17                               # DW_AT_low_pc
+ .byte 1                                # DW_FORM_addr
+ .byte 85                               # DW_AT_ranges
+ .byte 23                               # DW_FORM_sec_offset
+ .byte 0                                # EOM(1)
+ .byte 0                                # EOM(2)
+ .byte 2                                # Abbreviation Code
+ .byte 46                               # DW_TAG_subprogram
+ .byte 0                                # DW_CHILDREN_no
+ .byte 17                               # DW_AT_low_pc
+ .byte 1                                # DW_FORM_addr
+ .byte 18                               # DW_AT_high_pc
+ .byte 6                                # DW_FORM_data4
+ .byte 64                               # DW_AT_frame_base
+ .byte 24                               # DW_FORM_exprloc
+ .byte 3                                # DW_AT_name
+ .byte 14                               # DW_FORM_strp
+ .byte 58                               # DW_AT_decl_file
+ .byte 11                               # DW_FORM_data1
+ .byte 59                               # DW_AT_decl_line
+ .byte 11                               # DW_FORM_data1
+ .byte 73                               # DW_AT_type
+ .byte 19                               # DW_FORM_ref4
+ .byte 63                               # DW_AT_external
+ .byte 25                               # DW_FORM_flag_present
+ .byte 0                                # EOM(1)
+ .byte 0                                # EOM(2)
+ .byte 3                                # Abbreviation Code
+ .byte 46                               # DW_TAG_subprogram
+ .byte 1                                # DW_CHILDREN_yes
+ .byte 17                               # DW_AT_low_pc
+ .byte 1                                # DW_FORM_addr
+ .byte 18                               # DW_AT_high_pc
+ .byte 6                                # DW_FORM_data4
+ .byte 64                               # DW_AT_frame_base
+ .byte 24                               # DW_FORM_exprloc
+ .byte 110                              # DW_AT_linkage_name
+ .byte 14                               # DW_FORM_strp
+ .byte 3                                # DW_AT_name
+ .byte 14                               # DW_FORM_strp
+ .byte 58                               # DW_AT_decl_file
+ .byte 11                               # DW_FORM_data1
+ .byte 59                               # DW_AT_decl_line
+ .byte 11                               # DW_FORM_data1
+ .byte 73                               # DW_AT_type
+ .byte 19                               # DW_FORM_ref4
+ .byte 0                                # EOM(1)
+ .byte 0                                # EOM(2)
+ .byte 4                                # Abbreviation Code
+ .byte 5                                # DW_TAG_formal_parameter
+ .byte 0                                # DW_CHILDREN_no
+ .byte 2                                # DW_AT_location
+ .byte 24                               # DW_FORM_exprloc
+ .byte 3                                # DW_AT_name
+ .byte 14                               # DW_FORM_strp
+ .byte 58                               # DW_AT_decl_file
+ .byte 11                               # DW_FORM_data1
+ .byte 59                               # DW_AT_decl_line
+ .byte 11                               # DW_FORM_data1
+ .byte 73                               # DW_AT_type
+ .byte 19                               # DW_FORM_ref4
+ .byte 0                                # EOM(1)
+ .byte 0                                # EOM(2)
+ .byte 5                                # Abbreviation Code
+ .byte 11                               # DW_TAG_lexical_block
+ .byte 1                                # DW_CHILDREN_yes
+ .byte 85                               # DW_AT_ranges <------ Manually modified. Replaced low_pc/high_pc with ranges.
+ .byte 23                               # DW_FORM_sec_offset
+ .byte 0                                # EOM(1)
+ .byte 0                                # EOM(2)
+ .byte 6                                # Abbreviation Code
+ .byte 52                               # DW_TAG_variable
+ .byte 0                                # DW_CHILDREN_no
+ .byte 2                                # DW_AT_location
+ .byte 24                               # DW_FORM_exprloc
+ .byte 3                                # DW_AT_name
+ .byte 14                               # DW_FORM_strp
+ .byte 58                               # DW_AT_decl_file
+ .byte 11                               # DW_FORM_data1
+ .byte 59                               # DW_AT_decl_line
+ .byte 11                               # DW_FORM_data1
+ .byte 73                               # DW_AT_type
+ .byte 19                               # DW_FORM_ref4
+ .byte 0                                # EOM(1)
+ .byte 0                                # EOM(2)
+ .byte 7                                # Abbreviation Code
+ .byte 36                               # DW_TAG_base_type
+ .byte 0                                # DW_CHILDREN_no
+ .byte 3                                # DW_AT_name
+ .byte 14                               # DW_FORM_strp
+ .byte 62                               # DW_AT_encoding
+ .byte 11                               # DW_FORM_data1
+ .byte 11                               # DW_AT_byte_size
+ .byte 11                               # DW_FORM_data1
+ .byte 0                                # EOM(1)
+ .byte 0                                # EOM(2)
+ .byte 0                                # EOM(3)
+ .section .debug_info,"",@progbits
+.Lcu_begin0:
+ .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+ .short 4                               # DWARF version number
+ .long .debug_abbrev                    # Offset Into Abbrev. Section
+ .byte 8                                # Address Size (in bytes)
+ .byte 1                                # Abbrev [1] 0xb:0x8f DW_TAG_compile_unit
+ .long .Linfo_string0                   # DW_AT_producer
+ .short 33                              # DW_AT_language
+ .long .Linfo_string1                   # DW_AT_name
+ .long .Lline_table_start0              # DW_AT_stmt_list
+ .long .Linfo_string2                   # DW_AT_comp_dir
+ .quad 0                                # DW_AT_low_pc
+ .long .Ldebug_ranges0                  # DW_AT_ranges
+ .byte 2                                # Abbrev [2] 0x2a:0x19 DW_TAG_subprogram
+ .quad .Lfunc_begin0                    # DW_AT_low_pc
+ .long .Lfunc_end0-.Lfunc_begin0        # DW_AT_high_pc
+ .byte 1                                # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string3                   # DW_AT_name
+ .byte 1                                # DW_AT_decl_file
+ .byte 8                                # DW_AT_decl_line
+ .long 138                              # DW_AT_type
+                                        # DW_AT_external
+ .byte 3                                # Abbrev [3] 0x43:0x48 DW_TAG_subprogram
+ .quad .Lfunc_begin1 + 1                # DW_AT_low_pc
+ .long .Lfunc_end1-.Lfunc_begin1        # DW_AT_high_pc
+ .byte 1                                # DW_AT_frame_base
+ .byte 86
+ .long .Linfo_string5                   # DW_AT_linkage_name
+ .long .Linfo_string6                   # DW_AT_name
+ .byte 1                                # DW_AT_decl_file
+ .byte 1                                # DW_AT_decl_line
+ .long 138                              # DW_AT_type
+ .byte 4                                # Abbrev [4] 0x60:0xe DW_TAG_formal_parameter
+ .byte 2                                # DW_AT_location
+ .byte 145
+ .byte 123
+ .long .Linfo_string7                   # DW_AT_name
+ .byte 1                                # DW_AT_decl_file
+ .byte 1                                # DW_AT_decl_line
+ .long 138                              # DW_AT_type
+ .byte 5                                # Abbrev [5] 0x6e:0x1c DW_TAG_lexical_block
+ .long .Ldebug_ranges0                  # DW_AT_ranges <-- Manually modified: replaced low_pc/high_pc with ranges.
+ .byte 6 # Abbrev [6] 0x7b:0xe DW_TAG_variable + .byte 2 # DW_AT_location + .byte 145 + .byte 116 + .long .Linfo_string9 # DW_AT_name + .byte 1 # DW_AT_decl_file + .byte 3 # DW_AT_decl_line + .long 138 # DW_AT_type + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark + .byte 7 # Abbrev [7] 0x8b:0x7 DW_TAG_base_type + .long .Linfo_string4 # DW_AT_name + .byte 5 # DW_AT_encoding + .byte 4 # DW_AT_byte_size + .byte 7 # Abbrev [7] 0x92:0x7 DW_TAG_base_type + .long .Linfo_string8 # DW_AT_name + .byte 2 # DW_AT_encoding + .byte 1 # DW_AT_byte_size + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_ranges,"",@progbits +.Ldebug_ranges0: + .quad .Lfunc_begin0 + .quad .Lfunc_end0 + .quad .Lfunc_begin1 + .quad .Lfunc_end1 + .quad 0 + .quad 0 + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 73027ae39b1492e5b6033358a13b86d7d1e781ae)" # string offset=0 +.Linfo_string1: + .asciz "main.cpp" # string offset=105 +.Linfo_string2: + .asciz "base-lower-then-range-entry" # string offset=114 +.Linfo_string3: + .asciz "main" # string offset=179 +.Linfo_string4: + .asciz "int" # string offset=184 +.Linfo_string5: + .asciz "_ZL3foob" # string offset=188 +.Linfo_string6: + .asciz "foo" # string offset=197 +.Linfo_string7: + .asciz "b" # string offset=201 +.Linfo_string8: + .asciz "bool" # string offset=203 +.Linfo_string9: + .asciz "food" # string offset=208 + .ident "clang version 17.0.0 (https://github.com/llvm/llvm-project.git 73027ae39b1492e5b6033358a13b86d7d1e781ae)" + .section ".note.GNU-stack","",@progbits + .addrsig + .addrsig_sym _ZL3foob + .section .debug_line,"",@progbits +.Lline_table_start0: From 088da8a0e57a461f3be4b554f28c4419418c097c Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 23 Mar 2023 11:28:49 -0700 Subject: [PATCH 478/691] [lldb][NFC] makeArrayRef -> ArrayRef makeArrayRef is deprecated. --- lldb/source/Commands/CommandOptionsProcessAttach.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Commands/CommandOptionsProcessAttach.cpp b/lldb/source/Commands/CommandOptionsProcessAttach.cpp index f9bd92938fa1c..d3d864dfe0255 100644 --- a/lldb/source/Commands/CommandOptionsProcessAttach.cpp +++ b/lldb/source/Commands/CommandOptionsProcessAttach.cpp @@ -72,5 +72,5 @@ Status CommandOptionsProcessAttach::SetOptionValue( } llvm::ArrayRef CommandOptionsProcessAttach::GetDefinitions() { - return llvm::makeArrayRef(g_process_attach_options); + return llvm::ArrayRef(g_process_attach_options); } From 805f51f9fedf90d2aa0ad46c61cb4c9c0c5bcfe9 Mon Sep 17 00:00:00 2001 From: AdityaK <1894981+hiraditya@users.noreply.github.com> Date: Thu, 23 Mar 2023 13:54:58 -0700 Subject: [PATCH 479/691] Remove Android-mips related tests Split from: https://reviews.llvm.org/D146565, already reviewed there. 
--- llvm/test/CodeGen/Mips/ehframe-indirect.ll | 4 -- llvm/test/CodeGen/Mips/emutls_generic.ll | 75 ---------------------- 2 files changed, 79 deletions(-) delete mode 100644 llvm/test/CodeGen/Mips/emutls_generic.ll diff --git a/llvm/test/CodeGen/Mips/ehframe-indirect.ll b/llvm/test/CodeGen/Mips/ehframe-indirect.ll index 59f358316ddfd..b3f4b48329d7b 100644 --- a/llvm/test/CodeGen/Mips/ehframe-indirect.ll +++ b/llvm/test/CodeGen/Mips/ehframe-indirect.ll @@ -1,13 +1,9 @@ ; RUN: llc -mtriple=mipsel-linux-gnu < %s -asm-verbose -relocation-model=pic | \ ; RUN: FileCheck -check-prefixes=ALL,LINUX,LINUX-O32,O32 %s -; RUN: llc -mtriple=mipsel-linux-android < %s -asm-verbose -relocation-model=pic | \ -; RUN: FileCheck -check-prefixes=ALL,LINUX,LINUX-O32,O32 %s ; RUN: llc -mtriple=mips64el-linux-gnu -target-abi=n32 < %s -asm-verbose -relocation-model=pic | \ ; RUN: FileCheck -check-prefixes=ALL,LINUX,LINUX-NEW,N32 %s ; RUN: llc -mtriple=mips64el-linux-gnu < %s -asm-verbose -relocation-model=pic | \ ; RUN: FileCheck -check-prefixes=ALL,LINUX,LINUX-NEW,N64 %s -; RUN: llc -mtriple=mips64el-linux-android < %s -asm-verbose -relocation-model=pic | \ -; RUN: FileCheck -check-prefixes=ALL,LINUX,LINUX-NEW,N64 %s ; RUN: llc -mtriple=mips64el-linux-gnu < %s -asm-verbose -relocation-model=pic | \ ; RUN: FileCheck -check-prefixes=ALL,LINUX,LINUX-NEW,N64 %s ; RUN: llc -mtriple=mips-unknown-freebsd11.0 < %s -asm-verbose -relocation-model=pic | \ diff --git a/llvm/test/CodeGen/Mips/emutls_generic.ll b/llvm/test/CodeGen/Mips/emutls_generic.ll deleted file mode 100644 index 344a581d6b4b7..0000000000000 --- a/llvm/test/CodeGen/Mips/emutls_generic.ll +++ /dev/null @@ -1,75 +0,0 @@ -; RUN: llc < %s -emulated-tls -mtriple=mipsel-linux-android -relocation-model=pic \ -; RUN: | FileCheck -check-prefix=MIPS_32 %s -; RUN: llc < %s -emulated-tls -mtriple=mips64el-linux-android -relocation-model=pic \ -; RUN: | FileCheck -check-prefix=MIPS_64 %s - -; RUN: llc < %s -mtriple=mipsel-linux-android -relocation-model=pic \ -; RUN: | FileCheck -check-prefix=MIPS_32 %s -; RUN: llc < %s -mtriple=mips64el-linux-android -relocation-model=pic \ -; RUN: | FileCheck -check-prefix=MIPS_64 %s - -; Make sure that TLS symbols are emitted in expected order. 
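-; With emulated TLS, each defined thread_local global G lowers to a control
-; variable __emutls_v.G = { size, align, 0, __emutls_t.G }, where __emutls_t.G
-; holds the initial value, and every access becomes a call to
-; __emutls_get_address(&__emutls_v.G); the CHECK lines below verify exactly
-; this layout.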
-@external_x = external thread_local global i32, align 8
-@external_y = thread_local global i8 7, align 2
-@internal_y = internal thread_local global i64 9, align 16
-
-define ptr @get_external_x() {
-entry:
-  ret ptr @external_x
-}
-
-define ptr @get_external_y() {
-entry:
-  ret ptr @external_y
-}
-
-define ptr @get_internal_y() {
-entry:
-  ret ptr @internal_y
-}
-
-; MIPS_32-LABEL: get_external_y:
-; MIPS_32-LABEL: get_internal_y:
-; MIPS_32: lw {{.+}}(__emutls_v.internal_y
-; MIPS_32: lw {{.+}}call16(__emutls_get_address
-; MIPS_32-NOT: __emutls_t.external_x
-; MIPS_32-NOT: __emutls_v.external_x:
-; MIPS_32: .data
-; MIPS_32: .p2align 2
-; MIPS_32-LABEL: __emutls_v.external_y:
-; MIPS_32: .section .rodata,
-; MIPS_32-LABEL: __emutls_t.external_y:
-; MIPS_32-NEXT: .byte 7
-; MIPS_32: .data
-; MIPS_32: .p2align 2
-; MIPS_32-LABEL: __emutls_v.internal_y:
-; MIPS_32-NEXT: .4byte 8
-; MIPS_32-NEXT: .4byte 16
-; MIPS_32-NEXT: .4byte 0
-; MIPS_32-NEXT: .4byte __emutls_t.internal_y
-; MIPS_32-LABEL: __emutls_t.internal_y:
-; MIPS_32-NEXT: .8byte 9
-
-; MIPS_64-LABEL: get_external_x:
-; MIPS_64-LABEL: get_external_y:
-; MIPS_64-LABEL: get_internal_y:
-; MIPS_64: ld {{.+}}(__emutls_v.internal_y
-; MIPS_64: ld {{.+}}call16(__emutls_get_address
-; MIPS_64-NOT: __emutls_t.external_x
-; MIPS_64-NOT: __emutls_v.external_x:
-; MIPS_64-LABEL: __emutls_v.external_y:
-; MIPS_64-NOT: __emutls_v.external_x:
-; MIPS_64: .section .rodata,
-; MIPS_64-LABEL: __emutls_t.external_y:
-; MIPS_64-NEXT: .byte 7
-; MIPS_64: .data
-; MIPS_64: .p2align 3
-; MIPS_64-LABEL: __emutls_v.internal_y:
-; MIPS_64-NEXT: .8byte 8
-; MIPS_64-NEXT: .8byte 16
-; MIPS_64-NEXT: .8byte 0
-; MIPS_64-NEXT: .8byte __emutls_t.internal_y
-; MIPS_64: .section .rodata,
-; MIPS_64-LABEL: __emutls_t.internal_y:
-; MIPS_64-NEXT: .8byte 9

From 1c9173365a932a0d289ec86704ec645a138de03e Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki
Date: Thu, 23 Mar 2023 14:16:25 -0700
Subject: [PATCH 480/691] Fix highlighting issue with _Complex and initialization list with more than 2 items

Fixes https://github.com/llvm/llvm-project/issues/61518

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D146503
---
 clang/docs/ReleaseNotes.rst                  |  2 +
 clang/lib/Sema/SemaInit.cpp                  |  2 +-
 clang/test/Sema/caret-diags-complex-init.cpp | 39 ++++++++++++++++++++
 clang/test/Sema/complex-init-list.c          | 18 +++++++--
 4 files changed, 56 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/Sema/caret-diags-complex-init.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 005bf99a62457..faac3b17b223f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -225,6 +225,8 @@ Bug Fixes in This Version
   enabling short-circuiting coroutines use cases. This fixes
   (`#56532 <https://github.com/llvm/llvm-project/issues/56532>`_) in
   anticipation of `CWG2563 <https://cplusplus.github.io/CWG/issues/2563.html>`_.
+- Fix highlighting issue with ``_Complex`` and initialization list with more than
+  2 items. (`#61518 <https://github.com/llvm/llvm-project/issues/61518>`_)
 
 Bug Fixes to Compiler Builtins
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 17d8b6c98207b..46517c9dde06a 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -1536,7 +1536,7 @@ void InitListChecker::CheckComplexType(const InitializedEntity &Entity,
  // the element type of the complex type. The first element initializes
  // the real part, and the second element initializes the imaginary part.
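  // For example, `_Complex double z = {1.0, 2.0};` initializes the real part
  // to 1.0 and the imaginary part to 2.0; any additional elements are
  // diagnosed as excess elements in a scalar initializer, as the tests below
  // check.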
- if (IList->getNumInits() != 2) + if (IList->getNumInits() < 2) return CheckScalarType(Entity, IList, DeclType, Index, StructuredList, StructuredIndex); diff --git a/clang/test/Sema/caret-diags-complex-init.cpp b/clang/test/Sema/caret-diags-complex-init.cpp new file mode 100644 index 0000000000000..d8a1b7837a640 --- /dev/null +++ b/clang/test/Sema/caret-diags-complex-init.cpp @@ -0,0 +1,39 @@ +// RUN: not %clang_cc1 -std=c++11 -fsyntax-only -fcaret-diagnostics-max-lines 5 %s 2>&1 | FileCheck %s -strict-whitespace + + +//CHECK: {{.*}}: error: excess elements in scalar initializer +//CHECK-NEXT: {{^}}_Complex double gz1 = {1, 2, 3}; +//CHECK-NEXT: {{^}} ^{{$}} +_Complex double gz1 = {1, 2, 3}; + +//CHECK: {{.*}}: error: excess elements in scalar initializer +//CHECK-NEXT: {{^}}_Complex double dd = {1.0, 2.0, 3.0}; +//CHECK-NEXT: {{^}} ^~~{{$}} +_Complex double dd = {1.0, 2.0, 3.0}; + +//CHECK: {{.*}}: error: excess elements in scalar initializer +//CHECK-NEXT: {{^}}_Complex float fd = {1.0, 2.0, 3.0, 4.0, 5.0}; +//CHECK-NEXT: {{^}} ^~~{{$}} +_Complex float fd = {1.0, 2.0, 3.0, 4.0, 5.0}; + +//CHECK: {{.*}}: error: no viable conversion from 'foo' to 'double' +//CHECK-NEXT: {{^}}_Complex double ds = {f, 1.0, b}; +//CHECK-NEXT: {{^}} ^{{$}} +struct foo{}; +struct bar{}; + +foo f; +bar b; +_Complex double ds = {f, 1.0, b}; + +//CHECK: {{.*}}: error: no viable conversion from 'foo' to 'double' +//CHECK-NEXT: {{^}}_Complex double fg = {1.0, f}; +//CHECK-NEXT: {{^}} ^{{$}} +_Complex double fg = {1.0, f}; + + +//CHECK: {{.*}}: error: excess elements in scalar initializer +//CHECK-NEXT: {{^}}_Complex double gg = {1.0, 2.0, f}; +//CHECK-NEXT: {{^}} ^{{$}} +//CHECK-NEXT: {{^}}6 errors generated. +_Complex double gg = {1.0, 2.0, f}; diff --git a/clang/test/Sema/complex-init-list.c b/clang/test/Sema/complex-init-list.c index bfc6899ac235d..b8f87f57f0793 100644 --- a/clang/test/Sema/complex-init-list.c +++ b/clang/test/Sema/complex-init-list.c @@ -25,17 +25,21 @@ struct teststruct { _Complex float x; }; // Random other valid stuff -_Complex int valid2 = { 1, 2 }; // expected-warning {{complex integer}} expected-warning {{specifying real and imaginary components is an extension}} +_Complex int valid2 = { 1, 2 }; // expected-warning {{complex integer}} \ + // expected-warning {{specifying real and imaginary components is an extension}} struct teststruct valid3 = { { 1.0f, 2.0f} }; // expected-warning {{specifying real and imaginary components is an extension}} _Complex float valid4[2] = { {1.0f, 1.0f}, {1.0f, 1.0f} }; // expected-warning 2 {{specifying real and imaginary components is an extension}} // FIXME: We need some sort of warning for valid5 -_Complex float valid5 = {1.0f, 1.0fi}; // expected-warning {{imaginary constants}} expected-warning {{specifying real and imaginary components is an extension}} +_Complex float valid5 = {1.0f, 1.0fi}; // expected-warning {{imaginary constants}} \ + // expected-warning {{specifying real and imaginary components is an extension}} // Random invalid stuff struct teststruct invalid1 = { 1, 2 }; // expected-warning {{excess elements}} -_Complex float invalid2 = { 1, 2, 3 }; // expected-warning {{excess elements}} -_Complex float invalid3 = {}; // expected-error {{scalar initializer cannot be empty}} expected-warning {{GNU empty initializer}} +_Complex float invalid2 = { 1, 2, 3 }; // expected-warning {{specifying real and imaginary components is an extension}} \ + // expected-warning {{excess elements in scalar initializer}} +_Complex float invalid3 = {}; // 
expected-error {{scalar initializer cannot be empty}} \
                              // expected-warning {{GNU empty initializer}}
 
 // Check incomplete array sizing
 
@@ -46,3 +50,9 @@ _Complex float sizecheck2[(sizeof(sizetest2) == sizeof(*sizetest2)*3) ? 1 : -1];
 
 // Constant-folding with init list.
 _Complex float x = 2 + (_Complex float) { 1, 2 }; // expected-warning {{specifying real and imaginary components is an extension}}
+
+// initialization list
+_Complex double cd = {1.0, 2.0, 3.0}; // expected-warning {{specifying real and imaginary components is an extension}} \
+                                      // expected-warning {{excess elements in scalar initializer}}
+_Complex float cf = {1.1f, 2.2f, 3.3f, 4.4f}; // expected-warning {{specifying real and imaginary components is an extension}} \
+                                              // expected-warning {{excess elements in scalar initializer}}

From 3111784ff7d3d51a9e981b1a0bbc8f6511c34d25 Mon Sep 17 00:00:00 2001
From: Sam Clegg
Date: Tue, 6 Dec 2022 16:49:13 -0800
Subject: [PATCH 481/691] [lld][WebAssembly] Initial support for stub libraries

See the docs in lld/docs/WebAssembly.rst for more on this.

This feature unlocks a lot of simplification in the emscripten toolchain
since we can represent the JS libraries to wasm-ld as stub libraries.
See https://github.com/emscripten-core/emscripten/issues/18875

Differential Revision: https://reviews.llvm.org/D145308
---
 lld/docs/WebAssembly.rst                    | 33 +++++++++++++
 lld/test/wasm/Inputs/libstub-missing-dep.so |  2 +
 lld/test/wasm/Inputs/libstub-missing-sym.so |  3 ++
 lld/test/wasm/Inputs/libstub.so             |  5 ++
 lld/test/wasm/stub_library.s                | 48 ++++++++++++++++++
 lld/wasm/Driver.cpp                         | 55 +++++++++++++++++++++
 lld/wasm/InputFiles.cpp                     | 43 ++++++++++++++++
 lld/wasm/InputFiles.h                       | 13 +++++
 lld/wasm/Relocations.cpp                    |  4 +-
 lld/wasm/SymbolTable.cpp                    |  7 +++
 lld/wasm/SymbolTable.h                      |  1 +
 lld/wasm/Symbols.cpp                        |  4 ++
 lld/wasm/Symbols.h                          |  7 ++-
 lld/wasm/Writer.cpp                         |  4 +-
 14 files changed, 224 insertions(+), 5 deletions(-)
 create mode 100644 lld/test/wasm/Inputs/libstub-missing-dep.so
 create mode 100644 lld/test/wasm/Inputs/libstub-missing-sym.so
 create mode 100644 lld/test/wasm/Inputs/libstub.so
 create mode 100644 lld/test/wasm/stub_library.s

diff --git a/lld/docs/WebAssembly.rst b/lld/docs/WebAssembly.rst
index c40d4b322080a..dad3177e2c7df 100644
--- a/lld/docs/WebAssembly.rst
+++ b/lld/docs/WebAssembly.rst
@@ -75,6 +75,11 @@ WebAssembly-specific options:
   flag which corresponds to ``--unresolved-symbols=ignore`` +
   ``--import-undefined``.
 
+.. option:: --allow-undefined-file=<filename>
+
+  Like ``--allow-undefined``, but the file specifies a flat list of
+  symbols, one per line, which are allowed to be undefined.
+
 .. option:: --unresolved-symbols=<method>
 
   This is a more full featured version of ``--allow-undefined``.
@@ -182,11 +187,39 @@ Imports
 By default no undefined symbols are allowed in the final binary. The flag
 ``--allow-undefined`` results in a WebAssembly import being defined for each
 undefined symbol. It is then up to the runtime to provide such symbols.
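+For illustration, assuming a hypothetical host function ``host_log``, a module
+that declares but never defines it::
+
+  extern "C" void host_log(int value); // undefined at static link time
+
+  extern "C" int demo(int x) {
+    host_log(x); // becomes a WebAssembly import under --allow-undefined
+    return x + 1;
+  }
+
+links successfully with ``--allow-undefined``, and the resulting module
+imports ``host_log``; the embedder must then supply it at instantiation time.
+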
Stub libraries are text files that can be passed as normal linker +inputs, similar to how linker scripts can be passed to the ELF linker. The stub +library is a stand-in for a set of symbols that will be available at runtime, +but doesn't contain any actual code or data. Instead it contains just a list of +symbols, one per line. Each symbol can specify zero or more dependencies. +These dependencies are symbols that must be defined, and exported, by the output +module if the symbol is question is imported/required by the output module. + +For example, imagine the runtime provides an external symbol ``foo`` that +depends on the ``malloc`` and ``free``. This can be expressed simply as:: + + #STUB + foo: malloc,free + +Here we are saying that ``foo`` is allowed to be imported (undefined) but that +if it is imported, then the output module must also export ``malloc`` and +``free`` to the runtime. If ``foo`` is imported (undefined), but the output +module does not define ``malloc`` and ``free`` then the link will fail. + +Stub libraries must begin with ``#STUB`` on a line by itself. + Garbage Collection ~~~~~~~~~~~~~~~~~~ diff --git a/lld/test/wasm/Inputs/libstub-missing-dep.so b/lld/test/wasm/Inputs/libstub-missing-dep.so new file mode 100644 index 0000000000000..f2345b766f099 --- /dev/null +++ b/lld/test/wasm/Inputs/libstub-missing-dep.so @@ -0,0 +1,2 @@ +#STUB +foo: missing_dep,missing_dep2 diff --git a/lld/test/wasm/Inputs/libstub-missing-sym.so b/lld/test/wasm/Inputs/libstub-missing-sym.so new file mode 100644 index 0000000000000..2120b948511e9 --- /dev/null +++ b/lld/test/wasm/Inputs/libstub-missing-sym.so @@ -0,0 +1,3 @@ +#STUB +# Symbol `foo` is missing from this file which causes stub_object.s to fail +bar diff --git a/lld/test/wasm/Inputs/libstub.so b/lld/test/wasm/Inputs/libstub.so new file mode 100644 index 0000000000000..57e61f632b101 --- /dev/null +++ b/lld/test/wasm/Inputs/libstub.so @@ -0,0 +1,5 @@ +#STUB +# This is a comment +foo: foodep1,foodep2 +# This symbols as no dependencies +bar diff --git a/lld/test/wasm/stub_library.s b/lld/test/wasm/stub_library.s new file mode 100644 index 0000000000000..9cbf2505ea9e7 --- /dev/null +++ b/lld/test/wasm/stub_library.s @@ -0,0 +1,48 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld %t.o %p/Inputs/libstub.so -o %t.wasm +# RUN: obj2yaml %t.wasm | FileCheck %s + +# When the dependencies are missing the link fails +# RUN: not wasm-ld %t.o %p/Inputs/libstub-missing-dep.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING-DEP %s + +# When the dependencies are missing the link fails +# RUN: not wasm-ld %t.o %p/Inputs/libstub-missing-sym.so -o %t.wasm 2>&1 | FileCheck --check-prefix=MISSING-SYM %s + +# MISSING-DEP: libstub-missing-dep.so: undefined symbol: missing_dep. Required by foo +# MISSING-DEP: libstub-missing-dep.so: undefined symbol: missing_dep2. 
Required by foo + +# MISSING-SYM: undefined symbol: foo + +# The function foo is defined in libstub.so but depend on foodep1 and foodep2 +.functype foo () -> () + +.globl foodep1 +foodep1: + .functype foodep1 () -> () + end_function + +.globl foodep2 +foodep2: + .functype foodep2 () -> () + end_function + +.globl _start +_start: + .functype _start () -> () + call foo + end_function + +# CHECK: - Type: EXPORT +# CHECK-NEXT: Exports: +# CHECK-NEXT: - Name: memory +# CHECK-NEXT: Kind: MEMORY +# CHECK-NEXT: Index: 0 +# CHECK-NEXT: - Name: foodep1 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 1 +# CHECK-NEXT: - Name: foodep2 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 2 +# CHECK-NEXT: - Name: _start +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Index: 3 diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index 310f9df2d5b68..68cd8cabbd7f2 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -279,6 +279,12 @@ void LinkerDriver::addFile(StringRef path) { case file_magic::wasm_object: files.push_back(createObjectFile(mbref)); break; + case file_magic::unknown: + if (mbref.getBuffer().starts_with("#STUB\n")) { + files.push_back(make(mbref)); + break; + } + [[fallthrough]]; default: error("unknown file type: " + mbref.getBufferIdentifier()); } @@ -868,6 +874,53 @@ static void createOptionalSymbols() { WasmSym::tlsBase = createOptionalGlobal("__tls_base", false); } +static void processStubLibraries() { + log("-- processStubLibraries"); + for (auto &stub_file : symtab->stubFiles) { + LLVM_DEBUG(llvm::dbgs() + << "processing stub file: " << stub_file->getName() << "\n"); + for (auto [name, deps]: stub_file->symbolDependencies) { + auto* sym = symtab->find(name); + if (!sym || !sym->isUndefined() || !sym->isUsedInRegularObj || + sym->forceImport) { + LLVM_DEBUG(llvm::dbgs() << "stub not in needed: " << name << "\n"); + continue; + } + // The first stub library to define a given symbol sets this and + // definitions in later stub libraries are ignored. + sym->forceImport = true; + if (sym->traced) + message(toString(stub_file) + ": importing " + name); + else + LLVM_DEBUG(llvm::dbgs() + << toString(stub_file) << ": importing " << name << "\n"); + for (const auto dep : deps) { + auto* needed = symtab->find(dep); + if (!needed) { + error(toString(stub_file) + ": undefined symbol: " + dep + + ". Required by " + toString(*sym)); + } else if (needed->isUndefined()) { + error(toString(stub_file) + + ": undefined symbol: " + toString(*needed) + + ". Required by " + toString(*sym)); + } else { + LLVM_DEBUG(llvm::dbgs() + << "force export: " << toString(*needed) << "\n"); + needed->forceExport = true; + needed->isUsedInRegularObj = true; + if (auto *lazy = dyn_cast(needed)) { + lazy->fetch(); + if (!config->whyExtract.empty()) + config->whyExtractRecords.emplace_back(stub_file->getName(), + sym->getFile(), *sym); + } + } + } + } + } + log("-- done processStubLibraries"); +} + // Reconstructs command line arguments so that so that you can re-run // the same command with the same inputs. This is for --reproduce. 
static std::string createResponseFile(const opt::InputArgList &args) {
@@ -1166,6 +1219,8 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
 if (errorCount())
 return;

+ processStubLibraries();
+
 createOptionalSymbols();

 // Resolve any variant symbols that were created due to signature
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index 75760293bbaae..2d9768c768f29 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -12,6 +12,7 @@
 #include "InputElement.h"
 #include "OutputSegment.h"
 #include "SymbolTable.h"
+#include "lld/Common/Args.h"
 #include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Reproduce.h"
 #include "llvm/Object/Binary.h"
@@ -678,6 +679,48 @@ Symbol *ObjFile::createUndefined(const WasmSymbol &sym, bool isCalledDirectly) {
 llvm_unreachable("unknown symbol kind");
 }

+
+StringRef strip(StringRef s) {
+ while (s.starts_with(" ")) {
+ s = s.drop_front();
+ }
+ while (s.ends_with(" ")) {
+ s = s.drop_back();
+ }
+ return s;
+}
+
+void StubFile::parse() {
+ bool first = true;
+
+ for (StringRef line : args::getLines(mb)) {
+ // File must begin with #STUB
+ if (first) {
+ assert(line == "#STUB");
+ first = false;
+ }
+
+ // Lines starting with # are considered comments
+ if (line.startswith("#"))
+ continue;
+
+ StringRef sym;
+ StringRef rest;
+ std::tie(sym, rest) = line.split(':');
+ sym = strip(sym);
+ rest = strip(rest);
+
+ symbolDependencies[sym] = {};
+
+ while (rest.size()) {
+ StringRef first;
+ std::tie(first, rest) = rest.split(',');
+ first = strip(first);
+ symbolDependencies[sym].push_back(first);
+ }
+ }
+}
+
 void ArchiveFile::parse() {
 // Parse a MemoryBufferRef as an archive file.
 LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index c72f64cb2bd04..11cee5405b657 100644
--- a/lld/wasm/InputFiles.h
+++ b/lld/wasm/InputFiles.h
@@ -47,6 +47,7 @@ class InputFile {
 SharedKind,
 ArchiveKind,
 BitcodeKind,
+ StubKind,
 };

 virtual ~InputFile() {}
@@ -183,6 +184,18 @@ class BitcodeFile : public InputFile {
 static bool doneLTO;
 };

+// Stub library (See docs/WebAssembly.rst)
+class StubFile : public InputFile {
+public:
+ explicit StubFile(MemoryBufferRef m) : InputFile(StubKind, m) {}
+
+ static bool classof(const InputFile *f) { return f->kind() == StubKind; }
+
+ void parse();
+
+ llvm::DenseMap<StringRef, std::vector<StringRef>> symbolDependencies;
+};
+
 inline bool isBitcode(MemoryBufferRef mb) {
 return identify_magic(mb.getBuffer()) == llvm::file_magic::bitcode;
 }
diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp
index 2f6dd6af2d030..ce41cdcb3e07f 100644
--- a/lld/wasm/Relocations.cpp
+++ b/lld/wasm/Relocations.cpp
@@ -32,9 +32,9 @@ static bool requiresGOTAccess(const Symbol *sym) {
 }

 static bool allowUndefined(const Symbol* sym) {
- // Symbols with explicit import names are always allowed to be undefined at
+ // Symbols that are explicitly imported are always allowed to be undefined at
 // link time.
- if (sym->importName)
+ if (sym->isImported())
 return true;
 if (isa<UndefinedFunction>(sym) && config->importUndefined)
 return true;
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index 881b1231ffdf9..d33176a0fa54a 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -38,6 +38,13 @@ void SymbolTable::addFile(InputFile *file) {
 return;
 }

+ // stub file
+ if (auto *f = dyn_cast<StubFile>(file)) {
+ f->parse();
+ stubFiles.push_back(f);
+ return;
+ }
+
 if (config->trace)
 message(toString(file));

diff --git a/lld/wasm/SymbolTable.h b/lld/wasm/SymbolTable.h
index 5009e6039602b..ef2a023b68c44 100644
--- a/lld/wasm/SymbolTable.h
+++ b/lld/wasm/SymbolTable.h
@@ -102,6 +102,7 @@ class SymbolTable {
 DefinedFunction *createUndefinedStub(const WasmSignature &sig);

 std::vector<ObjFile *> objectFiles;
+ std::vector<StubFile *> stubFiles;
 std::vector<SharedFile *> sharedFiles;
 std::vector<BitcodeFile *> bitcodeFiles;
 std::vector<InputFunction *> syntheticFunctions;
diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index 8864e840dd585..567ff49dfa444 100644
--- a/lld/wasm/Symbols.cpp
+++ b/lld/wasm/Symbols.cpp
@@ -221,6 +221,10 @@ void Symbol::setHidden(bool isHidden) {
 flags |= WASM_SYMBOL_VISIBILITY_DEFAULT;
 }

+bool Symbol::isImported() const {
+ return isUndefined() && (importName.has_value() || forceImport);
+}
+
 bool Symbol::isExported() const {
 if (!isDefined() || isLocal())
 return false;
diff --git a/lld/wasm/Symbols.h b/lld/wasm/Symbols.h
index 16f1b535876e0..34fff4b962bdc 100644
--- a/lld/wasm/Symbols.h
+++ b/lld/wasm/Symbols.h
@@ -114,6 +114,7 @@ class Symbol {
 void setOutputSymbolIndex(uint32_t index);
 WasmSymbolType getWasmType() const;
+ bool isImported() const;
 bool isExported() const;
 bool isExportedExplicit() const;
@@ -135,7 +136,8 @@ class Symbol {
 Symbol(StringRef name, Kind k, uint32_t flags, InputFile *f)
 : name(name), file(f), symbolKind(k), referenced(!config->gcSections),
 requiresGOT(false), isUsedInRegularObj(false), forceExport(false),
- canInline(false), traced(false), isStub(false), flags(flags) {}
+ forceImport(false), canInline(false), traced(false), isStub(false),
+ flags(flags) {}

 StringRef name;
 InputFile *file;
@@ -160,6 +162,8 @@ class Symbol {
 // -e/--export command line flag)
 bool forceExport : 1;

+ bool forceImport : 1;
+
 // False if LTO shouldn't inline whatever this symbol points to. If a symbol
 // is overwritten after LTO, LTO shouldn't inline the symbol because it
 // doesn't know the final contents of the symbol.
@@ -661,6 +665,7 @@ T *replaceSymbol(Symbol *s, ArgT &&... arg) {
 T *s2 = new (s) T(std::forward<ArgT>(arg)...);
 s2->isUsedInRegularObj = symCopy.isUsedInRegularObj;
 s2->forceExport = symCopy.forceExport;
+ s2->forceImport = symCopy.forceImport;
 s2->canInline = symCopy.canInline;
 s2->traced = symCopy.traced;
 s2->referenced = symCopy.referenced;
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 030ef7468791a..d9e87276b31b0 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -744,7 +744,7 @@ static bool shouldImport(Symbol *sym) {
 if (config->allowUndefinedSymbols.count(sym->getName()) != 0)
 return true;

- return sym->importName.has_value();
+ return sym->isImported();
 }

 void Writer::calculateImports() {
@@ -1709,7 +1709,7 @@ void Writer::run() {
 sym->forceExport = true;
 }

- // Delay reporting error about explicit exports until after
+ // Delay reporting errors about explicit exports until after
 // addStartStopSymbols which can create optional symbols.
for (auto &name : config->requiredExports) {
 Symbol *sym = symtab->find(name);

From af54d1e852850edcc7b9485851320d9ebf1be4fe Mon Sep 17 00:00:00 2001
From: Joseph Huber
Date: Thu, 23 Mar 2023 14:15:01 -0500
Subject: [PATCH 482/691] [NVPTX] Set the atomic inlining threshold when
 targeting NVPTX directly

Since Clang 16.0.0 users can target the `NVPTX` architecture directly via
`--target=nvptx64-nvidia-cuda`. However, this does not set the atomic inlining
size correctly. This leads to spurious warnings and emission of runtime
atomics that are never implemented. This patch ensures that we set this to the
appropriate pointer width. This will always be 64 in the future as `nvptx64`
will only be supported moving forward.

Fixes: https://github.com/llvm/llvm-project/issues/61410

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D146750
---
 clang/lib/Basic/Targets/NVPTX.cpp | 2 ++
 clang/test/CodeGen/atomics-inlining.c | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index bacd93ee1c379..aca51b2b95b59 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -93,6 +93,8 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
 default:
 llvm_unreachable("TargetPointerWidth must be 32 or 64");
 }
+
+ MaxAtomicInlineWidth = TargetPointerWidth;
 return;
 }

diff --git a/clang/test/CodeGen/atomics-inlining.c b/clang/test/CodeGen/atomics-inlining.c
index ade0e3d75bcb0..862c63076b2dc 100644
--- a/clang/test/CodeGen/atomics-inlining.c
+++ b/clang/test/CodeGen/atomics-inlining.c
@@ -8,6 +8,7 @@
 // RUN: %clang_cc1 -triple mipsisa64r6el-linux-gnuabi64 -emit-llvm %s -o - | FileCheck %s -check-prefix=MIPS64
 // RUN: %clang_cc1 -triple sparc-unknown-eabi -emit-llvm %s -o - | FileCheck %s -check-prefix=SPARCV8 -check-prefix=SPARC
 // RUN: %clang_cc1 -triple sparcv9-unknown-eabi -emit-llvm %s -o - | FileCheck %s -check-prefix=SPARCV9 -check-prefix=SPARC
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s -check-prefix=NVPTX

 unsigned char c1, c2;
 unsigned short s1, s2;
@@ -109,4 +110,17 @@ void test1(void) {
 // SPARCV9: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
 // SPARCV8: call void @__atomic_load(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
 // SPARCV8: call void @__atomic_store(i32 noundef 100, ptr noundef @a1, ptr noundef @a2
+
+// NVPTX-LABEL: define{{.*}} void @test1
+// NVPTX: = load atomic i8, ptr @c1 seq_cst, align 1
+// NVPTX: store atomic i8 {{.*}}, ptr @c1 seq_cst, align 1
+// NVPTX: = load atomic i16, ptr @s1 seq_cst, align 2
+// NVPTX: store atomic i16 {{.*}}, ptr @s1 seq_cst, align 2
+// NVPTX: = load atomic i32, ptr @i1 seq_cst, align 4
+// NVPTX: store atomic i32 {{.*}}, ptr @i1 seq_cst, align 4
+// NVPTX: = load atomic i64, ptr @ll1 seq_cst, align 8
+// NVPTX: store atomic i64 {{.*}}, ptr @ll1 seq_cst, align 8
+// NVPTX: call void @__atomic_load(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5)
+// NVPTX: call void @__atomic_store(i64 noundef 100, ptr noundef @a1, ptr noundef @a2, i32 noundef 5)
+
 }

From d11e49f0c8c52d715cd0f7dea436ac5e0dce9c42 Mon Sep 17 00:00:00 2001
From: Joseph Huber
Date: Thu, 23 Mar 2023 16:30:31 -0500
Subject: [PATCH 483/691] [libc][NFC] Fix misspelled variable name in cmake
 message

---
 libc/cmake/modules/prepare_libc_gpu_build.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/cmake/modules/prepare_libc_gpu_build.cmake
b/libc/cmake/modules/prepare_libc_gpu_build.cmake index 1f9b68afd95cd..fe0f4ef4826d5 100644 --- a/libc/cmake/modules/prepare_libc_gpu_build.cmake +++ b/libc/cmake/modules/prepare_libc_gpu_build.cmake @@ -45,7 +45,7 @@ endif() set(LIBC_GPU_TEST_ARCHITECTURE "" CACHE STRING "Architecture for the GPU tests") if(LIBC_GPU_TEST_ARCHITECTURE) message(STATUS "Using user-specified GPU architecture for testing " - "'${LIBC_GPU_TARGET_ARCHITECTURE}'") + "'${LIBC_GPU_TEST_ARCHITECTURE}'") if("${LIBC_GPU_TEST_ARCHITECTURE}" IN_LIST all_amdgpu_architectures) set(LIBC_GPU_TARGET_ARCHITECTURE_IS_AMDGPU TRUE) set(LIBC_GPU_TARGET_TRIPLE "amdgcn-amd-amdhsa") From 9ddc03a17dba1d7aaad73067325344f6b79441b0 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 23 Mar 2023 16:41:25 -0500 Subject: [PATCH 484/691] [OpenMP] Fix test after updating NVPTX atomic inlines Summary: The previous patch fixed how we handle emitting atomics for targeting NVPTX directly. This is the only other file that really does that and has atomics and I forgot to update it. --- .../OpenMP/nvptx_nested_parallel_codegen.cpp | 36 +++++-------------- 1 file changed, 8 insertions(+), 28 deletions(-) diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp index c5c31c601ed39..010cbae25b9af 100644 --- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -45,7 +45,7 @@ int main() { // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK1: user_code.entry: // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7:[0-9]+]] +// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 8 // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 1) @@ -78,7 +78,7 @@ int main() { // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -111,7 +111,7 @@ int main() { // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 8 -// CHECK1-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR6]] // CHECK1-NEXT: ret void // // @@ -119,19 +119,9 @@ int main() { // CHECK1-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @__atomic_load(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]] -// CHECK1-NEXT: br label 
[[ATOMIC_CONT:%.*]] -// CHECK1: atomic_cont: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4 -// CHECK1-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i64 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]] -// CHECK1-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK1: atomic_exit: +// CHECK1-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4 // CHECK1-NEXT: ret void // // @@ -165,7 +155,7 @@ int main() { // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] // CHECK2: user_code.entry: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7:[0-9]+]] +// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6:[0-9]+]] // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[TMP0]], ptr [[TMP3]], align 4 // CHECK2-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP2]], i32 1, i32 2, i32 -1, ptr @__omp_outlined__, ptr @__omp_outlined___wrapper, ptr [[CAPTURED_VARS_ADDRS]], i32 1) @@ -198,7 +188,7 @@ int main() { // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR7]] +// CHECK2-NEXT: call void @_Z3usePi(ptr noundef [[TMP0]]) #[[ATTR6]] // CHECK2-NEXT: ret void // // @@ -231,7 +221,7 @@ int main() { // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TMP0]], align 4 -// CHECK2-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR7]] +// CHECK2-NEXT: call void @_Z4workPi(ptr noundef [[TMP1]]) #[[ATTR6]] // CHECK2-NEXT: ret void // // @@ -239,19 +229,9 @@ int main() { // CHECK2-SAME: (ptr noundef [[C:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[ATOMIC_TEMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4 -// CHECK2-NEXT: call void @__atomic_load(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 0) #[[ATTR7]] -// CHECK2-NEXT: br label [[ATOMIC_CONT:%.*]] -// CHECK2: atomic_cont: -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, ptr [[ATOMIC_TEMP]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[ADD]], ptr [[ATOMIC_TEMP1]], align 4 -// CHECK2-NEXT: [[CALL:%.*]] = call noundef zeroext i1 @__atomic_compare_exchange(i32 noundef 4, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], ptr noundef [[ATOMIC_TEMP1]], i32 noundef 0, i32 noundef 0) #[[ATTR7]] -// CHECK2-NEXT: br i1 [[CALL]], label [[ATOMIC_EXIT:%.*]], label [[ATOMIC_CONT]] -// CHECK2: atomic_exit: +// CHECK2-NEXT: [[TMP1:%.*]] = atomicrmw add ptr [[TMP0]], i32 1 monotonic, align 4 // CHECK2-NEXT: ret void // // From 53a917595186d711026505dbc42b95aca5a67825 Mon Sep 17 00:00:00 2001 From: 
Leonard Chan
Date: Thu, 23 Mar 2023 21:44:59 +0000
Subject: [PATCH 485/691] [llvm] Handle duplicate call bases when applying
 branch funneling

It's possible to segfault in `DevirtModule::applyICallBranchFunnel` when
attempting to call `getCaller` on a call base that was erased in a prior
iteration. This can occur when attempting to find devirtualizable calls via
`findDevirtualizableCallsForTypeTest` if the vtable passed to llvm.type.test
is a global and not a local.

The function works by taking the first argument of the llvm.type.test call
(which is a vtable), iterating through all uses of it, and adding any uses
that are calls associated with that intrinsic call to a vector.

For most cases where the vtable is actually a *local*, this wouldn't be an
issue. Take for example:

```
define i32 @fn(ptr %obj) #0 {
 %vtable = load ptr, ptr %obj
 %p = call i1 @llvm.type.test(ptr %vtable, metadata !"typeid2")
 call void @llvm.assume(i1 %p)
 %fptr = load ptr, ptr %vtable
 %result = call i32 %fptr(ptr %obj, i32 1)
 ret i32 %result
}
```

`findDevirtualizableCallsForTypeTest` will check the call base
`%result = call i32 %fptr(ptr %obj, i32 1)`, find that it is associated with
a virtualizable call from `%vtable`, find all loads for `%vtable`, and add
any instances where those load results are called to a vector.

Now consider the case where instead `%vtable` was the global itself rather
than a local:

```
define i32 @fn(ptr %obj) #0 {
 %p = call i1 @llvm.type.test(ptr @vtable, metadata !"typeid2")
 call void @llvm.assume(i1 %p)
 %fptr = load ptr, ptr @vtable
 %result = call i32 %fptr(ptr %obj, i32 1)
 ret i32 %result
}
```

`findDevirtualizableCallsForTypeTest` should work normally and add one unique
call instance to a vector. However, if there are multiple instances where this
same global is used for llvm.type.test, like with:

```
define i32 @fn(ptr %obj) #0 {
 %p = call i1 @llvm.type.test(ptr @vtable, metadata !"typeid2")
 call void @llvm.assume(i1 %p)
 %fptr = load ptr, ptr @vtable
 %result = call i32 %fptr(ptr %obj, i32 1)
 ret i32 %result
}

define i32 @fn2(ptr %obj) #0 {
 %p = call i1 @llvm.type.test(ptr @vtable, metadata !"typeid2")
 call void @llvm.assume(i1 %p)
 %fptr = load ptr, ptr @vtable
 %result = call i32 %fptr(ptr %obj, i32 1)
 ret i32 %result
}
```

Then each call base `%result = call i32 %fptr(ptr %obj, i32 1)` will be added
to the vector twice. This is because for either call base
`%result = call i32 %fptr(ptr %obj, i32 1)`, we determine it is associated
with a virtualizable call from `@vtable`, and then we iterate through all the
uses of `@vtable`, which is used across multiple functions. So when scanning
the first `%result = call i32 %fptr(ptr %obj, i32 1)`, both call bases will be
added to the vector, but when scanning the second one, both call bases are
added again, resulting in duplicate call bases in the CSInfo.CallSites vector.

Note this is actually accounted for everywhere else WPD iterates over
CallSites. What everything else does is actually add the call base to the
`OptimizedCalls` set and just check if it's already in the set. We can't reuse
that particular set since it serves a different purpose: marking which calls
were devirtualized, which `applyICallBranchFunnel` explicitly says it does not
do. For this fix, we can just account for duplicates with a map and do the
actual replacements afterwards by iterating over the map.
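To make the failure mode concrete, here is a minimal sketch of the
erase-during-iteration pattern described above (illustrative C++ against the
LLVM API; `buildBranchFunnelCall` is a hypothetical helper standing in for the
funnel-call construction, so this is not the literal WPD code):

```
// CSInfo.CallSites may hold the same CallBase twice when the vtable operand
// is a global referenced by several llvm.type.test calls.
for (auto &&VCallSite : CSInfo.CallSites) {
  CallBase &CB = VCallSite.CB;
  // On a duplicate entry, CB was already erased by an earlier iteration,
  // so this getCaller() call touches freed memory.
  Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
  CallBase *NewCS = buildBranchFunnelCall(CB); // hypothetical helper
  CB.replaceAllUsesWith(NewCS);
  CB.eraseFromParent(); // leaves any later duplicate entry dangling
}
```

Deferring the replace/erase through the map both deduplicates the entries and
keeps every call base alive until the iteration is finished.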
Differential Revision: https://reviews.llvm.org/D146267
---
 .../lib/Transforms/IPO/WholeProgramDevirt.cpp | 19 +++++++-
 .../WholeProgramDevirt/branch-funnel.ll | 48 +++++++++++++++++++
 2 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index e380b47c735fe..8224de30d6986 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -1391,9 +1391,20 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
 IsExported = true;
 if (CSInfo.AllCallSitesDevirted)
 return;
+
+ std::map<CallBase *, CallBase *> CallBases;
 for (auto &&VCallSite : CSInfo.CallSites) {
 CallBase &CB = VCallSite.CB;

+ if (CallBases.find(&CB) != CallBases.end()) {
+ // When finding devirtualizable calls, it's possible to find the same
+ // vtable passed to multiple llvm.type.test or llvm.type.checked.load
+ // calls, which can cause duplicate call sites to be recorded in
+ // [Const]CallSites. If we've already found one of these
+ // call instances, just ignore it. It will be replaced later.
+ continue;
+ }
+
 // Jump tables are only profitable if the retpoline mitigation is enabled.
 Attribute FSAttr = CB.getCaller()->getFnAttribute("target-features");
 if (!FSAttr.isValid() ||
@@ -1440,8 +1451,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
 AttributeList::get(M.getContext(), Attrs.getFnAttrs(),
 Attrs.getRetAttrs(), NewArgAttrs));

- CB.replaceAllUsesWith(NewCS);
- CB.eraseFromParent();
+ CallBases[&CB] = NewCS;

 // This use is no longer unsafe.
 if (VCallSite.NumUnsafeUses)
@@ -1451,6 +1461,11 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo,
 // retpoline mitigation, which would mean that they are lowered to
 // llvm.type.test and therefore require an llvm.type.test resolution for the
 // type identifier.
+
+ std::for_each(CallBases.begin(), CallBases.end(), [](auto &CBs) {
+ CBs.first->replaceAllUsesWith(CBs.second);
+ CBs.first->eraseFromParent();
+ });
 };
 Apply(SlotInfo.CSInfo);
 for (auto &P : SlotInfo.ConstCSInfo)
diff --git a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll
index 4a6e3634a5d16..0b1023eee2732 100644
--- a/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll
+++ b/llvm/test/Transforms/WholeProgramDevirt/branch-funnel.ll
@@ -233,6 +233,54 @@ define i32 @fn3_rv(ptr %obj) #0 {
 ret i32 %result
 }

+; CHECK-LABEL: define i32 @fn4
+; CHECK-NOT: call void (...) @llvm.icall.branch.funnel
+define i32 @fn4(ptr %obj) #0 {
+ %p = call i1 @llvm.type.test(ptr @vt1_1, metadata !"typeid1")
+ call void @llvm.assume(i1 %p)
+ %fptr = load ptr, ptr @vt1_1
+ ; RETP: call i32 @__typeid_typeid1_0_branch_funnel(ptr nest @vt1_1, ptr %obj, i32 1)
+ %result = call i32 %fptr(ptr %obj, i32 1)
+ ; NORETP: call i32 %
+ ret i32 %result
+}
+
+; CHECK-LABEL: define i32 @fn4_cpy
+; CHECK-NOT: call void (...) @llvm.icall.branch.funnel
+define i32 @fn4_cpy(ptr %obj) #0 {
+ %p = call i1 @llvm.type.test(ptr @vt1_1, metadata !"typeid1")
+ call void @llvm.assume(i1 %p)
+ %fptr = load ptr, ptr @vt1_1
+ ; RETP: call i32 @__typeid_typeid1_0_branch_funnel(ptr nest @vt1_1, ptr %obj, i32 1)
+ %result = call i32 %fptr(ptr %obj, i32 1)
+ ; NORETP: call i32 %
+ ret i32 %result
+}
+
+; CHECK-LABEL: define i32 @fn4_rv
+; CHECK-NOT: call void (...)
@llvm.icall.branch.funnel +define i32 @fn4_rv(ptr %obj) #0 { + %p = call i1 @llvm.type.test(ptr @vt1_1_rv, metadata !"typeid1_rv") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr @vt1_1_rv, i32 0) + ; RETP: call i32 @__typeid_typeid1_rv_0_branch_funnel(ptr nest @vt1_1_rv, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + +; CHECK-LABEL: define i32 @fn4_rv_cpy +; CHECK-NOT: call void (...) @llvm.icall.branch.funnel +define i32 @fn4_rv_cpy(ptr %obj) #0 { + %p = call i1 @llvm.type.test(ptr @vt1_1_rv, metadata !"typeid1_rv") + call void @llvm.assume(i1 %p) + %fptr = call ptr @llvm.load.relative.i32(ptr @vt1_1_rv, i32 0) + ; RETP: call i32 @__typeid_typeid1_rv_0_branch_funnel(ptr nest @vt1_1_rv, ptr %obj, i32 1) + %result = call i32 %fptr(ptr %obj, i32 1) + ; NORETP: call i32 % + ret i32 %result +} + ; CHECK-LABEL: define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) ; CHECK-NEXT: musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr {{(nonnull )?}}@vt1_1, ptr {{(nonnull )?}}@vf1_1, ptr {{(nonnull )?}}@vt1_2, ptr {{(nonnull )?}}@vf1_2, ...) From 2e9bcadb7c8acaa8f6ec7d807e5666246923e468 Mon Sep 17 00:00:00 2001 From: Chia-hung Duan Date: Thu, 23 Mar 2023 21:49:02 +0000 Subject: [PATCH 486/691] Revert "[scudo] Add a Timer class to assist performance measurement" This reverts commit e0361396c2281a108a36d186161ace1843925431. --- .../lib/scudo/standalone/CMakeLists.txt | 2 - .../lib/scudo/standalone/tests/CMakeLists.txt | 1 - .../scudo/standalone/tests/timing_test.cpp | 86 ------- compiler-rt/lib/scudo/standalone/timing.cpp | 29 --- compiler-rt/lib/scudo/standalone/timing.h | 215 ------------------ 5 files changed, 333 deletions(-) delete mode 100644 compiler-rt/lib/scudo/standalone/tests/timing_test.cpp delete mode 100644 compiler-rt/lib/scudo/standalone/timing.cpp delete mode 100644 compiler-rt/lib/scudo/standalone/timing.h diff --git a/compiler-rt/lib/scudo/standalone/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/CMakeLists.txt index 6fcd4deddf716..eefcffd4cfc56 100644 --- a/compiler-rt/lib/scudo/standalone/CMakeLists.txt +++ b/compiler-rt/lib/scudo/standalone/CMakeLists.txt @@ -85,7 +85,6 @@ set(SCUDO_HEADERS stack_depot.h stats.h string_utils.h - timing.h tsd_exclusive.h tsd_shared.h tsd.h @@ -108,7 +107,6 @@ set(SCUDO_SOURCES report.cpp rss_limit_checker.cpp string_utils.cpp - timing.cpp ) # Enable the necessary instruction set for scudo_crc32.cpp, if available. diff --git a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt index 335e4b7dbd899..50468d9c6ddc3 100644 --- a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt +++ b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt @@ -105,7 +105,6 @@ set(SCUDO_UNIT_TEST_SOURCES size_class_map_test.cpp stats_test.cpp strings_test.cpp - timing_test.cpp tsd_test.cpp vector_test.cpp scudo_unit_test_main.cpp diff --git a/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp b/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp deleted file mode 100644 index 09a6c31224673..0000000000000 --- a/compiler-rt/lib/scudo/standalone/tests/timing_test.cpp +++ /dev/null @@ -1,86 +0,0 @@ -//===-- timing_test.cpp -----------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "tests/scudo_unit_test.h" - -#include "timing.h" - -#include - -class ScudoTimingTest : public Test { -public: - void testFunc1() { scudo::ScopedTimer ST(Manager, __func__); } - - void testFunc2() { - scudo::ScopedTimer ST(Manager, __func__); - testFunc1(); - } - - void testChainedCalls() { - scudo::ScopedTimer ST(Manager, __func__); - testFunc2(); - } - - void testIgnoredTimer() { - scudo::ScopedTimer ST(Manager, __func__); - ST.ignore(); - } - - void printAllTimersStats() { Manager.printAll(); } - - scudo::TimingManager &getTimingManager() { return Manager; } - -private: - scudo::TimingManager Manager; -}; - -// Given that the output of statistics of timers are dumped through -// `scudo::Printf` which is platform dependent, so we don't have a reliable way -// to catch the output and verify the details. Now we only verify the number of -// invocations on linux. -TEST_F(ScudoTimingTest, SimpleTimer) { -#if SCUDO_LINUX - testing::internal::LogToStderr(); - testing::internal::CaptureStderr(); -#endif - - testIgnoredTimer(); - testChainedCalls(); - printAllTimersStats(); - -#if SCUDO_LINUX - std::string output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(output.find("testIgnoredTimer (1)") == std::string::npos); - EXPECT_TRUE(output.find("testChainedCalls (1)") != std::string::npos); - EXPECT_TRUE(output.find("testFunc2 (1)") != std::string::npos); - EXPECT_TRUE(output.find("testFunc1 (1)") != std::string::npos); -#endif -} - -TEST_F(ScudoTimingTest, NestedTimer) { -#if SCUDO_LINUX - testing::internal::LogToStderr(); - testing::internal::CaptureStderr(); -#endif - - { - scudo::ScopedTimer Outer(getTimingManager(), "Outer"); - { - scudo::ScopedTimer Inner1(getTimingManager(), Outer, "Inner1"); - { scudo::ScopedTimer Inner2(getTimingManager(), Inner1, "Inner2"); } - } - } - printAllTimersStats(); - -#if SCUDO_LINUX - std::string output = testing::internal::GetCapturedStderr(); - EXPECT_TRUE(output.find("Outer (1)") != std::string::npos); - EXPECT_TRUE(output.find("Inner1 (1)") != std::string::npos); - EXPECT_TRUE(output.find("Inner2 (1)") != std::string::npos); -#endif -} diff --git a/compiler-rt/lib/scudo/standalone/timing.cpp b/compiler-rt/lib/scudo/standalone/timing.cpp deleted file mode 100644 index 59ae21d10f0f6..0000000000000 --- a/compiler-rt/lib/scudo/standalone/timing.cpp +++ /dev/null @@ -1,29 +0,0 @@ -//===-- timing.cpp ----------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "timing.h" - -namespace scudo { - -Timer::~Timer() { - if (Manager) - Manager->report(*this); -} - -ScopedTimer::ScopedTimer(TimingManager &Manager, const char *Name) - : Timer(Manager.getOrCreateTimer(Name)) { - start(); -} - -ScopedTimer::ScopedTimer(TimingManager &Manager, const Timer &Nest, - const char *Name) - : Timer(Manager.nest(Nest, Name)) { - start(); -} - -} // namespace scudo diff --git a/compiler-rt/lib/scudo/standalone/timing.h b/compiler-rt/lib/scudo/standalone/timing.h deleted file mode 100644 index 155111f9f8e52..0000000000000 --- a/compiler-rt/lib/scudo/standalone/timing.h +++ /dev/null @@ -1,215 +0,0 @@ -//===-- timing.h ------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "common.h" -#include "mutex.h" -#include "string_utils.h" -#include "thread_annotations.h" - -#include - -namespace scudo { - -class TimingManager; - -// A simple timer for evaluating execution time of code snippets. It can be used -// along with TimingManager or standalone. -class Timer { -public: - // The use of Timer without binding to a TimingManager is supposed to do the - // timer logging manually. Otherwise, TimingManager will do the logging stuff - // for you. - Timer() = default; - Timer(Timer &&Other) - : StartTime(0), AccTime(Other.AccTime), Manager(Other.Manager), - HandleId(Other.HandleId) { - Other.Manager = nullptr; - } - - Timer(const Timer &) = delete; - - virtual ~Timer(); - - void start() { - CHECK_EQ(StartTime, 0U); - StartTime = getMonotonicTime(); - } - void stop() { - AccTime += getMonotonicTime() - StartTime; - StartTime = 0; - } - u64 getAccumulatedTime() const { return AccTime; } - - // Unset the bound TimingManager so that we don't report the data back. This - // is useful if we only want to track subset of certain scope events. - void ignore() { - StartTime = 0; - AccTime = 0; - Manager = nullptr; - } - -protected: - friend class TimingManager; - Timer(TimingManager &Manager, u32 HandleId) - : Manager(&Manager), HandleId(HandleId) {} - - u64 StartTime = 0; - u64 AccTime = 0; - TimingManager *Manager = nullptr; - u32 HandleId; -}; - -// A RAII-style wrapper for easy scope execution measurement. Note that in order -// not to take additional space for the message like `Name`. It only works with -// TimingManager. -class ScopedTimer : public Timer { -public: - ScopedTimer(TimingManager &Manager, const char *Name); - ScopedTimer(TimingManager &Manager, const Timer &Nest, const char *Name); - ~ScopedTimer() override { stop(); } -}; - -// In Scudo, the execution time of single run of code snippets may not be -// useful, we are more interested in the average time from several runs. -// TimingManager lets the registered timer report their data and reports the -// average execution time for each timer periodically. 
-class TimingManager { -public: - TimingManager(u32 PrintingInterval = DefaultPrintingInterval) - : PrintingInterval(PrintingInterval) {} - ~TimingManager() { - if (NumAllocatedTimers != 0) - printAll(); - } - - Timer getOrCreateTimer(const char *Name) EXCLUDES(Mutex) { - ScopedLock L(Mutex); - - CHECK_LT(strlen(Name), MaxLenOfTimerName); - for (u32 I = 0; I < NumAllocatedTimers; ++I) { - if (strncmp(Name, Timers[I].Name, MaxLenOfTimerName) == 0) - return Timer(*this, I); - } - - CHECK_LT(NumAllocatedTimers, MaxNumberOfTimers); - strncpy(Timers[NumAllocatedTimers].Name, Name, MaxLenOfTimerName); - TimerRecords[NumAllocatedTimers].AccumulatedTime = 0; - TimerRecords[NumAllocatedTimers].Occurrence = 0; - return Timer(*this, NumAllocatedTimers++); - } - - // Add a sub-Timer associated with another Timer. This is used when we want to - // detail the execution time in the scope of a Timer. - // For example, - // void Foo() { - // // T1 records the time spent in both first and second tasks. - // ScopedTimer T1(getTimingManager(), "Task1"); - // { - // // T2 records the time spent in first task - // ScopedTimer T2(getTimingManager, T1, "Task2"); - // // Do first task. - // } - // // Do second task. - // } - // - // The report will show proper indents to indicate the nested relation like, - // -- Average Operation Time -- -- Name (# of Calls) -- - // 10.0(ns) Task1 (1) - // 5.0(ns) Task2 (1) - Timer nest(const Timer &T, const char *Name) EXCLUDES(Mutex) { - CHECK_EQ(T.Manager, this); - Timer Nesting = getOrCreateTimer(Name); - - ScopedLock L(Mutex); - CHECK_NE(Nesting.HandleId, T.HandleId); - Timers[Nesting.HandleId].Nesting = T.HandleId; - return Nesting; - } - - void report(const Timer &T) EXCLUDES(Mutex) { - ScopedLock L(Mutex); - - const u32 HandleId = T.HandleId; - CHECK_LT(HandleId, MaxNumberOfTimers); - TimerRecords[HandleId].AccumulatedTime += T.getAccumulatedTime(); - ++TimerRecords[HandleId].Occurrence; - ++NumEventsReported; - if (NumEventsReported % PrintingInterval == 0) - printAllImpl(); - } - - void printAll() EXCLUDES(Mutex) { - ScopedLock L(Mutex); - printAllImpl(); - } - -private: - void printAllImpl() REQUIRES(Mutex) { - static char NameHeader[] = "-- Name (# of Calls) --"; - static char AvgHeader[] = "-- Average Operation Time --"; - ScopedString Str; - Str.append("%-15s %-15s\n", AvgHeader, NameHeader); - - for (u32 I = 0; I < NumAllocatedTimers; ++I) { - if (Timers[I].Nesting != MaxNumberOfTimers) - continue; - printImpl(Str, I); - } - - Str.output(); - } - - void printImpl(ScopedString &Str, const u32 HandleId, - const u32 ExtraIndent = 0) REQUIRES(Mutex) { - const uptr AccumulatedTime = TimerRecords[HandleId].AccumulatedTime; - const uptr Occurrence = TimerRecords[HandleId].Occurrence; - const uptr Integral = Occurrence == 0 ? 0 : AccumulatedTime / Occurrence; - // Only keep single digit of fraction is enough and it enables easier layout - // maintenance. - const uptr Fraction = - Occurrence == 0 ? 0 - : ((AccumulatedTime % Occurrence) * 10) / Occurrence; - - Str.append("%14zu.%zu(ns) %-11s", Integral, Fraction, " "); - - for (u32 I = 0; I < ExtraIndent; ++I) - Str.append("%s", " "); - Str.append("%s (%zu)\n", Timers[HandleId].Name, Occurrence); - - for (u32 I = 0; I < NumAllocatedTimers; ++I) - if (Timers[I].Nesting == HandleId) - printImpl(Str, I, ExtraIndent + 1); - } - - // Instead of maintaining pages for timer registration, a static buffer is - // sufficient for most use cases in Scudo. 
- static constexpr u32 MaxNumberOfTimers = 50; - static constexpr u32 MaxLenOfTimerName = 50; - static constexpr u32 DefaultPrintingInterval = 100; - - struct Record { - uptr AccumulatedTime = 0; - uptr Occurrence = 0; - }; - - struct TimerInfo { - char Name[MaxLenOfTimerName + 1]; - u32 Nesting = MaxNumberOfTimers; - }; - - HybridMutex Mutex; - // The frequency of proactively dumping the timer statistics. For example, the - // default setting is to dump the statistics every 100 reported events. - u32 PrintingInterval GUARDED_BY(Mutex); - uptr NumEventsReported GUARDED_BY(Mutex) = 0; - u32 NumAllocatedTimers GUARDED_BY(Mutex) = 0; - TimerInfo Timers[MaxNumberOfTimers] GUARDED_BY(Mutex); - Record TimerRecords[MaxNumberOfTimers] GUARDED_BY(Mutex); -}; - -} // namespace scudo From ec2333d88538c1675227a555140a13bc27aafb69 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 23 Mar 2023 14:43:23 -0700 Subject: [PATCH 487/691] [JITLink] Add a jitlink::Section::empty operation. --- .../llvm/ExecutionEngine/JITLink/JITLink.h | 3 +++ .../ExecutionEngine/JITLink/LinkGraphTests.cpp | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 3bc9bebea6e0b..464b21d536300 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -727,6 +727,9 @@ class Section { /// Returns the ordinal for this section. SectionOrdinal getOrdinal() const { return SecOrdinal; } + /// Returns true if this section is empty (contains no blocks or symbols). + bool empty() const { return Blocks.empty(); } + /// Returns an iterator over the blocks defined in this section. iterator_range blocks() { return make_range(Blocks.begin(), Blocks.end()); diff --git a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp index 0146c3b4cf6e0..a3cb1b6fd638b 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp +++ b/llvm/unittests/ExecutionEngine/JITLink/LinkGraphTests.cpp @@ -92,6 +92,24 @@ TEST(LinkGraphTest, AddressAccess) { EXPECT_EQ(B1.getFixupAddress(E1), B1Addr + 8) << "Incorrect fixup address"; } +TEST(LinkGraphTest, SectionEmpty) { + // Check that Section::empty behaves as expected. + LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, + getGenericEdgeKindName); + auto &Sec1 = + G.createSection("__data.1", orc::MemProt::Read | orc::MemProt::Write); + auto &B = + G.createContentBlock(Sec1, BlockContent, orc::ExecutorAddr(0x1000), 8, 0); + G.addDefinedSymbol(B, 0, "S", 4, Linkage::Strong, Scope::Default, false, + false); + + auto &Sec2 = + G.createSection("__data.2", orc::MemProt::Read | orc::MemProt::Write); + + EXPECT_FALSE(Sec1.empty()); + EXPECT_TRUE(Sec2.empty()); +} + TEST(LinkGraphTest, BlockAndSymbolIteration) { // Check that we can iterate over blocks within Sections and across sections. LinkGraph G("foo", Triple("x86_64-apple-darwin"), 8, support::little, From 397486566e995a019c249784b1d07c53b6ac670d Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Thu, 23 Mar 2023 14:51:37 -0700 Subject: [PATCH 488/691] [llvm][TextAPI] Handle implicitly upgraded deployment versions Sometimes the clang driver will receive a target triple where the deployment version is too low to support the platform + arch. In those cases, the compiler upgrades the final minOS which is what gets recorded ultimately by the linker in LC_BUILD_VERSION. 
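For example (the two rows below simply mirror the `Target_UnsupportedMinOS`
unit test added in this patch; arm64 support starts at macOS 11):

```
arm64-apple-macos10.14   ->  minOS raised to 11.0
x86_64-apple-macos10.14  ->  minOS stays at 10.14
```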
TextAPI should also reuse this logic for capturing minOS in recorded TBDv5
files.

Reviewed By: ributzka

Differential Revision: https://reviews.llvm.org/D145690
---
 llvm/include/llvm/TextAPI/Platform.h | 2 +
 llvm/include/llvm/TextAPI/Target.h | 2 +-
 llvm/lib/TextAPI/Platform.cpp | 7 ++++
 llvm/lib/TextAPI/TextStubV5.cpp | 6 ++-
 llvm/unittests/TextAPI/TextStubV5Tests.cpp | 44 ++++++++++++++++++++++
 5 files changed, 58 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/TextAPI/Platform.h b/llvm/include/llvm/TextAPI/Platform.h
index d4225ca533fc0..834f833306d1b 100644
--- a/llvm/include/llvm/TextAPI/Platform.h
+++ b/llvm/include/llvm/TextAPI/Platform.h
@@ -14,6 +14,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/VersionTuple.h"

 namespace llvm {
 namespace MachO {
@@ -27,6 +28,7 @@ StringRef getPlatformName(PlatformType Platform);
 PlatformType getPlatformFromName(StringRef Name);
 std::string getOSAndEnvironmentName(PlatformType Platform,
 std::string Version = "");
+VersionTuple mapToSupportedOSVersion(const Triple &Triple);

 } // end namespace MachO.
 } // end namespace llvm.
diff --git a/llvm/include/llvm/TextAPI/Target.h b/llvm/include/llvm/TextAPI/Target.h
index dc0e4f92ae802..0ab2783fc60c5 100644
--- a/llvm/include/llvm/TextAPI/Target.h
+++ b/llvm/include/llvm/TextAPI/Target.h
@@ -33,7 +33,7 @@ class Target {
 : Arch(Arch), Platform(Platform), MinDeployment(MinDeployment) {}
 explicit Target(const llvm::Triple &Triple)
 : Arch(mapToArchitecture(Triple)), Platform(mapToPlatformType(Triple)),
- MinDeployment(Triple.getOSVersion()) {}
+ MinDeployment(mapToSupportedOSVersion(Triple)) {}

 static llvm::Expected<Target> create(StringRef Target);

diff --git a/llvm/lib/TextAPI/Platform.cpp b/llvm/lib/TextAPI/Platform.cpp
index 673fcb764bf86..a432462c82e33 100644
--- a/llvm/lib/TextAPI/Platform.cpp
+++ b/llvm/lib/TextAPI/Platform.cpp
@@ -132,5 +132,12 @@ std::string getOSAndEnvironmentName(PlatformType Platform,
 llvm_unreachable("Unknown llvm::MachO::PlatformType enum");
 }

+VersionTuple mapToSupportedOSVersion(const Triple &Triple) {
+ const VersionTuple MinSupportedOS = Triple.getMinimumSupportedOSVersion();
+ if (MinSupportedOS > Triple.getOSVersion())
+ return MinSupportedOS;
+ return Triple.getOSVersion();
+}
+
 } // end namespace MachO.
 } // end namespace llvm.
diff --git a/llvm/lib/TextAPI/TextStubV5.cpp b/llvm/lib/TextAPI/TextStubV5.cpp
index a9355fabe2202..ade4c867fa49d 100644
--- a/llvm/lib/TextAPI/TextStubV5.cpp
+++ b/llvm/lib/TextAPI/TextStubV5.cpp
@@ -293,8 +293,10 @@ Expected<TargetList> getTargetsSection(const Object *Section) {
 if (!TargetOrErr)
 return make_error<JSONStubError>(getParseErrorMsg(TBDKey::Target));
 TargetOrErr->MinDeployment = Version;
-
- IFTargets.push_back(*TargetOrErr);
+ // Convert to llvm::Triple to accurately compute minOS + platform + arch
+ // pairing.
+ IFTargets.push_back(
+ MachO::Target(Triple(getTargetTripleName(*TargetOrErr))));
 }
 return std::move(IFTargets);
 }
diff --git a/llvm/unittests/TextAPI/TextStubV5Tests.cpp b/llvm/unittests/TextAPI/TextStubV5Tests.cpp
index 3deb38a5a0a3d..b4e8f513daee2 100644
--- a/llvm/unittests/TextAPI/TextStubV5Tests.cpp
+++ b/llvm/unittests/TextAPI/TextStubV5Tests.cpp
@@ -944,6 +944,50 @@ TEST(TBDv5, Target_Simulator) {
 EXPECT_EQ(*File, *WriteResultFile);
 }

+TEST(TBDv5, Target_UnsupportedMinOS) {
+ static const char TBDv5File[] = R"({
+"tapi_tbd_version": 5,
+"main_library": {
+ "target_info": [
+ {
+ "target": "arm64-macos",
+ "min_deployment": "10.14"
+ },
+ {
+ "target": "x86_64-macos",
+ "min_deployment": "10.14"
+ }
+ ],
+ "install_names":[
+ { "name":"/S/L/F/Foo.framework/Foo" }
+ ]
+}})";
+
+ Expected<TBDFile> Result =
+ TextAPIReader::get(MemoryBufferRef(TBDv5File, "Test.tbd"));
+ EXPECT_TRUE(!!Result);
+ TBDFile File = std::move(Result.get());
+ EXPECT_EQ(FileType::TBD_V5, File->getFileType());
+ TargetList ExpectedTargets = {
+ Target(AK_x86_64, PLATFORM_MACOS, VersionTuple(10, 14)),
+ Target(AK_arm64, PLATFORM_MACOS, VersionTuple(11, 0)),
+ };
+ TargetList Targets{File->targets().begin(), File->targets().end()};
+ llvm::sort(Targets);
+ EXPECT_EQ(Targets, ExpectedTargets);
+
+ SmallString<4096> Buffer;
+ raw_svector_ostream OS(Buffer);
+ Error WriteResult = TextAPIWriter::writeToStream(OS, *File);
+ EXPECT_TRUE(!WriteResult);
+
+ Expected<TBDFile> Output =
+ TextAPIReader::get(MemoryBufferRef(Buffer, "Output.tbd"));
+ EXPECT_TRUE(!!Output);
+ TBDFile WriteResultFile = std::move(Output.get());
+ EXPECT_EQ(*File, *WriteResultFile);
+}
+
 TEST(TBDv5, MisspelledKey) {
 static const char TBDv5File[] = R"({
 "tapi_tbd_version": 5,

From c13ccf1fbabede34ff28461b29d2d14aceb293fd Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki
Date: Thu, 23 Mar 2023 14:38:37 -0700
Subject: [PATCH 489/691] [clang][ExtractAPI] Fix declaration fragments for
 instancetype in the type position degrading to id

Fixes https://github.com/llvm/llvm-project/issues/61481

Reviewed By: dang

Differential Revision: https://reviews.llvm.org/D146671
---
 clang/lib/ExtractAPI/DeclarationFragments.cpp | 22 +-
 clang/test/ExtractAPI/objc_instancetype.m | 254 ++++++++++++++++++
 2 files changed, 267 insertions(+), 9 deletions(-)
 create mode 100644 clang/test/ExtractAPI/objc_instancetype.m

diff --git a/clang/lib/ExtractAPI/DeclarationFragments.cpp b/clang/lib/ExtractAPI/DeclarationFragments.cpp
index b8de1270b5f02..c42a1de2fd358 100644
--- a/clang/lib/ExtractAPI/DeclarationFragments.cpp
+++ b/clang/lib/ExtractAPI/DeclarationFragments.cpp
@@ -243,26 +243,30 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForType(
 return Fragments.append(getFragmentsForType(ET->desugar(), Context, After));
 }

- // Everything we care about has been handled now, reduce to the canonical
- // unqualified base type.
- QualType Base = T->getCanonicalTypeUnqualified();
-
- // Render Objective-C `id`/`instancetype` as keywords.
- if (T->isObjCIdType())
- return Fragments.append(Base.getAsString(),
- DeclarationFragments::FragmentKind::Keyword);
-
 // If the type is a typedefed type, get the underlying TypedefNameDecl for a
 // direct reference to the typedef instead of the wrapped type.
+
+ // The 'id' type is a typedef for an ObjCObjectPointerType, so we treat it
+ // as a typedef.
 if (const TypedefType *TypedefTy = dyn_cast<TypedefType>(T)) {
 const TypedefNameDecl *Decl = TypedefTy->getDecl();
 TypedefUnderlyingTypeResolver TypedefResolver(Context);
 std::string USR = TypedefResolver.getUSRForType(QualType(T, 0));
+
+ if (T->isObjCIdType()) {
+ return Fragments.append(Decl->getName(),
+ DeclarationFragments::FragmentKind::Keyword);
+ }
+
 return Fragments.append(
 Decl->getName(), DeclarationFragments::FragmentKind::TypeIdentifier,
 USR, TypedefResolver.getUnderlyingTypeDecl(QualType(T, 0)));
 }

+ // Everything we care about has been handled now, reduce to the canonical
+ // unqualified base type.
+ QualType Base = T->getCanonicalTypeUnqualified();
+
 // If the base type is a TagType (struct/interface/union/class/enum), let's
 // get the underlying Decl for better names and USRs.
 if (const TagType *TagTy = dyn_cast<TagType>(Base)) {
diff --git a/clang/test/ExtractAPI/objc_instancetype.m b/clang/test/ExtractAPI/objc_instancetype.m
new file mode 100644
index 0000000000000..1680fe9336cf3
--- /dev/null
+++ b/clang/test/ExtractAPI/objc_instancetype.m
@@ -0,0 +1,254 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \
+// RUN: %t/reference.output.json.in >> %t/reference.output.json
+// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx -x objective-c-header %t/input.h -o %t/output.json -verify
+
+// Generator version is not consistent across test runs, normalize it.
+// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \
+// RUN: %t/output.json >> %t/output-normalized.json
+// RUN: diff %t/reference.output.json %t/output-normalized.json
+
+
+//--- input.h
+@interface Foo
+- (instancetype) init;
+- (id) reset;
+@end
+// expected-no-diagnostics
+
+
+//--- reference.output.json.in
+{
+ "metadata": {
+ "formatVersion": {
+ "major": 0,
+ "minor": 5,
+ "patch": 3
+ },
+ "generator": "?"
+ }, + "module": { + "name": "", + "platform": { + "architecture": "arm64", + "operatingSystem": { + "minimumVersion": { + "major": 11, + "minor": 0, + "patch": 0 + }, + "name": "macosx" + }, + "vendor": "apple" + } + }, + "relationships": [ + { + "kind": "memberOf", + "source": "c:objc(cs)Foo(im)init", + "target": "c:objc(cs)Foo", + "targetFallback": "Foo" + }, + { + "kind": "memberOf", + "source": "c:objc(cs)Foo(im)reset", + "target": "c:objc(cs)Foo", + "targetFallback": "Foo" + } + ], + "symbols": [ + { + "accessLevel": "public", + "declarationFragments": [ + { + "kind": "keyword", + "spelling": "@interface" + }, + { + "kind": "text", + "spelling": " " + }, + { + "kind": "identifier", + "spelling": "Foo" + } + ], + "identifier": { + "interfaceLanguage": "objective-c", + "precise": "c:objc(cs)Foo" + }, + "kind": { + "displayName": "Class", + "identifier": "objective-c.class" + }, + "location": { + "position": { + "character": 12, + "line": 1 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": { + "navigator": [ + { + "kind": "identifier", + "spelling": "Foo" + } + ], + "subHeading": [ + { + "kind": "identifier", + "spelling": "Foo" + } + ], + "title": "Foo" + }, + "pathComponents": [ + "Foo" + ] + }, + { + "accessLevel": "public", + "declarationFragments": [ + { + "kind": "text", + "spelling": "- (" + }, + { + "kind": "keyword", + "spelling": "instancetype" + }, + { + "kind": "text", + "spelling": ") " + }, + { + "kind": "identifier", + "spelling": "init" + }, + { + "kind": "text", + "spelling": ";" + } + ], + "functionSignature": { + "returns": [ + { + "kind": "keyword", + "spelling": "instancetype" + } + ] + }, + "identifier": { + "interfaceLanguage": "objective-c", + "precise": "c:objc(cs)Foo(im)init" + }, + "kind": { + "displayName": "Instance Method", + "identifier": "objective-c.method" + }, + "location": { + "position": { + "character": 1, + "line": 2 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": { + "navigator": [ + { + "kind": "identifier", + "spelling": "init" + } + ], + "subHeading": [ + { + "kind": "text", + "spelling": "- " + }, + { + "kind": "identifier", + "spelling": "init" + } + ], + "title": "init" + }, + "pathComponents": [ + "Foo", + "init" + ] + }, + { + "accessLevel": "public", + "declarationFragments": [ + { + "kind": "text", + "spelling": "- (" + }, + { + "kind": "keyword", + "spelling": "id" + }, + { + "kind": "text", + "spelling": ") " + }, + { + "kind": "identifier", + "spelling": "reset" + }, + { + "kind": "text", + "spelling": ";" + } + ], + "functionSignature": { + "returns": [ + { + "kind": "keyword", + "spelling": "id" + } + ] + }, + "identifier": { + "interfaceLanguage": "objective-c", + "precise": "c:objc(cs)Foo(im)reset" + }, + "kind": { + "displayName": "Instance Method", + "identifier": "objective-c.method" + }, + "location": { + "position": { + "character": 1, + "line": 3 + }, + "uri": "file://INPUT_DIR/input.h" + }, + "names": { + "navigator": [ + { + "kind": "identifier", + "spelling": "reset" + } + ], + "subHeading": [ + { + "kind": "text", + "spelling": "- " + }, + { + "kind": "identifier", + "spelling": "reset" + } + ], + "title": "reset" + }, + "pathComponents": [ + "Foo", + "reset" + ] + } + ] +} From 999643f1513e86d7d438ec953a3d73c4bc21eb25 Mon Sep 17 00:00:00 2001 From: Heejin Ahn Date: Wed, 22 Mar 2023 19:14:00 -0700 Subject: [PATCH 490/691] [WebAssembly] Tidy up DebugValueManager (NFC) Misc. cleanups for `WebAssemblyDebugValueManager`. 
- Use `Register` for registers - Simpler for loop iteration - Rename a variable - Reorder methods - Reduce `SmallVector` size for `DBG_VALUE`s to 1; one def usually have a single `DBG_VALUE` attached to it in most cases - Add a few more lines of comments Reviewed By: dschuff Differential Revision: https://reviews.llvm.org/D146743 --- .../WebAssemblyDebugValueManager.cpp | 35 +++++++++---------- .../WebAssemblyDebugValueManager.h | 17 ++++++--- 2 files changed, 28 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp index 55be64ad7da01..45502a577e4e2 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp @@ -18,21 +18,18 @@ using namespace llvm; -WebAssemblyDebugValueManager::WebAssemblyDebugValueManager( - MachineInstr *Instr) { +WebAssemblyDebugValueManager::WebAssemblyDebugValueManager(MachineInstr *Def) { // This code differs from MachineInstr::collectDebugValues in that it scans // the whole BB, not just contiguous DBG_VALUEs. - if (!Instr->getOperand(0).isReg()) + if (!Def->getOperand(0).isReg()) return; - CurrentReg = Instr->getOperand(0).getReg(); + CurrentReg = Def->getOperand(0).getReg(); - MachineBasicBlock::iterator DI = *Instr; - ++DI; - for (MachineBasicBlock::iterator DE = Instr->getParent()->end(); DI != DE; - ++DI) { - if (DI->isDebugValue() && - DI->hasDebugOperandForReg(Instr->getOperand(0).getReg())) - DbgValues.push_back(&*DI); + for (MachineBasicBlock::iterator MI = std::next(Def->getIterator()), + ME = Def->getParent()->end(); + MI != ME; ++MI) { + if (MI->isDebugValue() && MI->hasDebugOperandForReg(CurrentReg)) + DbgValues.push_back(&*MI); } } @@ -42,15 +39,8 @@ void WebAssemblyDebugValueManager::move(MachineInstr *Insert) { MBB->splice(Insert, DBI->getParent(), DBI); } -void WebAssemblyDebugValueManager::updateReg(unsigned Reg) { - for (auto *DBI : DbgValues) - for (auto &MO : DBI->getDebugOperandsForReg(CurrentReg)) - MO.setReg(Reg); - CurrentReg = Reg; -} - void WebAssemblyDebugValueManager::clone(MachineInstr *Insert, - unsigned NewReg) { + Register NewReg) { MachineBasicBlock *MBB = Insert->getParent(); MachineFunction *MF = MBB->getParent(); for (MachineInstr *DBI : reverse(DbgValues)) { @@ -61,6 +51,13 @@ void WebAssemblyDebugValueManager::clone(MachineInstr *Insert, } } +void WebAssemblyDebugValueManager::updateReg(Register Reg) { + for (auto *DBI : DbgValues) + for (auto &MO : DBI->getDebugOperandsForReg(CurrentReg)) + MO.setReg(Reg); + CurrentReg = Reg; +} + void WebAssemblyDebugValueManager::replaceWithLocal(unsigned LocalId) { for (auto *DBI : DbgValues) { auto IndexType = DBI->isIndirectDebugValue() diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h index c2dd569093044..4c63af21406e1 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.h @@ -9,6 +9,9 @@ /// \file /// This file contains the declaration of the WebAssembly-specific /// manager for DebugValues associated with the specific MachineInstr. +/// This pass currently does not handle DBG_VALUE_LISTs; they are assumed to +/// have been set to undef in NullifyDebugValueLists pass. 
+/// TODO Handle DBG_VALUE_LIST /// //===----------------------------------------------------------------------===// @@ -16,21 +19,25 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYDEBUGVALUEMANAGER_H #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/Register.h" namespace llvm { class MachineInstr; class WebAssemblyDebugValueManager { - SmallVector DbgValues; - unsigned CurrentReg; + SmallVector DbgValues; + Register CurrentReg; public: - WebAssemblyDebugValueManager(MachineInstr *Instr); + WebAssemblyDebugValueManager(MachineInstr *Def); void move(MachineInstr *Insert); - void updateReg(unsigned Reg); - void clone(MachineInstr *Insert, unsigned NewReg); + void clone(MachineInstr *Insert, Register NewReg); + // Update the register for Def and DBG_VALUEs. + void updateReg(Register Reg); + // Replace the current register in DBG_VALUEs with the given LocalId target + // index. void replaceWithLocal(unsigned LocalId); }; From bb0ecb7bf0d0025e61086ae449dae099a8a8bf14 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Mar 2023 15:49:38 -0700 Subject: [PATCH 491/691] [Driver][test] Remove remnant mips*-linux-android tests after 805f51f9fedf90d2aa0ad46c61cb4c9c0c5bcfe9 --- clang/test/Driver/clang-translation.c | 7 ------- clang/test/Driver/constructors.c | 6 ------ clang/test/Driver/linux-ld.c | 5 ----- 3 files changed, 18 deletions(-) diff --git a/clang/test/Driver/clang-translation.c b/clang/test/Driver/clang-translation.c index 058ac32bbdb50..d950d9a4de9be 100644 --- a/clang/test/Driver/clang-translation.c +++ b/clang/test/Driver/clang-translation.c @@ -483,10 +483,3 @@ // MIPSN32R6EL: "-target-cpu" "mips64r6" // MIPSN32R6EL: "-target-abi" "n32" // MIPSN32R6EL: "-mfloat-abi" "hard" - -// RUN: %clang -target mips64el-linux-android -### -S %s 2>&1 | \ -// RUN: FileCheck -check-prefix=MIPS64EL-ANDROID %s -// MIPS64EL-ANDROID: clang -// MIPS64EL-ANDROID: "-cc1" -// MIPS64EL-ANDROID: "-target-cpu" "mips64r6" -// MIPS64EL-ANDROID: "-mfloat-abi" "hard" diff --git a/clang/test/Driver/constructors.c b/clang/test/Driver/constructors.c index f844e80a5450f..f210ad512f270 100644 --- a/clang/test/Driver/constructors.c +++ b/clang/test/Driver/constructors.c @@ -50,12 +50,6 @@ // RUN: | FileCheck --check-prefix=CHECK-INIT-ARRAY %s // // RUN: %clang -### %s -fsyntax-only 2>&1 \ -// RUN: --target=mipsel-unknown-linux-android \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: --gcc-toolchain="" \ -// RUN: | FileCheck --check-prefix=CHECK-INIT-ARRAY %s -// -// RUN: %clang -### %s -fsyntax-only 2>&1 \ // RUN: --target=i386-unknown-linux-android \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: --gcc-toolchain="" \ diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c index be1230ac0ab63..27786dce67cc6 100644 --- a/clang/test/Driver/linux-ld.c +++ b/clang/test/Driver/linux-ld.c @@ -1223,11 +1223,6 @@ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ // RUN: | FileCheck --check-prefix=CHECK-ANDROID-64 %s // RUN: %clang -### %s -no-pie 2>&1 \ -// RUN: --target=mips64el-linux-android \ -// RUN: --gcc-toolchain="" \ -// RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ -// RUN: | FileCheck --check-prefix=CHECK-ANDROID-64 %s -// RUN: %clang -### %s -no-pie 2>&1 \ // RUN: --target=i686-linux-android \ // RUN: --gcc-toolchain="" \ // RUN: --sysroot=%S/Inputs/basic_android_tree/sysroot \ From ccc2f362db352df8991f493d8a05bdf99eaeea4b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 23 Mar 2023 15:58:42 -0700 Subject: [PATCH 492/691] 
Android.rules: remove mips* rules

They have been obsoleted for a long time and D146565 recently removed
Clang support.
---
 lldb/packages/Python/lldbsuite/test/make/Android.rules | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/lldb/packages/Python/lldbsuite/test/make/Android.rules b/lldb/packages/Python/lldbsuite/test/make/Android.rules
index 32f786aa34756..cd7d8ae74d6bf 100644
--- a/lldb/packages/Python/lldbsuite/test/make/Android.rules
+++ b/lldb/packages/Python/lldbsuite/test/make/Android.rules
@@ -24,14 +24,6 @@ else ifeq "$(ARCH)" "i386"
 	SYSROOT_ARCH := x86
 	STL_ARCH := x86
 	TRIPLE := i686-none-linux-android
-else ifeq "$(ARCH)" "mips64r6"
-	SYSROOT_ARCH := mips64
-	STL_ARCH := mips64
-	TRIPLE := mips64el-none-linux-android
-else ifeq "$(ARCH)" "mips32"
-	SYSROOT_ARCH := mips
-	STL_ARCH := mips
-	TRIPLE := mipsel-none-linux-android
 else
 	SYSROOT_ARCH := $(ARCH)
 	STL_ARCH := $(ARCH)
From 399f313f271342d1d838bf396af5c5d8d587915a Mon Sep 17 00:00:00 2001
From: Fangrui Song
Date: Thu, 23 Mar 2023 16:00:16 -0700
Subject: [PATCH 493/691] [Driver] Remove remnant mips64el-linux-android code
 after D146565

---
 clang/lib/Driver/ToolChains/Arch/Mips.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Arch/Mips.cpp b/clang/lib/Driver/ToolChains/Arch/Mips.cpp
index 7da00a8854006..f9f14c01b2b9f 100644
--- a/clang/lib/Driver/ToolChains/Arch/Mips.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/Mips.cpp
@@ -39,12 +39,6 @@ void mips::getMipsCPUAndABI(const ArgList &Args, const llvm::Triple &Triple,
     DefMips64CPU = "mips64r6";
   }
 
-  // MIPS64r6 is the default for Android MIPS64 (mips64el-linux-android).
-  if (Triple.isAndroid()) {
-    DefMips32CPU = "mips32";
-    DefMips64CPU = "mips64r6";
-  }
-
   // MIPS3 is the default for mips64*-unknown-openbsd.
   if (Triple.isOSOpenBSD())
     DefMips64CPU = "mips3";
From 07ef7b1ff21e8e3faaf8279b8ec6a7f0ac252fad Mon Sep 17 00:00:00 2001
From: Bruno Cardoso Lopes
Date: Thu, 23 Mar 2023 14:34:12 -0700
Subject: [PATCH 494/691] [Builtins] Add __builtin_assume_separate_storage

Plumbing from the language level to the assume intrinsics with
separate_storage operand bundles.

Patch by David Goldblatt (davidtgoldblatt)

Differential Revision: https://reviews.llvm.org/D136515
---
 clang/docs/LanguageExtensions.rst             | 40 +++++++++++++++++++
 clang/docs/ReleaseNotes.rst                   |  2 +
 clang/include/clang/Basic/Builtins.def        |  1 +
 clang/lib/CodeGen/CGBuiltin.cpp               | 12 ++++++
 .../CodeGen/builtin-assume-separate-storage.c | 36 +++++++++++++++++
 .../Sema/builtin-assume-separate-storage.c    | 13 ++++++
 6 files changed, 104 insertions(+)
 create mode 100644 clang/test/CodeGen/builtin-assume-separate-storage.c
 create mode 100644 clang/test/Sema/builtin-assume-separate-storage.c

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index f8c83d4d6d162..a9bdc83c53e7a 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -2358,6 +2358,46 @@ evaluated, so any side effects of the expression will be discarded.
 
 Query for this feature with ``__has_builtin(__builtin_assume)``.
 
+.. _langext-__builtin_assume_separate_storage:
+
+``__builtin_assume_separate_storage``
+-------------------------------------
+
+``__builtin_assume_separate_storage`` is used to provide the optimizer with the
+knowledge that its two arguments point to separately allocated objects.
+
+**Syntax**:
+
+.. code-block:: c++
+
+  __builtin_assume_separate_storage(const volatile void *, const volatile void *)
+
+**Example of Use**:
+
+.. code-block:: c++
+
+  int foo(int *x, int *y) {
+    __builtin_assume_separate_storage(x, y);
+    *x = 0;
+    *y = 1;
+    // The optimizer may optimize this to return 0 without reloading from *x.
+    return *x;
+  }
+
+**Description**:
+
+The arguments to this function are assumed to point into separately allocated
+storage (either different variable definitions or different dynamic storage
+allocations). The optimizer may use this fact to aid in alias analysis. If the
+arguments point into the same storage, the behavior is undefined. Note that the
+definition of "storage" here refers to the outermost enclosing allocation of any
+particular object (so for example, it's never correct to call this function
+passing the addresses of fields in the same struct, elements of the same array,
+etc.).
+
+Query for this feature with ``__has_builtin(__builtin_assume_separate_storage)``.
+
+
 ``__builtin_offsetof``
 ----------------------
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index faac3b17b223f..29e3f516c06e5 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -124,6 +124,8 @@ Non-comprehensive list of changes in this release
 - Clang now supports ``__builtin_FILE_NAME()`` which returns the same
   information as the ``__FILE_NAME__`` macro (the presumed file name from the
   invocation point, with no path components included).
+- Clang now supports ``__builtin_assume_separate_storage`` that indicates that
+  its arguments point to objects in separate storage allocations.
 
 New Compiler Flags
 ------------------
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index 957375eccb84a..dea806099efbf 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -1591,6 +1591,7 @@ BUILTIN(__builtin_annotation, "v.", "tn")
 
 // Invariants
 BUILTIN(__builtin_assume, "vb", "nE")
+BUILTIN(__builtin_assume_separate_storage, "vvCD*vCD*", "nE")
 
 // Multiprecision Arithmetic Builtins.
 BUILTIN(__builtin_addcb, "UcUcCUcCUcCUc*", "n")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 6381d68c161c6..b3aea13878c1c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -2856,6 +2856,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     Builder.CreateCall(FnAssume, ArgValue);
     return RValue::get(nullptr);
   }
+  case Builtin::BI__builtin_assume_separate_storage: {
+    const Expr *Arg0 = E->getArg(0);
+    const Expr *Arg1 = E->getArg(1);
+
+    Value *Value0 = EmitScalarExpr(Arg0);
+    Value *Value1 = EmitScalarExpr(Arg1);
+
+    Value *Values[] = {Value0, Value1};
+    OperandBundleDefT OBD("separate_storage", Values);
+    Builder.CreateAssumption(ConstantInt::getTrue(getLLVMContext()), {OBD});
+    return RValue::get(nullptr);
+  }
   case Builtin::BI__arithmetic_fence: {
     // Create the builtin call if FastMath is selected, and the target
     // supports the builtin, otherwise just return the argument.
diff --git a/clang/test/CodeGen/builtin-assume-separate-storage.c b/clang/test/CodeGen/builtin-assume-separate-storage.c new file mode 100644 index 0000000000000..ac82f27b3e720 --- /dev/null +++ b/clang/test/CodeGen/builtin-assume-separate-storage.c @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s +void *nonconst(void); + +// CHECK-LABEL: @test1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "separate_storage"(ptr [[TMP0]], ptr [[TMP1]]) ] +// CHECK-NEXT: ret void +// +void test1(int *a, int *b) { + + __builtin_assume_separate_storage(a, b); +} + +// Separate storage assumptions evaluate their arguments unconditionally, like +// assume_aligned but *unlike* assume. Check that we actually do so. +// CHECK-LABEL: @test2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR]], align 8 +// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call ptr @nonconst() +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "separate_storage"(ptr [[TMP0]], ptr [[CALL]]) ] +// CHECK-NEXT: ret void +// +void test2(int *a, int *b) { + __builtin_assume_separate_storage(a, nonconst()); +} diff --git a/clang/test/Sema/builtin-assume-separate-storage.c b/clang/test/Sema/builtin-assume-separate-storage.c new file mode 100644 index 0000000000000..f27d0b821d5ab --- /dev/null +++ b/clang/test/Sema/builtin-assume-separate-storage.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s + +void *nonconst(void); + +void test1(int *a, int *b) { + __builtin_assume_separate_storage(a, b); + // Separate storage assumptions evaluate their arguments unconditionally, like + // assume_aligned but *unlike* assume. Check that we don't warn on it. + __builtin_assume_separate_storage(a, nonconst()); + __builtin_assume_separate_storage(nonconst(), a); + __builtin_assume_separate_storage(a, 3); // expected-error {{incompatible integer to pointer conversion}} + __builtin_assume_separate_storage(3, a); // expected-error {{incompatible integer to pointer conversion}} +} From c82803097f6a89edc49577e5bb4f7309e053efcc Mon Sep 17 00:00:00 2001 From: Quinn Dawkins Date: Thu, 23 Feb 2023 11:30:20 -0500 Subject: [PATCH 495/691] [mlir][linalg] Refactor convolution to img2col conversion to use gather semantics Following up on the comments in https://reviews.llvm.org/D144108 this patch refactors the im2col conversion patterns for `linalg.conv_2d_nhwc_hwcf` and `linalg.conv_2d_nchw_fchw` convolutions to use gather semantics for the im2col packing `linalg.generic`. Follow up work can include a similar pattern for depthwise convolutions and a generalization of the patterns here to work with any `LinalgOp` as well. 
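To make the index bookkeeping concrete, here is a short standalone C++
sketch (an illustration added for exposition, not part of the patch) of
what the `unrollIndex` helper below computes: it splits a flattened
iteration index into per-dimension subindices, ordered from slowest to
fastest varying.

  #include <array>
  #include <cassert>
  #include <cstdint>

  // Assumed to mirror the semantics of the patch's unrollIndex helper,
  // written over plain integers instead of IR values.
  static std::array<int64_t, 3> unrollIndex3(int64_t index,
                                             std::array<int64_t, 3> factors) {
    std::array<int64_t, 3> subindices{};
    int64_t runningProd = 1;
    for (int i = 2; i >= 0; --i) {
      int64_t v = index;
      if (i > 0)
        v %= runningProd * factors[i]; // index % (fi * ... * fn)
      if (runningProd > 1)
        v /= runningProd;              // ... / (f(i+1) * ... * fn)
      runningProd *= factors[i];
      subindices[i] = v;
    }
    return subindices;
  }

  int main() {
    // For a 3x3x4 (fh, fw, ic) filter, k = fh*12 + fw*4 + ic.
    std::array<int64_t, 3> s = unrollIndex3(/*k=*/23, {3, 3, 4});
    assert(s[0] == 1 && s[1] == 2 && s[2] == 3); // 1*12 + 2*4 + 3 == 23
    return 0;
  }

The gathered input element for each (b, m, k) iteration is then fetched
with a `tensor.extract` at the convolved indices, e.g. `h = oh * stride + fh`.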
Differential Revision: https://reviews.llvm.org/D144678
---
 .../Transforms/ConvertConv2DToImg2Col.cpp     | 372 +++++++++---------
 .../Linalg/convert-conv2d-to-img2col.mlir     | 150 ++++---
 2 files changed, 297 insertions(+), 225 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp b/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp
index 14bff411ef8c1..58a23e2be54d1 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ConvertConv2DToImg2Col.cpp
@@ -41,6 +41,49 @@ static Value createMul(Location loc, Value x, Value y, OpBuilder &builder) {
   return builder.create(loc, x, y);
 }
 
+// Unrolls the given composite `index` into a set of subindices with maximum
+// iteration ranges specified by `factors` according to the following
+// assumptions:
+// 1. The iteration range for `index` is [0, f1 * f2 * ... * fn] i.e. the
+//    product of the given list of factors
+// 2. The iterators corresponding to the entries in `factors` are ordered from
+//    slowest to fastest varying
+// Each subindex is then computed as:
+//   subindex[i] = floor( (index % (fi * ... * fn)) / (fi+1 * ... * fn) )
+static SmallVector unrollIndex(OpBuilder &b, Location loc,
+                               Value index,
+                               ArrayRef factors) {
+  assert(factors.size() >= 1 && "empty factor list");
+  SmallVector indices(factors.size());
+  int64_t runningProd = 1;
+  for (int i = factors.size() - 1, end = 0; i >= end; i--) {
+    Value unrolledIndex = index;
+    if (i > 0) {
+      Value modBase = b.create(
+          loc, b.getIndexAttr(runningProd * factors[i]));
+      unrolledIndex = b.create(loc, unrolledIndex, modBase);
+    }
+    if (runningProd > 1) {
+      Value divDenom =
+          b.create(loc, b.getIndexAttr(runningProd));
+      unrolledIndex = b.create(loc, unrolledIndex, divDenom);
+    }
+    runningProd *= factors[i];
+    indices[i] = unrolledIndex;
+  }
+  return indices;
+}
+
+// Given indices corresponding to iterators in the output (oIndex) and filter
+// (fIndex) for a convolution, compute the convolved index for the
+// input as `oIndex * stride + fIndex`.
+static Value getConvolvedIndex(OpBuilder &b, Location loc, Value oIndex,
+                               Value fIndex, int64_t stride) {
+  Value strideVal = b.create(loc, b.getIndexAttr(stride));
+  Value convIndex = b.create(loc, oIndex, strideVal);
+  return b.create(loc, convIndex, fIndex);
+}
+
 FailureOr>
 rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp) {
   auto inputType = convOp.getInputs()[0].getType().cast();
@@ -68,32 +111,34 @@ rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp) {
   ArrayRef filterShape = filterType.getShape();
   ArrayRef outputShape = outputType.getShape();
 
-  int n = outputShape[0];
-  int oh = outputShape[1];
-  int ow = outputShape[2];
-  int oc = outputShape[3];
-  int fh = filterShape[0];
-  int fw = filterShape[1];
-  int ic = filterShape[2];
+  int64_t n = outputShape[0];
+  int64_t oh = outputShape[1];
+  int64_t ow = outputShape[2];
+  int64_t oc = outputShape[3];
+  int64_t fh = filterShape[0];
+  int64_t fw = filterShape[1];
+  int64_t ic = filterShape[2];
 
   Location loc = convOp.getLoc();
 
-  SmallVector colTensorShape = {n, oh, ow, fh, fw, ic};
+  // Reshape output and filter to the LHS and result of a (B)MNK matmul.
+ SmallVector filterReassocIndices = {{0, 1, 2}, {3}}; + auto reshapedFilterType = + RankedTensorType::get({fh * fw * ic, oc}, inputType.getElementType()); + Value reshapedFilter = rewriter.create( + loc, reshapedFilterType, filter, filterReassocIndices); + + SmallVector outputReassocIndices = {{0}, {1, 2}, {3}}; + RankedTensorType reshapedOutputType = + RankedTensorType::get({n, oh * ow, oc}, outputType.getElementType()); + Value reshapedOutput = rewriter.create( + loc, reshapedOutputType, output, outputReassocIndices); + SmallVector colTensorShape = {n, oh * ow, fh * fw * ic}; Value colTensor = rewriter.create( loc, colTensorShape, inputType.getElementType()); - AffineExpr nDim, ohDim, owDim, khDim, kwDim, icDim; - bindDims(context, nDim, ohDim, owDim, khDim, kwDim, icDim); - - AffineExpr shSym = rewriter.getAffineConstantExpr( - convOp.getStrides().getValues()[0]); - AffineExpr swSym = rewriter.getAffineConstantExpr( - convOp.getStrides().getValues()[1]); - - SmallVector inputExprs = {nDim, ohDim * shSym + khDim, - owDim * swSym + kwDim, icDim}; - + // Convert the input to a (BMK) column tensor. auto nloops = colTensorShape.size(); auto parallel = utils::IteratorType::parallel; @@ -101,85 +146,68 @@ rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNhwcHwcfOp convOp) { SmallVector img2colIterators(nloops, parallel); SmallVector img2colIndexingMaps = { - AffineMap::get(nloops, 0, inputExprs, context), AffineMap::getMultiDimIdentityMap(nloops, context)}; auto img2ColTensor = rewriter.create( loc, colTensor.getType(), - /*inputs=*/input, /*outputs=*/colTensor, img2colIndexingMaps, + /*inputs=*/ValueRange{}, /*outputs=*/colTensor, img2colIndexingMaps, img2colIterators, [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { - nestedBuilder.create(nestedLoc, args[0]); + // Get the iterators named based on the matmul (batch, m, k). + Value bIndex = nestedBuilder.create(loc, 0); + Value mIndex = nestedBuilder.create(loc, 1); + Value kIndex = nestedBuilder.create(loc, 2); + + // Recover the original iteration indices from the problem/input sizes. + SmallVector mIndices = unrollIndex( + nestedBuilder, nestedLoc, mIndex, ArrayRef{oh, ow}); + auto ohIndex = mIndices[0]; + auto owIndex = mIndices[1]; + + SmallVector kIndices = unrollIndex( + nestedBuilder, nestedLoc, kIndex, ArrayRef{fh, fw, ic}); + auto fhIndex = kIndices[0]; + auto fwIndex = kIndices[1]; + auto icIndex = kIndices[2]; + + // Extract the input element corresponding to the expanded indices. 
+ Value hIndex = + getConvolvedIndex(nestedBuilder, nestedLoc, ohIndex, fhIndex, + convOp.getStrides().getValues()[0]); + Value wIndex = + getConvolvedIndex(nestedBuilder, nestedLoc, owIndex, fwIndex, + convOp.getStrides().getValues()[1]); + + // im2col[n, oh*ow, fh*fw*ic] = input[n, sh*oh + fh, sw*ow + fw, ic] + SmallVector extractionIndices{bIndex, hIndex, wIndex, icIndex}; + Value inputVal = nestedBuilder.create( + loc, input, extractionIndices); + nestedBuilder.create(nestedLoc, inputVal); }); - SmallVector img2ColTensorReassocIndices; - SmallVector outputReassocIndices; - RankedTensorType reshapedImg2ColTensorType, reshapedOutputType; - if (n == 1) { - img2ColTensorReassocIndices = {{0, 1, 2}, {3, 4, 5}}; - outputReassocIndices = {{0, 1, 2}, {3}}; - - reshapedImg2ColTensorType = RankedTensorType::get( - {oh * ow, fh * fw * ic}, inputType.getElementType()); - reshapedOutputType = - RankedTensorType::get({oh * ow, oc}, outputType.getElementType()); - } else { - img2ColTensorReassocIndices = {{0}, {1, 2}, {3, 4, 5}}; - outputReassocIndices = {{0}, {1, 2}, {3}}; - - reshapedImg2ColTensorType = RankedTensorType::get( - {n, oh * ow, fh * fw * ic}, inputType.getElementType()); - reshapedOutputType = - RankedTensorType::get({n, oh * ow, oc}, outputType.getElementType()); - } - - SmallVector filterReassocIndices = {{0, 1, 2}, {3}}; - auto reshapedFilterType = - RankedTensorType::get({fh * fw * ic, oc}, inputType.getElementType()); - - Value reshapedImg2ColTensor = rewriter.create( - loc, reshapedImg2ColTensorType, img2ColTensor.getResult(0), - img2ColTensorReassocIndices); - - Value reshapedFilter = rewriter.create( - loc, reshapedFilterType, filter, filterReassocIndices); - - Value reshapedOutput = rewriter.create( - loc, reshapedOutputType, output, outputReassocIndices); - - Value result; - if (n == 1) { - auto matmulOp = rewriter.create( - loc, reshapedOutputType, - ArrayRef{reshapedImg2ColTensor, reshapedFilter}, - ArrayRef{reshapedOutput}); - result = matmulOp.getResults().front(); - } else { - // For cases where batch is not 1, we need to keep the batch dimension - // separate. Because the filter does not share the same batch dimension, - // the batch dimension is only used in indexing the input and output. Thus - // we cannot use existing linalg named ops like linalg.batch_matmul. - // i.e. (B x) M x K * K x N = (B x) M x N - AffineExpr bDim, mDim, nDim, kDim; - bindDims(context, bDim, mDim, nDim, kDim); - auto lhsMap = AffineMap::get(4, 0, {bDim, mDim, kDim}, context); - auto rhsMap = AffineMap::get(4, 0, {kDim, nDim}, context); - auto resultMap = AffineMap::get(4, 0, {bDim, mDim, nDim}, context); - SmallVector genericIterators = {parallel, parallel, - parallel, reduction}; - - auto genericOp = rewriter.create( - loc, reshapedOutputType, - /*inputs=*/ValueRange{reshapedImg2ColTensor, reshapedFilter}, - /*outputs=*/ValueRange{reshapedOutput}, - ArrayRef{lhsMap, rhsMap, resultMap}, genericIterators, - [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { - Value mul = createMul(loc, args[0], args[1], nestedBuilder); - Value add = createAdd(loc, mul, args[2], nestedBuilder); - nestedBuilder.create(nestedLoc, add); - }); - result = genericOp.getResults().front(); - } + // Because the filter does not share the same batch dimension, + // the batch dimension is only used in indexing the input and output. Thus + // we cannot use existing linalg named ops like linalg.batch_matmul. + // i.e. 
(B x) M x K * K x N = (B x) M x N + AffineExpr bDim, mDim, nDim, kDim; + bindDims(context, bDim, mDim, nDim, kDim); + auto lhsMap = AffineMap::get(4, 0, {bDim, mDim, kDim}, context); + auto rhsMap = AffineMap::get(4, 0, {kDim, nDim}, context); + auto resultMap = AffineMap::get(4, 0, {bDim, mDim, nDim}, context); + SmallVector genericIterators = {parallel, parallel, + parallel, reduction}; + + auto genericOp = rewriter.create( + loc, reshapedOutputType, + /*inputs=*/ValueRange{img2ColTensor.getResult(0), reshapedFilter}, + /*outputs=*/ValueRange{reshapedOutput}, + ArrayRef{lhsMap, rhsMap, resultMap}, genericIterators, + [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { + Value mul = createMul(loc, args[0], args[1], nestedBuilder); + Value add = createAdd(loc, mul, args[2], nestedBuilder); + nestedBuilder.create(nestedLoc, add); + }); + Value result = genericOp.getResults().front(); auto reshapedResult = rewriter.create( loc, outputType, result, outputReassocIndices); @@ -367,33 +395,33 @@ rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNchwFchwOp convOp) { auto filterShape = filterType.getShape(); auto outputShape = outputType.getShape(); - int n = outputShape[0]; - int oc = outputShape[1]; - int oh = outputShape[2]; - int ow = outputShape[3]; - int ic = filterShape[1]; - int fh = filterShape[2]; - int fw = filterShape[3]; + int64_t n = outputShape[0]; + int64_t oc = outputShape[1]; + int64_t oh = outputShape[2]; + int64_t ow = outputShape[3]; + int64_t ic = filterShape[1]; + int64_t fh = filterShape[2]; + int64_t fw = filterShape[3]; auto loc = convOp.getLoc(); - - SmallVector colTensorShape = {n, ic, fh, fw, oh, ow}; - - Value colTensor = rewriter.create( - loc, colTensorShape, inputType.getElementType()); - MLIRContext *context = rewriter.getContext(); - AffineExpr nDim, icDim, khDim, kwDim, ohDim, owDim; - bindDims(context, nDim, icDim, khDim, kwDim, ohDim, owDim); + SmallVector filterReassocIndices = {{0}, {1, 2, 3}}; + auto reshapedFilterType = + RankedTensorType::get({oc, ic * fh * fw}, inputType.getElementType()); + Value reshapedFilter = rewriter.create( + loc, reshapedFilterType, filter, filterReassocIndices); - auto shSym = rewriter.getAffineConstantExpr( - convOp.getStrides().getValues()[0]); - auto swSym = rewriter.getAffineConstantExpr( - convOp.getStrides().getValues()[1]); + SmallVector outputReassocIndices = {{0}, {1}, {2, 3}}; + auto reshapedOutputType = + RankedTensorType::get({n, oc, oh * ow}, outputType.getElementType()); + Value reshapedOutput = rewriter.create( + loc, reshapedOutputType, output, outputReassocIndices); - SmallVector inputExprs = {nDim, icDim, ohDim * shSym + khDim, - owDim * swSym + kwDim}; + // Convert the input to a (BKN) tensor. 
+ SmallVector colTensorShape = {n, ic * fh * fw, oh * ow}; + Value colTensor = rewriter.create( + loc, colTensorShape, inputType.getElementType()); auto nloops = colTensorShape.size(); @@ -402,83 +430,67 @@ rewriteInIm2Col(RewriterBase &rewriter, linalg::Conv2DNchwFchwOp convOp) { SmallVector img2colIterators(nloops, parallel); SmallVector img2colIndexingMaps = { - AffineMap::get(nloops, 0, inputExprs, context), AffineMap::getMultiDimIdentityMap(nloops, context)}; auto img2ColTensor = rewriter.create( loc, colTensor.getType(), - /*inputs=*/input, /*outputs=*/colTensor, img2colIndexingMaps, + /*inputs=*/ValueRange{}, /*outputs=*/colTensor, img2colIndexingMaps, img2colIterators, [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { - nestedBuilder.create(nestedLoc, args[0]); + // Get the iterators named based on the matmul (batch, m, k). + Value bIndex = nestedBuilder.create(loc, 0); + Value kIndex = nestedBuilder.create(loc, 1); + Value nIndex = nestedBuilder.create(loc, 2); + + // Recover the original iteration indices from the problem/input sizes. + SmallVector kIndices = unrollIndex( + nestedBuilder, nestedLoc, kIndex, ArrayRef{ic, fh, fw}); + auto icIndex = kIndices[0]; + auto fhIndex = kIndices[1]; + auto fwIndex = kIndices[2]; + + SmallVector nIndices = unrollIndex( + nestedBuilder, nestedLoc, nIndex, ArrayRef{oh, ow}); + auto ohIndex = nIndices[0]; + auto owIndex = nIndices[1]; + + // Extract the input element corresponding to the expanded indices. + Value hIndex = + getConvolvedIndex(nestedBuilder, nestedLoc, ohIndex, fhIndex, + convOp.getStrides().getValues()[0]); + Value wIndex = + getConvolvedIndex(nestedBuilder, nestedLoc, owIndex, fwIndex, + convOp.getStrides().getValues()[1]); + + // im2col[n, ic*fh*fw, oh*ow] = input[n, ic, sh*oh + fh, sw*ow + fw] + SmallVector extractionIndices{bIndex, icIndex, hIndex, wIndex}; + Value inputVal = nestedBuilder.create( + loc, input, extractionIndices); + nestedBuilder.create(nestedLoc, inputVal); }); - SmallVector filterReassocIndices = {{0}, {1, 2, 3}}; - auto reshapedFilterType = - RankedTensorType::get({oc, fh * fw * ic}, inputType.getElementType()); - Value reshapedFilter = rewriter.create( - loc, reshapedFilterType, filter, filterReassocIndices); - - SmallVector img2ColTensorReassocIndices; - SmallVector outputReassocIndices; - RankedTensorType reshapedImg2ColTensorType, reshapedOutputType; - if (n == 1) { - img2ColTensorReassocIndices = {{0, 1, 2, 3}, {4, 5}}; - outputReassocIndices = {{0, 1}, {2, 3}}; - - reshapedImg2ColTensorType = RankedTensorType::get( - {fh * fw * ic, oh * ow}, inputType.getElementType()); - reshapedOutputType = - RankedTensorType::get({oc, oh * ow}, outputType.getElementType()); - } else { - img2ColTensorReassocIndices = {{0}, {1, 2, 3}, {4, 5}}; - outputReassocIndices = {{0}, {1}, {2, 3}}; - - reshapedImg2ColTensorType = RankedTensorType::get( - {n, fh * fw * ic, oh * ow}, inputType.getElementType()); - reshapedOutputType = - RankedTensorType::get({n, oc, oh * ow}, outputType.getElementType()); - } - - Value reshapedImg2ColTensor = rewriter.create( - loc, reshapedImg2ColTensorType, img2ColTensor.getResult(0), - img2ColTensorReassocIndices); - - Value reshapedOutput = rewriter.create( - loc, reshapedOutputType, output, outputReassocIndices); - - Value result; - if (n == 1) { - auto matmulOp = rewriter.create( - loc, reshapedOutputType, - ArrayRef{reshapedFilter, reshapedImg2ColTensor}, - ArrayRef{reshapedOutput}); - result = matmulOp.getResults().front(); - } else { - // For cases where batch is not 
1, we need to keep the batch dimension - // separate. Because the filter does not share the same batch dimension, - // the batch dimension is only used in indexing the input and output. Thus - // we cannot use existing linalg named ops like linalg.batch_matmul. - // i.e. M x K * (B x) K x N = (B x) M x N - AffineExpr bDim, mDim, nDim, kDim; - bindDims(context, bDim, mDim, nDim, kDim); - auto lhsMap = AffineMap::get(4, 0, {mDim, kDim}, context); - auto rhsMap = AffineMap::get(4, 0, {bDim, kDim, nDim}, context); - auto resultMap = AffineMap::get(4, 0, {bDim, mDim, nDim}, context); - SmallVector genericIterators = {parallel, parallel, - parallel, reduction}; - auto genericOp = rewriter.create( - loc, reshapedOutputType, - /*inputs=*/ValueRange{reshapedFilter, reshapedImg2ColTensor}, - /*outputs=*/ValueRange{reshapedOutput}, - ArrayRef{lhsMap, rhsMap, resultMap}, genericIterators, - [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { - Value mul = createMul(loc, args[0], args[1], nestedBuilder); - Value add = createAdd(loc, mul, args[2], nestedBuilder); - nestedBuilder.create(nestedLoc, add); - }); - result = genericOp.getResults().front(); - } + // Because the filter does not share the same batch dimension, + // the batch dimension is only used in indexing the input and output. Thus + // we cannot use existing linalg named ops like linalg.batch_matmul. + // i.e. M x K * (B x) K x N = (B x) M x N + AffineExpr bDim, mDim, nDim, kDim; + bindDims(context, bDim, mDim, nDim, kDim); + auto lhsMap = AffineMap::get(4, 0, {mDim, kDim}, context); + auto rhsMap = AffineMap::get(4, 0, {bDim, kDim, nDim}, context); + auto resultMap = AffineMap::get(4, 0, {bDim, mDim, nDim}, context); + SmallVector genericIterators = {parallel, parallel, + parallel, reduction}; + auto genericOp = rewriter.create( + loc, reshapedOutputType, + /*inputs=*/ValueRange{reshapedFilter, img2ColTensor.getResult(0)}, + /*outputs=*/ValueRange{reshapedOutput}, + ArrayRef{lhsMap, rhsMap, resultMap}, genericIterators, + [&](OpBuilder &nestedBuilder, Location nestedLoc, ValueRange args) { + Value mul = createMul(loc, args[0], args[1], nestedBuilder); + Value add = createAdd(loc, mul, args[2], nestedBuilder); + nestedBuilder.create(nestedLoc, add); + }); + Value result = genericOp.getResults().front(); auto reshapedResult = rewriter.create( loc, outputType, result, outputReassocIndices); diff --git a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir index e33e51ddababb..ffcba1086f3f6 100644 --- a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir +++ b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir @@ -29,36 +29,71 @@ transform.sequence failures(propagate) { // CHECK: IR printer: tensor_producer // CHECK-NEXT: %[[COL_TENSOR:.+]] = linalg.generic -// CHECK-SAME: affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 + d3, d2 + d4, d5)>, -// CHECK-SAME: affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>] -// CHECK: ^bb0(%[[IN_DATA:.+]]: f32, %[[OUT_DATA:.+]]: f32) -// CHECK: linalg.yield %[[IN_DATA]] : f32 +// CHECK-SAME: affine_map<(d0, d1, d2) -> (d0, d1, d2)>] +// CHECK: ^bb0(%[[OUT_DATA:.+]]: f32) + +// Collapsed indices. +// CHECK: %[[BINDEX:.+]] = linalg.index 0 : index +// CHECK: %[[MINDEX:.+]] = linalg.index 1 : index +// CHECK: %[[KINDEX:.+]] = linalg.index 2 : index + +// Unrolled output shape indices. 
+// CHECK: %[[C14:.+]] = arith.constant 14 : index +// CHECK: %[[OWINDEX:.+]] = arith.remui %[[MINDEX]], %[[C14]] : index +// CHECK: %[[C14_1:.+]] = arith.constant 14 : index +// CHECK: %[[OHINDEX:.+]] = arith.divui %[[MINDEX]], %[[C14_1]] : index + +// Unrolled filter shape indices. +// CHECK: %[[C4:.+]] = arith.constant 4 : index +// CHECK: %[[ICINDEX:.+]] = arith.remui %[[KINDEX]], %[[C4]] : index +// CHECK: %[[C12:.+]] = arith.constant 12 : index +// CHECK: %[[FWREM:.+]] = arith.remui %[[KINDEX]], %[[C12]] : index +// CHECK: %[[C4_2:.+]] = arith.constant 4 : index +// CHECK: %[[FWINDEX:.+]] = arith.divui %[[FWREM]], %[[C4_2]] : index +// CHECK: %[[C12_3:.+]] = arith.constant 12 : index +// CHECK: %[[FHINDEX:.+]] = arith.divui %[[KINDEX]], %[[C12_3]] : index + +// Compute input indices. +// CHECK: %[[SH:.+]] = arith.constant 1 : index +// CHECK: %[[STRIDEDOH:.+]] = arith.muli %[[OHINDEX]], %[[SH]] : index +// CHECK: %[[CONVH:.+]] = arith.addi %[[STRIDEDOH]], %[[FHINDEX]] : index +// CHECK: %[[SW:.+]] = arith.constant 1 : index +// CHECK: %[[STRIDEDOW:.+]] = arith.muli %[[OWINDEX]], %[[SW]] : index +// CHECK: %[[CONVW:.+]] = arith.addi %[[STRIDEDOW]], %[[FWINDEX]] : index +// CHECK: %[[EXTRACTED_INPUT:.+]] = tensor.extract +// CHECK-SAME: %{{.+}}{{\[}}%[[BINDEX]], %[[CONVH]], %[[CONVW]], %[[ICINDEX]]] : tensor<1x16x16x4xf32> +// CHECK: linalg.yield %[[EXTRACTED_INPUT]] : f32 // CHECK: IR printer: transformed -// CHECK: tensor.expand_shape %{{[^ ]*}} {{\[}}[0, 1, 2], [3]] : tensor<196x16xf32> into tensor<1x14x14x16xf32> +// CHECK: tensor.expand_shape %{{[^ ]*}} {{\[}}[0], [1, 2], [3]] : tensor<1x196x16xf32> into tensor<1x14x14x16xf32> -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 + d3, d2 + d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> // CHECK: @conv_16433136 -// CHECK: %[[INPUT:.+]]: tensor<1x16x16x4xf32> -// CHECK: %[[FILTER:.+]]: tensor<3x3x4x16xf32> -// CHECK: %[[OUTPUT:.+]]: tensor<1x14x14x16xf32> -// CHECK: %[[INIT_COL_TENSOR:.+]] = tensor.empty() : tensor<1x14x14x3x3x4xf32> +// CHECK-SAME: %[[INPUT:.+]]: tensor<1x16x16x4xf32> +// CHECK-SAME: %[[FILTER:.+]]: tensor<3x3x4x16xf32> +// CHECK-SAME: %[[OUTPUT:.+]]: tensor<1x14x14x16xf32> +// CHECK-DAG: %[[COLLAPSED_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0, 1, 2], [3]] : tensor<3x3x4x16xf32> into tensor<36x16xf32> +// CHECK-DAG: %[[COLLAPSED_OUT:.+]] = tensor.collapse_shape %[[OUTPUT]] {{\[}}[0], [1, 2], [3]] : tensor<1x14x14x16xf32> into tensor<1x196x16xf32> +// CHECK: %[[INIT_COL_TENSOR:.+]] = tensor.empty() : tensor<1x196x36xf32> // CHECK: %[[COL_TENSOR:.+]] = linalg.generic // CHECK-SAME: #[[MAP0]] +// CHECK: ^bb0(%[[OUT_DATA:.+]]: f32) +// CHECK: linalg.yield %{{.+}} : f32 +// CHECK: %[[MATMUL_RESULT:.+]] = linalg.generic // CHECK-SAME: #[[MAP1]] -// CHECK: ^bb0(%[[IN_DATA:.+]]: f32, %[[OUT_DATA:.+]]: f32) -// CHECK: linalg.yield %[[IN_DATA]] : f32 -// CHECK-DAG: %[[RESHAPED_INIT_COL_TENSOR:.+]] = tensor.collapse_shape %[[COL_TENSOR]] -// CHECK-SAME: [0, 1, 2], [3, 4, 5] -// CHECK-SAME: tensor<1x14x14x3x3x4xf32> into tensor<196x36xf32> -// CHECK-DAG: %[[RESHAPED_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] -// CHECK-SAME: [0, 1, 2], [3] -// 
CHECK-SAME: tensor<3x3x4x16xf32> into tensor<36x16xf32> -// CHECK-DAG: %[[RESHAPED_OUTPUT:.+]] = tensor.collapse_shape %[[OUTPUT]] -// CHECK-SAME: [0, 1, 2], [3] -// CHECK: %[[MATMUL_RESULT:.+]] = linalg.matmul ins(%[[RESHAPED_INIT_COL_TENSOR]], %[[RESHAPED_FILTER]] : tensor<196x36xf32>, tensor<36x16xf32>) outs(%[[RESHAPED_OUTPUT]] : tensor<196x16xf32>) -// CHECK: %[[RESULT:.+]] = tensor.expand_shape %[[MATMUL_RESULT]] {{\[}}[0, 1, 2], [3]] : tensor<196x16xf32> into tensor<1x14x14x16xf32> +// CHECK-SAME: #[[MAP2]] +// CHECK-SAME: #[[MAP3]] +// CHECK-SAME: ins(%[[COL_TENSOR]], %[[COLLAPSED_FILTER]] : tensor<1x196x36xf32>, tensor<36x16xf32>) +// CHECK-SAME: outs(%[[COLLAPSED_OUT]] : tensor<1x196x16xf32>) +// CHECK: ^bb0(%[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32, %[[ARG2:.+]]: f32) +// CHECK: %[[MUL:.+]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32 +// CHECK: %[[ADD:.+]] = arith.addf %[[MUL]], %[[ARG2]] : f32 +// CHECK: linalg.yield %[[ADD]] : f32 +// CHECK: } -> tensor<1x196x16xf32> +// CHECK: %[[RESULT:.+]] = tensor.expand_shape %[[MATMUL_RESULT]] {{\[}}[0], [1, 2], [3]] : tensor<1x196x16xf32> into tensor<1x14x14x16xf32> // CHECK: return %[[RESULT]] func.func @conv_16433136(%arg0: tensor<1x16x16x4xf32>, %arg1: tensor<3x3x4x16xf32>, %arg2: tensor<1x14x14x16xf32>) -> tensor<1x14x14x16xf32> { @@ -156,27 +191,24 @@ transform.sequence failures(propagate) { // ----- -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 + d3, d2 + d4, d5)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-DAG: #[[LHSMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> // CHECK-DAG: #[[RHSMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d3, d2)> // CHECK-DAG: #[[RESMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> // CHECK: func.func @batch_nhwc_conv // CHECK-SAME: (%[[INPUT:.+]]: tensor<8x16x16x4xf32>, %[[FILTER:.+]]: tensor<3x3x4x16xf32>, %[[INIT:.+]]: tensor<8x14x14x16xf32>) -// CHECK: %[[IT:.+]] = tensor.empty() : tensor<8x14x14x3x3x4xf32> +// CHECK-DAG: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0, 1, 2], [3]] : tensor<3x3x4x16xf32> into tensor<36x16xf32> +// CHECK-DAG: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1, 2], [3]] : tensor<8x14x14x16xf32> into tensor<8x196x16xf32> +// CHECK: %[[IT:.+]] = tensor.empty() : tensor<8x196x36xf32> // CHECK: %[[IMG2COL:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[INPUT]] : tensor<8x16x16x4xf32>) -// CHECK-SAME: outs(%[[IT]] : tensor<8x14x14x3x3x4xf32>) -// CHECK: %[[CS_INPUT:.+]] = tensor.collapse_shape %[[IMG2COL]] {{\[}}[0], [1, 2], [3, 4, 5]] : tensor<8x14x14x3x3x4xf32> into tensor<8x196x36xf32> -// CHECK: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0, 1, 2], [3]] : tensor<3x3x4x16xf32> into tensor<36x16xf32> -// CHECK: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1, 2], [3]] : tensor<8x14x14x16xf32> into tensor<8x196x16xf32> +// CHECK-SAME: indexing_maps = [#[[MAP]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"] +// CHECK-SAME: outs(%[[IT]] : tensor<8x196x36xf32>) // CHECK: %[[MATMUL:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[LHSMAP]], #[[RHSMAP]], #[[RESMAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] -// CHECK-SAME: 
ins(%[[CS_INPUT]], %[[CS_FILTER]] : tensor<8x196x36xf32>, tensor<36x16xf32>) +// CHECK-SAME: ins(%[[IMG2COL]], %[[CS_FILTER]] : tensor<8x196x36xf32>, tensor<36x16xf32>) // CHECK-SAME: outs(%[[CS_RESULT]] : tensor<8x196x16xf32>) // CHECK: ^bb0(%[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32, %[[ARG2:.+]]: f32): // CHECK: %[[MUL:.+]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32 @@ -201,27 +233,55 @@ transform.sequence failures(propagate) { // ----- -// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d4 + d2, d5 + d3)> -// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)> +// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> // CHECK-DAG: #[[LHSMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d1, d3)> // CHECK-DAG: #[[RHSMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)> // CHECK-DAG: #[[RESMAP:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2)> // CHECK: func.func @batch_nchw_conv // CHECK-SAME: (%[[INPUT:.+]]: tensor<8x4x16x16xf32>, %[[FILTER:.+]]: tensor<16x4x3x3xf32>, %[[INIT:.+]]: tensor<8x16x14x14xf32>) -// CHECK: %[[IT:.+]] = tensor.empty() : tensor<8x4x3x3x14x14xf32> +// CHECK-DAG: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0], [1, 2, 3]] : tensor<16x4x3x3xf32> into tensor<16x36xf32> +// CHECK-DAG: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]] : tensor<8x16x14x14xf32> into tensor<8x16x196xf32> +// CHECK: %[[IT:.+]] = tensor.empty() : tensor<8x36x196xf32> // CHECK: %[[IMG2COL:.+]] = linalg.generic -// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[INPUT]] : tensor<8x4x16x16xf32>) -// CHECK-SAME: outs(%[[IT]] : tensor<8x4x3x3x14x14xf32>) -// CHECK: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0], [1, 2, 3]] : tensor<16x4x3x3xf32> into tensor<16x36xf32> -// CHECK: %[[CS_INPUT:.+]] = tensor.collapse_shape %[[IMG2COL]] {{\[}}[0], [1, 2, 3], [4, 5]] : tensor<8x4x3x3x14x14xf32> into tensor<8x36x196xf32> -// CHECK: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]] : tensor<8x16x14x14xf32> into tensor<8x16x196xf32> +// CHECK-SAME: indexing_maps = [#[[MAP]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"] +// CHECK-SAME: outs(%[[IT]] : tensor<8x36x196xf32>) +// Collapsed indices. +// CHECK: %[[BINDEX:.+]] = linalg.index 0 : index +// CHECK: %[[KINDEX:.+]] = linalg.index 1 : index +// CHECK: %[[NINDEX:.+]] = linalg.index 2 : index + +// Unrolled filter shape indices. +// CHECK: %[[C3:.+]] = arith.constant 3 : index +// CHECK: %[[FWINDEX:.+]] = arith.remui %[[KINDEX]], %[[C3]] : index +// CHECK: %[[C9:.+]] = arith.constant 9 : index +// CHECK: %[[FHREM:.+]] = arith.remui %[[KINDEX]], %[[C9]] : index +// CHECK: %[[C3_1:.+]] = arith.constant 3 : index +// CHECK: %[[FHINDEX:.+]] = arith.divui %[[FHREM]], %[[C3_1]] : index +// CHECK: %[[C9_2:.+]] = arith.constant 9 : index +// CHECK: %[[ICINDEX:.+]] = arith.divui %[[KINDEX]], %[[C9_2]] : index + +// Unrolled output shape indices. +// CHECK: %[[C14:.+]] = arith.constant 14 : index +// CHECK: %[[OWINDEX:.+]] = arith.remui %[[NINDEX]], %[[C14]] : index +// CHECK: %[[C14_3:.+]] = arith.constant 14 : index +// CHECK: %[[OHINDEX:.+]] = arith.divui %[[NINDEX]], %[[C14_3]] : index + +// Compute input indices. 
+// CHECK: %[[SH:.+]] = arith.constant 1 : index
+// CHECK: %[[STRIDEDOH:.+]] = arith.muli %[[OHINDEX]], %[[SH]] : index
+// CHECK: %[[CONVH:.+]] = arith.addi %[[STRIDEDOH]], %[[FHINDEX]] : index
+// CHECK: %[[SW:.+]] = arith.constant 1 : index
+// CHECK: %[[STRIDEDOW:.+]] = arith.muli %[[OWINDEX]], %[[SW]] : index
+// CHECK: %[[CONVW:.+]] = arith.addi %[[STRIDEDOW]], %[[FWINDEX]] : index
+// CHECK: %[[EXTRACTED_INPUT:.+]] = tensor.extract
+// CHECK-SAME: %[[INPUT]]{{\[}}%[[BINDEX]], %[[ICINDEX]], %[[CONVH]], %[[CONVW]]] : tensor<8x4x16x16xf32>
+// CHECK: linalg.yield %[[EXTRACTED_INPUT]] : f32
 // CHECK: %[[MATMUL:.+]] = linalg.generic
 // CHECK-SAME: indexing_maps = [#[[LHSMAP]], #[[RHSMAP]], #[[RESMAP]]],
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]
-// CHECK-SAME: ins(%[[CS_FILTER]], %[[CS_INPUT]] : tensor<16x36xf32>, tensor<8x36x196xf32>)
+// CHECK-SAME: ins(%[[CS_FILTER]], %[[IMG2COL]] : tensor<16x36xf32>, tensor<8x36x196xf32>)
 // CHECK-SAME: outs(%[[CS_RESULT]] : tensor<8x16x196xf32>)
 // CHECK: ^bb0(%[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32, %[[ARG2:.+]]: f32):
 // CHECK: %[[MUL:.+]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32
From 24847a90aaf5842041a2cfa977a4167997307b50 Mon Sep 17 00:00:00 2001
From: LiaoChunyu
Date: Fri, 24 Mar 2023 09:04:59 +0800
Subject: [PATCH 496/691] [LegalizeTypes][RISCV] Add a special case for (add X, -1) to ExpandIntRes_ADDSUB

On targets without ADDCARRY or ADDE, we need to emit a separate SETCC to
determine carry from the low half to the high half. The high half is
calculated by a series of ADDs.

When RHSLo and RHSHi are -1, without this patch, we get:
  Hi = (add (add LHSHi, (setult Lo, LHSLo)), -1)
Whereas with the patch we get:
  Hi = (sub LHSHi, (seteq LHSLo, 0))
When only RHSLo is -1, we can instead use (setne Lo, 0).
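As a concrete illustration (added here for exposition, not part of the
original message), a plain 64-bit decrement on rv32 hits the
RHSLo == RHSHi == -1 case:

  #include <cstdint>

  // x - 1 legalizes to (add X, -1) with both halves of the RHS equal to -1.
  uint64_t dec(uint64_t x) { return x - 1; }

With this patch, the alu64.ll test below expects the three-instruction
sequence

  seqz a2, a0
  sub a1, a1, a2
  addi a0, a0, -1

instead of materializing the carry with an extra sltu/add pair.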
Similar to gcc: https://godbolt.org/z/M83f6rz39 Reviewed By: craig.topper Differential Revision: https://reviews.llvm.org/D146635 --- .../SelectionDAG/LegalizeIntegerTypes.cpp | 14 +++- llvm/test/CodeGen/RISCV/alu64.ll | 73 +++++++++++++++++++ .../CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll | 10 +-- .../test/CodeGen/RISCV/overflow-intrinsics.ll | 39 +++++----- llvm/test/CodeGen/RISCV/sext-zext-trunc.ll | 14 ++-- 5 files changed, 113 insertions(+), 37 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f741ee4849dfc..c4f2fbc90e3eb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -3026,7 +3026,14 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (isOneConstant(LoOps[1])) Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, DAG.getConstant(0, dl, NVT), ISD::SETEQ); - else + else if (isAllOnesConstant(LoOps[1])) { + if (isAllOnesConstant(HiOps[1])) + Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0], + DAG.getConstant(0, dl, NVT), ISD::SETEQ); + else + Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), LoOps[0], + DAG.getConstant(0, dl, NVT), ISD::SETNE); + } else Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); @@ -3037,7 +3044,10 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Carry = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), DAG.getConstant(0, dl, NVT)); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); + if (isAllOnesConstant(LoOps[1]) && isAllOnesConstant(HiOps[1])) + Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps[0], Carry); + else + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2)); diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll index 5349c82ef0f0f..29eb12f7f5429 100644 --- a/llvm/test/CodeGen/RISCV/alu64.ll +++ b/llvm/test/CodeGen/RISCV/alu64.ll @@ -530,3 +530,76 @@ define signext i32 @sraw(i64 %a, i32 zeroext %b) nounwind { %2 = ashr i32 %1, %b ret i32 %2 } + +define i64 @add_hi_and_lo_negone(i64 %0) { +; RV64I-LABEL: add_hi_and_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: addi a0, a0, -1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_hi_and_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: seqz a2, a0 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add nsw i64 %0, -1 + ret i64 %2 +} + +define i64 @add_hi_zero_lo_negone(i64 %0) { +; RV64I-LABEL: add_hi_zero_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_hi_zero_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add i64 %0, 4294967295 + ret i64 %2 +} + +define i64 @add_lo_negone(i64 %0) { +; RV64I-LABEL: add_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: addi a1, a1, -2 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add nsw i64 %0, -4294967297 + ret i64 %2 +} + +define i64 @add_hi_one_lo_negone(i64 %0) { +; RV64I-LABEL: add_hi_one_lo_negone: +; RV64I: # %bb.0: +; RV64I-NEXT: li a1, -1 +; 
RV64I-NEXT: srli a1, a1, 31 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32I-LABEL: add_hi_one_lo_negone: +; RV32I: # %bb.0: +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: addi a1, a1, 1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: ret + %2 = add nsw i64 %0, 8589934591 + ret i64 %2 +} diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index f1528e94c473c..dc4b50215ab0a 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -1263,10 +1263,9 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: bnez a0, .LBB7_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32I-NEXT: seqz a0, a4 +; RV32I-NEXT: sub a3, a5, a0 ; RV32I-NEXT: addi a2, a4, -1 -; RV32I-NEXT: sltu a0, a2, a4 -; RV32I-NEXT: add a0, a5, a0 -; RV32I-NEXT: addi a3, a0, -1 ; RV32I-NEXT: j .LBB7_1 ; RV32I-NEXT: .LBB7_7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a4 @@ -1327,10 +1326,9 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: bnez a0, .LBB7_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 +; RV32IA-NEXT: seqz a0, a4 +; RV32IA-NEXT: sub a3, a5, a0 ; RV32IA-NEXT: addi a2, a4, -1 -; RV32IA-NEXT: sltu a0, a2, a4 -; RV32IA-NEXT: add a0, a5, a0 -; RV32IA-NEXT: addi a3, a0, -1 ; RV32IA-NEXT: j .LBB7_1 ; RV32IA-NEXT: .LBB7_7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a4 diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll index 543c17f748a45..acad770b693d2 100644 --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ -666,14 +666,13 @@ define i1 @uaddo_i64_increment_alt_dom(i64 %x, ptr %p) { define i1 @uaddo_i64_decrement_alt(i64 %x, ptr %p) { ; RV32-LABEL: uaddo_i64_decrement_alt: ; RV32: # %bb.0: -; RV32-NEXT: addi a3, a0, -1 -; RV32-NEXT: sltu a4, a3, a0 -; RV32-NEXT: add a4, a1, a4 -; RV32-NEXT: addi a4, a4, -1 -; RV32-NEXT: sw a3, 0(a2) +; RV32-NEXT: seqz a3, a0 +; RV32-NEXT: sub a3, a1, a3 +; RV32-NEXT: addi a4, a0, -1 +; RV32-NEXT: sw a4, 0(a2) ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: snez a0, a0 -; RV32-NEXT: sw a4, 4(a2) +; RV32-NEXT: sw a3, 4(a2) ; RV32-NEXT: ret ; ; RV64-LABEL: uaddo_i64_decrement_alt: @@ -695,12 +694,11 @@ define i1 @uaddo_i64_decrement_alt_dom(i64 %x, ptr %p) { ; RV32: # %bb.0: ; RV32-NEXT: or a3, a0, a1 ; RV32-NEXT: snez a3, a3 -; RV32-NEXT: addi a4, a0, -1 -; RV32-NEXT: sltu a0, a4, a0 -; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: seqz a4, a0 +; RV32-NEXT: sub a1, a1, a4 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: sw a4, 0(a2) -; RV32-NEXT: sw a0, 4(a2) +; RV32-NEXT: sw a0, 0(a2) +; RV32-NEXT: sw a1, 4(a2) ; RV32-NEXT: mv a0, a3 ; RV32-NEXT: ret ; @@ -1222,22 +1220,21 @@ define i64 @foo2(ptr %p) { define void @PR41129(ptr %p64) { ; RV32-LABEL: PR41129: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lw a1, 4(a0) -; RV32-NEXT: lw a2, 0(a0) -; RV32-NEXT: or a3, a2, a1 +; RV32-NEXT: lw a2, 4(a0) +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: or a3, a1, a2 ; RV32-NEXT: beqz a3, .LBB36_2 ; RV32-NEXT: # %bb.1: # %false -; RV32-NEXT: andi a2, a2, 7 +; RV32-NEXT: andi a1, a1, 7 ; RV32-NEXT: sw zero, 4(a0) -; RV32-NEXT: sw a2, 0(a0) +; RV32-NEXT: sw a1, 0(a0) ; RV32-NEXT: ret ; RV32-NEXT: .LBB36_2: # %true -; RV32-NEXT: addi a3, a2, -1 -; RV32-NEXT: sltu a2, a3, a2 -; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: seqz a3, a1 +; 
RV32-NEXT: sub a2, a2, a3 ; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: sw a3, 0(a0) -; RV32-NEXT: sw a1, 4(a0) +; RV32-NEXT: sw a1, 0(a0) +; RV32-NEXT: sw a2, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: PR41129: diff --git a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll index 58469ca23bb54..6be6785fc1d0e 100644 --- a/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll +++ b/llvm/test/CodeGen/RISCV/sext-zext-trunc.ll @@ -454,10 +454,9 @@ define i32 @sext_of_not_i32(i1 %x) { define i64 @sext_of_not_i64(i1 %x) { ; RV32I-LABEL: sext_of_not_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a1, a0, 1 -; RV32I-NEXT: addi a0, a1, -1 -; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: sext_of_not_i64: @@ -541,10 +540,9 @@ define i64 @dec_of_zexted_cmp_i64(i64 %x) { ; RV32I: # %bb.0: ; RV32I-NEXT: xori a0, a0, 7 ; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: seqz a1, a0 -; RV32I-NEXT: addi a0, a1, -1 -; RV32I-NEXT: sltu a1, a0, a1 -; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: seqz a0, a0 +; RV32I-NEXT: addi a0, a0, -1 +; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: dec_of_zexted_cmp_i64: From ccd96b3e03e18653e909852bfef105fc10782acb Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 23 Mar 2023 19:08:53 -0700 Subject: [PATCH 497/691] [builtins][test] Fix divmodti4_test.c on Windows By making the 64 bit integer literals unsigned. Otherwise some of them are unexpectedly sign extended (and the compiler rightly diagnosed this with warnings) Initially added in D80506. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D146667 --- .../test/builtins/Unit/divmodti4_test.c | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/compiler-rt/test/builtins/Unit/divmodti4_test.c b/compiler-rt/test/builtins/Unit/divmodti4_test.c index 26b3c1609f18c..7b8d3faae2cdf 100644 --- a/compiler-rt/test/builtins/Unit/divmodti4_test.c +++ b/compiler-rt/test/builtins/Unit/divmodti4_test.c @@ -55,22 +55,22 @@ char assumption_1[sizeof(ti_int) == 2*sizeof(di_int)] = {0}; tu_int tests[][4] = { -{ (ti_int) 0, (ti_int) 1, (ti_int) 0, (ti_int) 0 }, -{ (ti_int) 0, (ti_int)-1, (ti_int) 0, (ti_int) 0 }, -{ (ti_int) 2, (ti_int) 1, (ti_int) 2, (ti_int) 0 }, -{ (ti_int) 2, (ti_int)-1, (ti_int)-2, (ti_int) 0 }, -{ (ti_int)-2, (ti_int) 1, (ti_int)-2, (ti_int) 0 }, -{ (ti_int)-2, (ti_int)-1, (ti_int) 2, (ti_int) 0 }, -{ (ti_int) 5, (ti_int) 3, (ti_int) 1, (ti_int) 2 }, -{ (ti_int) 5, (ti_int)-3, (ti_int)-1, (ti_int) 2 }, -{ (ti_int)-5, (ti_int) 3, (ti_int)-1, (ti_int)-2 }, -{ (ti_int)-5, (ti_int)-3, (ti_int) 1, (ti_int)-2 }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 1, (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)0x0LL }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-1, (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)0x0LL }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-2, (ti_int)0x4000000000000000LL << 64 | 0, (ti_int)0x0LL }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 2, (ti_int)0xC000000000000000LL << 64 | 0, (ti_int)0x0LL }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int)-3, (ti_int)0x2AAAAAAAAAAAAAAALL << 64 | 0xAAAAAAAAAAAAAAAALL, (ti_int)-2 }, -{ (ti_int)0x8000000000000000LL << 64 | 0, (ti_int) 3, (ti_int)0xD555555555555555LL << 64 | 0x5555555555555556LL, (ti_int)-2 }, +{ (ti_int) 0, (ti_int) 1, (ti_int) 0, (ti_int) 0 }, +{ (ti_int) 0, (ti_int)-1, (ti_int) 0, (ti_int) 0 }, +{ (ti_int) 
2, (ti_int) 1, (ti_int) 2, (ti_int) 0 }, +{ (ti_int) 2, (ti_int)-1, (ti_int)-2, (ti_int) 0 }, +{ (ti_int)-2, (ti_int) 1, (ti_int)-2, (ti_int) 0 }, +{ (ti_int)-2, (ti_int)-1, (ti_int) 2, (ti_int) 0 }, +{ (ti_int) 5, (ti_int) 3, (ti_int) 1, (ti_int) 2 }, +{ (ti_int) 5, (ti_int)-3, (ti_int)-1, (ti_int) 2 }, +{ (ti_int)-5, (ti_int) 3, (ti_int)-1, (ti_int)-2 }, +{ (ti_int)-5, (ti_int)-3, (ti_int) 1, (ti_int)-2 }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int) 1, (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int)-1, (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int)-2, (ti_int)0x4000000000000000ULL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int) 2, (ti_int)0xC000000000000000ULL << 64 | 0, (ti_int)0x0LL }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int)-3, (ti_int)0x2AAAAAAAAAAAAAAAULL << 64 | 0xAAAAAAAAAAAAAAAAULL, (ti_int)-2 }, +{ (ti_int)0x8000000000000000ULL << 64 | 0, (ti_int) 3, (ti_int)0xD555555555555555ULL << 64 | 0x5555555555555556ULL, (ti_int)-2 }, }; #endif From 24657a95c1447cc95f2634de50ff4008d8b17d4c Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Thu, 23 Mar 2023 16:06:37 -0700 Subject: [PATCH 498/691] [NFC] Fix Windows builds that use MSVC 14.x Differential Revision: https://reviews.llvm.org/D146769 --- mlir/lib/IR/AffineMap.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp index 9ac181f46b578..6c9034d446341 100644 --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -744,13 +744,18 @@ static AffineMap projectCommonImpl(AffineMap map, replacements.reserve(numDimOrSym); auto createNewDimOrSym = (isDim) ? getAffineDimExpr : getAffineSymbolExpr; - auto replaceDims = [](AffineExpr e, ArrayRef replacements) { + + using replace_fn_ty = + std::function)>; + replace_fn_ty replaceDims = [](AffineExpr e, + ArrayRef replacements) { return e.replaceDims(replacements); }; - auto replaceSymbols = [](AffineExpr e, ArrayRef replacements) { + replace_fn_ty replaceSymbols = [](AffineExpr e, + ArrayRef replacements) { return e.replaceSymbols(replacements); }; - auto replaceNewDimOrSym = (isDim) ? replaceDims : replaceSymbols; + replace_fn_ty replaceNewDimOrSym = (isDim) ? replaceDims : replaceSymbols; MLIRContext *context = map.getContext(); int64_t newNumDimOrSym = 0; From 5f48b861f8ce2d2355347d3b3b8826f7bfd23dd6 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 23 Mar 2023 19:26:42 -0700 Subject: [PATCH 499/691] [SelectionDAG] Use isOneConstant (NFC) --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 3511e76ac1df2..8199e5d5a9c18 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -3628,9 +3628,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } else { // We test only the i1 bit. Skip the AND if UNDEF or another AND. 
if (Tmp2.isUndef() || - (Tmp2.getOpcode() == ISD::AND && - isa(Tmp2.getOperand(1)) && - cast(Tmp2.getOperand(1))->getZExtValue() == 1)) + (Tmp2.getOpcode() == ISD::AND && isOneConstant(Tmp2.getOperand(1)))) Tmp3 = Tmp2; else Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, From 231fa27435105e980b113754c112980ebeb8927d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 23 Mar 2023 19:26:43 -0700 Subject: [PATCH 500/691] [InstCombine] Generate better code for std::bit_ceil Without this patch, std::bit_ceil is compiled as: %dec = add i32 %x, -1 %lz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) %sub = sub i32 32, %lz %res = shl i32 1, %sub %ugt = icmp ugt i32 %x, 1 %sel = select i1 %ugt, i32 %res, i32 1 With this patch, we generate: %dec = add i32 %x, -1 %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) %sub = sub nsw i32 0, %ctlz %and = and i32 %1, 31 %sel = shl nuw i32 1, %and ret i32 %sel https://alive2.llvm.org/ce/z/pwezvF This patch recognizes the specific pattern from std::bit_ceil in libc++ and libstdc++ and drops the conditional move. In addition to the LLVM IR generated for std::bit_ceil(X), this patch recognizes variants like: std::bit_ceil(X - 1) std::bit_ceil(X + 1) std::bit_ceil(X + 2) std::bit_ceil(-X) std::bit_ceil(~X) This patch fixes: https://github.com/llvm/llvm-project/issues/60802 Differential Revision: https://reviews.llvm.org/D145299 --- .../InstCombine/InstCombineSelect.cpp | 131 ++++++++++++++++++ llvm/test/Transforms/InstCombine/bit_ceil.ll | 70 ++++------ 2 files changed, 160 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 1f2441bc9fcf9..3d1dbdd6270d5 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -3163,6 +3163,134 @@ Instruction *InstCombinerImpl::foldSelectOfBools(SelectInst &SI) { return nullptr; } +// Return true if we can safely remove the select instruction for std::bit_ceil +// pattern. +static bool isSafeToRemoveBitCeilSelect(ICmpInst::Predicate Pred, Value *Cond0, + const APInt *Cond1, Value *CtlzOp, + unsigned BitWidth) { + // The challenge in recognizing std::bit_ceil(X) is that the operand is used + // for the CTLZ proper and select condition, each possibly with some + // operation like add and sub. + // + // Our aim is to make sure that -ctlz & (BitWidth - 1) == 0 even when the + // select instruction would select 1, which allows us to get rid of the select + // instruction. + // + // To see if we can do so, we do some symbolic execution with ConstantRange. + // Specifically, we compute the range of values that Cond0 could take when + // Cond == false. Then we successively transform the range until we obtain + // the range of values that CtlzOp could take. + // + // Conceptually, we follow the def-use chain backward from Cond0 while + // transforming the range for Cond0 until we meet the common ancestor of Cond0 + // and CtlzOp. Then we follow the def-use chain forward until we obtain the + // range for CtlzOp. That said, we only follow at most one ancestor from + // Cond0. Likewise, we only follow at most one ancestor from CtrlOp. + + ConstantRange CR = ConstantRange::makeExactICmpRegion( + CmpInst::getInversePredicate(Pred), *Cond1); + + // Match the operation that's used to compute CtlzOp from CommonAncestor. If + // CtlzOp == CommonAncestor, return true as no operation is needed. 
If a + // match is found, execute the operation on CR, update CR, and return true. + // Otherwise, return false. + auto MatchForward = [&](Value *CommonAncestor) { + const APInt *C = nullptr; + if (CtlzOp == CommonAncestor) + return true; + if (match(CtlzOp, m_Add(m_Specific(CommonAncestor), m_APInt(C)))) { + CR = CR.add(*C); + return true; + } + if (match(CtlzOp, m_Sub(m_APInt(C), m_Specific(CommonAncestor)))) { + CR = ConstantRange(*C).sub(CR); + return true; + } + if (match(CtlzOp, m_Not(m_Specific(CommonAncestor)))) { + CR = CR.binaryNot(); + return true; + } + return false; + }; + + const APInt *C = nullptr; + Value *CommonAncestor; + if (MatchForward(Cond0)) { + // Cond0 is either CtlzOp or CtlzOp's parent. CR has been updated. + } else if (match(Cond0, m_Add(m_Value(CommonAncestor), m_APInt(C)))) { + CR = CR.sub(*C); + if (!MatchForward(CommonAncestor)) + return false; + // Cond0's parent is either CtlzOp or CtlzOp's parent. CR has been updated. + } else { + return false; + } + + // Return true if all the values in the range are either 0 or negative (if + // treated as signed). We do so by evaluating: + // + // CR - 1 u>= (1 << (BitWidth - 1)) - 1. + APInt IntMax = APInt::getSignMask(BitWidth) - 1; + CR = CR.sub(APInt(BitWidth, 1)); + return CR.icmp(ICmpInst::ICMP_UGE, IntMax); +} + +// Transform the std::bit_ceil(X) pattern like: +// +// %dec = add i32 %x, -1 +// %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) +// %sub = sub i32 32, %ctlz +// %shl = shl i32 1, %sub +// %ugt = icmp ugt i32 %x, 1 +// %sel = select i1 %ugt, i32 %shl, i32 1 +// +// into: +// +// %dec = add i32 %x, -1 +// %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) +// %neg = sub i32 0, %ctlz +// %masked = and i32 %neg, 31 +// %shl = shl i32 1, %masked +// +// Note that the select is optimized away while the shift count is masked with +// 31. We handle some variations of the input operand like std::bit_ceil(X + +// 1). +static Instruction *foldBitCeil(SelectInst &SI, IRBuilderBase &Builder) { + Type *SelType = SI.getType(); + unsigned BitWidth = SelType->getScalarSizeInBits(); + + Value *FalseVal = SI.getFalseValue(); + Value *TrueVal = SI.getTrueValue(); + ICmpInst::Predicate Pred; + const APInt *Cond1; + Value *Cond0, *Ctlz, *CtlzOp; + if (!match(SI.getCondition(), m_ICmp(Pred, m_Value(Cond0), m_APInt(Cond1)))) + return nullptr; + + if (match(TrueVal, m_One())) { + std::swap(FalseVal, TrueVal); + Pred = CmpInst::getInversePredicate(Pred); + } + + if (!match(FalseVal, m_One()) || + !match(TrueVal, + m_OneUse(m_Shl(m_One(), m_OneUse(m_Sub(m_SpecificInt(BitWidth), + m_Value(Ctlz)))))) || + !match(Ctlz, m_Intrinsic<Intrinsic::ctlz>(m_Value(CtlzOp), m_Zero())) || + !isSafeToRemoveBitCeilSelect(Pred, Cond0, Cond1, CtlzOp, BitWidth)) + return nullptr; + + // Build 1 << (-CTLZ & (BitWidth-1)). The negation likely corresponds to a + // single hardware instruction as opposed to BitWidth - CTLZ, where BitWidth + // is an integer constant. Masking with BitWidth-1 comes free on some + // hardware as part of the shift instruction.
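+ // (Illustrative note: -CTLZ is congruent to BitWidth - CTLZ modulo BitWidth, + // so the masked amount matches the original shift whenever the select would + // pick the shifted value; and in the boundary case X == 1 the CTLZ operand + // is 0, CTLZ yields BitWidth, and -BitWidth & (BitWidth - 1) == 0, so + // 1 << 0 reproduces the select's constant arm, which + // isSafeToRemoveBitCeilSelect verified for every value that reaches here.)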
+ Value *Neg = Builder.CreateNeg(Ctlz); + Value *Masked = + Builder.CreateAnd(Neg, ConstantInt::get(SelType, BitWidth - 1)); + return BinaryOperator::Create(Instruction::Shl, ConstantInt::get(SelType, 1), + Masked); +} + Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { Value *CondVal = SI.getCondition(); Value *TrueVal = SI.getTrueValue(); @@ -3590,5 +3718,8 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (sinkNotIntoOtherHandOfLogicalOp(SI)) return &SI; + if (Instruction *I = foldBitCeil(SI, Builder)) + return I; + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/bit_ceil.ll b/llvm/test/Transforms/InstCombine/bit_ceil.ll index 98f4cdb6fb834..6f714153a598a 100644 --- a/llvm/test/Transforms/InstCombine/bit_ceil.ll +++ b/llvm/test/Transforms/InstCombine/bit_ceil.ll @@ -6,10 +6,9 @@ define i32 @bit_ceil_32(i32 %x) { ; CHECK-LABEL: @bit_ceil_32( ; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0:![0-9]+]] -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] -; CHECK-NEXT: [[UGT:%.*]] = icmp ugt i32 [[X]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP2]] ; CHECK-NEXT: ret i32 [[SEL]] ; %dec = add i32 %x, -1 @@ -26,10 +25,9 @@ define i64 @bit_ceil_64(i64 %x) { ; CHECK-LABEL: @bit_ceil_64( ; CHECK-NEXT: [[DEC:%.*]] = add i64 [[X:%.*]], -1 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[DEC]], i1 false), !range [[RNG1:![0-9]+]] -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i64 64, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i64 1, [[SUB]] -; CHECK-NEXT: [[UGT:%.*]] = icmp ugt i64 [[X]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT]], i64 [[SHL]], i64 1 +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i64 0, [[CTLZ]] +; CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 63 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i64 1, [[TMP2]] ; CHECK-NEXT: ret i64 [[SEL]] ; %dec = add i64 %x, -1 @@ -47,11 +45,9 @@ define i32 @bit_ceil_32_minus_1(i32 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[X:%.*]], -2 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB2:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB2]] -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], -3 -; CHECK-NEXT: [[ULT:%.*]] = icmp ult i32 [[ADD]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP1]] ; CHECK-NEXT: ret i32 [[SEL]] ; entry: @@ -69,11 +65,9 @@ entry: define i32 @bit_ceil_32_plus_1(i32 %x) { ; CHECK-LABEL: @bit_ceil_32_plus_1( ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] -; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X]], -1 -; CHECK-NEXT: [[ULT:%.*]] = icmp ult i32 [[DEC]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP2]] ; CHECK-NEXT: ret i32 
[[SEL]] ; %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false) @@ -91,10 +85,9 @@ define i32 @bit_ceil_plus_2(i32 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB:%.*]] = add i32 [[X:%.*]], 1 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB2:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB2]] -; CHECK-NEXT: [[ULT:%.*]] = icmp ult i32 [[X]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP1]] ; CHECK-NEXT: ret i32 [[SEL]] ; entry: @@ -113,11 +106,9 @@ define i32 @bit_ceil_32_neg(i32 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB:%.*]] = xor i32 [[X:%.*]], -1 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB2:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB2]] -; CHECK-NEXT: [[NOTSUB:%.*]] = add i32 [[X]], -1 -; CHECK-NEXT: [[ULT:%.*]] = icmp ult i32 [[NOTSUB]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP1]] ; CHECK-NEXT: ret i32 [[SEL]] ; entry: @@ -137,10 +128,9 @@ define i32 @bit_ceil_not(i32 %x) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[SUB:%.*]] = sub i32 -2, [[X:%.*]] ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB2:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB2]] -; CHECK-NEXT: [[ULT:%.*]] = icmp ult i32 [[X]], -2 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[ULT]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP1]] ; CHECK-NEXT: ret i32 [[SEL]] ; entry: @@ -158,18 +148,17 @@ define i32 @bit_ceil_commuted_operands(i32 %x) { ; CHECK-LABEL: @bit_ceil_commuted_operands( ; CHECK-NEXT: [[DEC:%.*]] = add i32 [[X:%.*]], -1 ; CHECK-NEXT: [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[SUB]] -; CHECK-NEXT: [[UGT_INV:%.*]] = icmp ugt i32 [[X]], 1 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[UGT_INV]], i32 [[SHL]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 0, [[CTLZ]] +; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 31 +; CHECK-NEXT: [[SEL:%.*]] = shl nuw i32 1, [[TMP2]] ; CHECK-NEXT: ret i32 [[SEL]] ; %dec = add i32 %x, -1 %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false) %sub = sub i32 32, %ctlz %shl = shl i32 1, %sub - %ugt = icmp ule i32 %x, 1 - %sel = select i1 %ugt, i32 1, i32 %shl + %eq = icmp eq i32 %dec, 0 + %sel = select i1 %eq, i32 1, i32 %shl ret i32 %sel } @@ -282,10 +271,9 @@ define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) { ; CHECK-LABEL: @bit_ceil_v4i32( ; CHECK-NEXT: [[DEC:%.*]] = add <4 x i32> [[X:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1> ; CHECK-NEXT: [[CTLZ:%.*]] = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[DEC]], i1 false), !range [[RNG0]] -; CHECK-NEXT: [[SUB:%.*]] = sub nuw nsw <4 x i32> <i32 32, i32 32, i32 32, i32 32>, [[CTLZ]] -; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[SUB]] -; CHECK-NEXT: [[UGT:%.*]] = icmp ugt <4 x i32> [[X]], <i32 1, i32 1, i32 1, i32 1> -; CHECK-NEXT: [[SEL:%.*]] = select <4 x i1> [[UGT]], <4 x i32> [[SHL]], <4 x i32> <i32 1, i32 1, i32 1, i32 1> +; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> zeroinitializer, [[CTLZ]] +; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31> +; CHECK-NEXT: [[SEL:%.*]] = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP2]] ; CHECK-NEXT: ret <4 x i32> [[SEL]] ; %dec = add <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> From 4950104e243a6af2d0b9da30b415a10670a9385e Mon Sep 17 00:00:00 2001 From: XinWang10 Date: Thu, 23 Mar 2023 22:32:18 -0400 Subject: [PATCH 501/691] [NFC][X86] Remove trailing space in X86InstrArithmetic.td In this file, most of the lines don't have trailing spaces, but some of them do. To keep things consistent, remove the trailing spaces. Reviewed By: skan Differential Revision: https://reviews.llvm.org/D146697 --- llvm/lib/Target/X86/X86InstrArithmetic.td | 80 +++++++++++------------ 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td index 42cc7c8f4585d..46d1412aa984c 100644 --- a/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -125,12 +125,12 @@ class BinOpRR_Rev opcode, string mnemonic, X86TypeInfo typeinfo, let hasSideEffects = 0; } -// BinOpRR_RFF_Rev - Binary instructions with inputs "reg, reg"(reversed +// BinOpRR_RFF_Rev - Binary instructions with inputs "reg, reg"(reversed // encoding), with sched = WriteADC. class BinOpRR_RFF_Rev opcode, string mnemonic, X86TypeInfo typeinfo> : BinOpRR_Rev; -// BinOpRR_F_Rev - Binary instructions with inputs "reg, reg"(reversed +// BinOpRR_F_Rev - Binary instructions with inputs "reg, reg"(reversed // encoding), without outlist dag. class BinOpRR_F_Rev opcode, string mnemonic, X86TypeInfo typeinfo> : ITy opcode, string mnemonic, X86TypeInfo typeinfo, // has both a regclass and EFLAGS as a result, and has EFLAGS as input. class BinOpRM_RFF opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> - : BinOpRM_ImplicitUse; // BinOpRI - Binary instructions with inputs "reg, imm". @@ -273,21 +273,21 @@ class BinOpMR_RMW opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> : BinOpMR, + (implicit EFLAGS)]>, Sched<[WriteALURMW, // base, scale, index, offset, segment ReadDefault, ReadDefault, ReadDefault, ReadDefault, ReadDefault, WriteALU.ReadAfterFold]>; // reg -// BinOpMR_RMW_FF - Binary instructions with inputs "[mem], reg", where the +// BinOpMR_RMW_FF - Binary instructions with inputs "[mem], reg", where the // pattern use EFLAGS as operand and implicitly use EFLAGS.
class BinOpMR_RMW_FF opcode, string mnemonic, X86TypeInfo typeinfo, SDNode opnode> : BinOpMR, + (implicit EFLAGS)]>, Sched<[WriteADCRMW, // base, scale, index, offset, segment ReadDefault, ReadDefault, ReadDefault, @@ -321,7 +321,7 @@ class BinOpMI_RMW opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpMI, + (implicit EFLAGS)]>, Sched<[WriteALURMW]>; // BinOpMI_RMW_FF - Binary instructions with inputs "[mem], imm", where the @@ -331,7 +331,7 @@ class BinOpMI_RMW_FF opcode, string mnemonic, X86TypeInfo typeinfo, : BinOpMI, + (implicit EFLAGS)]>, Sched<[WriteADCRMW]>; // BinOpMI_F - Binary instructions with inputs "[mem], imm", where the pattern @@ -359,7 +359,7 @@ class BinOpMI8_RMW, + (implicit EFLAGS)]>, Sched<[WriteALURMW]>; // BinOpMI8_RMW_FF - Binary instructions with inputs "[mem], imm8", where the @@ -369,7 +369,7 @@ class BinOpMI8_RMW_FF, + (implicit EFLAGS)]>, Sched<[WriteADCRMW]>; // BinOpMI8_F - Binary instructions with inputs "[mem], imm8", where the pattern @@ -387,7 +387,7 @@ class BinOpAI opcode, string mnemonic, X86TypeInfo typeinfo, Register areg, string operands, X86FoldableSchedWrite sched = WriteALU> : ITy, + mnemonic, operands, []>, Sched<[sched]> { let ImmT = typeinfo.ImmEncoding; let Uses = [areg]; @@ -427,7 +427,7 @@ class UnaryOpR opcode, Format f, string mnemonic, X86TypeInfo info, class INCDECR : UnaryOpR<0xFE, f, mnemonic, info, - [(set info.RegClass:$dst, EFLAGS, + [(set info.RegClass:$dst, EFLAGS, (node info.RegClass:$src1, 1))]>; // INCDECM - Instructions like "inc [mem]". @@ -444,16 +444,16 @@ class INCDECR_ALT opcode, string mnemonic, X86TypeInfo info> } // MulOpR - Instructions like "mul reg". -class MulOpR opcode, Format f, string mnemonic, X86TypeInfo info, +class MulOpR opcode, Format f, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched, list pattern> - : ITy, + : ITy, Sched<[sched]>; // MulOpM - Instructions like "mul [mem]". -class MulOpM opcode, Format f, string mnemonic, X86TypeInfo info, +class MulOpM opcode, Format f, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched, list pattern> - : ITy, SchedLoadReg; // NegOpR - Instructions like "neg reg", with implicit EFLAGS. @@ -465,7 +465,7 @@ class NegOpR opcode, string mnemonic, X86TypeInfo info> // NotOpR - Instructions like "not reg". class NotOpR opcode, string mnemonic, X86TypeInfo info> : UnaryOpR; // NegOpM - Instructions like "neg [mem]", with implicit EFLAGS. @@ -496,16 +496,16 @@ class BinOpRM_C opcode, Format f, string mnemonic, X86TypeInfo info, mnemonic, "{$src2, $dst|$dst, $src2}", pattern>; // IMulOpRR - Instructions like "imul reg, reg, i8". -class IMulOpRR opcode, string mnemonic, X86TypeInfo info, +class IMulOpRR opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : BinOpRR_C, + (X86smul_flag info.RegClass:$src1, + info.RegClass:$src2))]>, Sched<[sched]>, TB; // IMulOpRM - Instructions like "imul reg, reg, [mem]". -class IMulOpRM opcode, string mnemonic, X86TypeInfo info, +class IMulOpRM opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : BinOpRM_C opcode, string mnemonic, X86TypeInfo info, Sched<[sched.Folded, sched.ReadAfterFold]>, TB; // IMulOpRRI8 - Instructions like "imul reg, reg, i8". -class IMulOpRRI8 opcode, string mnemonic, X86TypeInfo info, +class IMulOpRRI8 opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : ITy, + (X86smul_flag info.RegClass:$src1, + info.Imm8NoSuOperator:$src2))]>, Sched<[sched]>{ let ImmT = Imm8; } // IMulOpRRI - Instructions like "imul reg, reg, i16/i32/i64". 
-class IMulOpRRI opcode, string mnemonic, X86TypeInfo info, +class IMulOpRRI opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : ITy, + (X86smul_flag info.RegClass:$src1, + info.ImmNoSuOperator:$src2))]>, Sched<[sched]>{ let ImmT = info.ImmEncoding; } // IMulOpRMI8 - Instructions like "imul reg, [mem], i8". -class IMulOpRMI8 opcode, string mnemonic, X86TypeInfo info, +class IMulOpRMI8 opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : ITy, + info.Imm8NoSuOperator:$src2))]>, Sched<[sched.Folded]>{ let ImmT = Imm8; } // IMulOpRMI - Instructions like "imul reg, [mem], i16/i32/i64". -class IMulOpRMI opcode, string mnemonic, X86TypeInfo info, +class IMulOpRMI opcode, string mnemonic, X86TypeInfo info, X86FoldableSchedWrite sched> : ITy, + info.ImmNoSuOperator:$src2))]>, Sched<[sched.Folded]>{ let ImmT = info.ImmEncoding; } @@ -639,7 +639,7 @@ let Predicates = [UseIncDec, In64BitMode] in { // SDNode results (i8, i32). // AL,AH = AL*GR8 let Defs = [AL,EFLAGS,AX], Uses = [AL] in -def MUL8r : MulOpR<0xF6, MRM4r, "mul", Xi8, WriteIMul8, +def MUL8r : MulOpR<0xF6, MRM4r, "mul", Xi8, WriteIMul8, // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the // syntax can be accepted. From 3ca6e69b6efe6ff4dc456e0ac227b292523a056f Mon Sep 17 00:00:00 2001 From: Jun Zhang Date: Fri, 24 Mar 2023 10:27:02 +0800 Subject: [PATCH 502/691] Precommit tests for #60690 Differential Revision: https://reviews.llvm.org/D146636 Signed-off-by: Jun Zhang --- llvm/test/Transforms/InstCombine/bswap.ll | 60 +++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index 8c5c761c73e29..ba68e18cf7990 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -929,3 +929,63 @@ define i32 @PR50910(i64 %t0) { %t6 = trunc i64 %t5 to i32 ret i32 %t6 } + +define i64 @PR60690_call_fshl(i64 %result) { +; CHECK-LABEL: @PR60690_call_fshl( +; CHECK-NEXT: [[AND_I:%.*]] = lshr i64 [[RESULT:%.*]], 8 +; CHECK-NEXT: [[SHR_I:%.*]] = and i64 [[AND_I]], 71777214294589695 +; CHECK-NEXT: [[AND1_I:%.*]] = shl i64 [[RESULT]], 8 +; CHECK-NEXT: [[SHL_I:%.*]] = and i64 [[AND1_I]], -71777214294589696 +; CHECK-NEXT: [[OR_I:%.*]] = or i64 [[SHR_I]], [[SHL_I]] +; CHECK-NEXT: [[AND_I7:%.*]] = shl i64 [[OR_I]], 16 +; CHECK-NEXT: [[SHL_I8:%.*]] = and i64 [[AND_I7]], -281470681808896 +; CHECK-NEXT: [[AND1_I9:%.*]] = lshr i64 [[OR_I]], 16 +; CHECK-NEXT: [[SHR_I10:%.*]] = and i64 [[AND1_I9]], 281470681808895 +; CHECK-NEXT: [[OR_I11:%.*]] = or i64 [[SHL_I8]], [[SHR_I10]] +; CHECK-NEXT: [[OR_I12:%.*]] = tail call i64 @llvm.fshl.i64(i64 [[OR_I11]], i64 [[OR_I11]], i64 32) +; CHECK-NEXT: ret i64 [[OR_I12]] +; + %and.i = lshr i64 %result, 8 + %shr.i = and i64 %and.i, 71777214294589695 + %and1.i = shl i64 %result, 8 + %shl.i = and i64 %and1.i, -71777214294589696 + %or.i = or i64 %shr.i, %shl.i + %and.i7 = shl i64 %or.i, 16 + %shl.i8 = and i64 %and.i7, -281470681808896 + %and1.i9 = lshr i64 %or.i, 16 + %shr.i10 = and i64 %and1.i9, 281470681808895 + %or.i11 = or i64 %shl.i8, %shr.i10 + %or.i12 = tail call i64 @llvm.fshl.i64(i64 %or.i11, i64 %or.i11, i64 32) + ret i64 %or.i12 +} +declare i64 @llvm.fshl.i64(i64, i64, i64) + +define i64 @PR60690_call_fshr(i64 %result) { +; CHECK-LABEL: @PR60690_call_fshr( +; CHECK-NEXT: [[AND_I:%.*]] = lshr i64 [[RESULT:%.*]], 8 +; CHECK-NEXT: [[SHR_I:%.*]] = and i64 [[AND_I]], 71777214294589695 +; 
CHECK-NEXT: [[AND1_I:%.*]] = shl i64 [[RESULT]], 8 +; CHECK-NEXT: [[SHL_I:%.*]] = and i64 [[AND1_I]], -71777214294589696 +; CHECK-NEXT: [[OR_I:%.*]] = or i64 [[SHR_I]], [[SHL_I]] +; CHECK-NEXT: [[AND_I7:%.*]] = shl i64 [[OR_I]], 16 +; CHECK-NEXT: [[SHL_I8:%.*]] = and i64 [[AND_I7]], -281470681808896 +; CHECK-NEXT: [[AND1_I9:%.*]] = lshr i64 [[OR_I]], 16 +; CHECK-NEXT: [[SHR_I10:%.*]] = and i64 [[AND1_I9]], 281470681808895 +; CHECK-NEXT: [[OR_I11:%.*]] = or i64 [[SHL_I8]], [[SHR_I10]] +; CHECK-NEXT: [[OR_I12:%.*]] = call i64 @llvm.fshl.i64(i64 [[OR_I11]], i64 [[OR_I11]], i64 32) +; CHECK-NEXT: ret i64 [[OR_I12]] +; + %and.i = lshr i64 %result, 8 + %shr.i = and i64 %and.i, 71777214294589695 + %and1.i = shl i64 %result, 8 + %shl.i = and i64 %and1.i, -71777214294589696 + %or.i = or i64 %shr.i, %shl.i + %and.i7 = shl i64 %or.i, 16 + %shl.i8 = and i64 %and.i7, -281470681808896 + %and1.i9 = lshr i64 %or.i, 16 + %shr.i10 = and i64 %and1.i9, 281470681808895 + %or.i11 = or i64 %shl.i8, %shr.i10 + %or.i12 = tail call i64 @llvm.fshr.i64(i64 %or.i11, i64 %or.i11, i64 32) + ret i64 %or.i12 +} +declare i64 @llvm.fshr.i64(i64, i64, i64) From cea938390ea77d494b77d399ed440c0c76ef3849 Mon Sep 17 00:00:00 2001 From: Jun Zhang Date: Fri, 24 Mar 2023 10:28:02 +0800 Subject: [PATCH 503/691] [InstCombine] Try to recognize bswap pattern when calling funnel shifts Alive2: https://alive2.llvm.org/ce/z/dxxD7B Fixes: https://github.com/llvm/llvm-project/issues/60690 Signed-off-by: Jun Zhang Differential Revision: https://reviews.llvm.org/D146637 --- .../InstCombine/InstCombineCalls.cpp | 4 ++++ llvm/test/Transforms/InstCombine/bswap.ll | 24 ++----------------- llvm/test/Transforms/InstCombine/fsh.ll | 9 +++---- 3 files changed, 11 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0fbd62e8a41c0..0708fb44b982b 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1795,6 +1795,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { Function *Bswap = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Ty); return CallInst::Create(Bswap, { Op0 }); } + if (Instruction *BitOp = + matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true, + /*MatchBitReversals*/ true)) + return BitOp; } // Left or right might be masked. 
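For context, a sketch of the kind of source this new matchBSwapOrBitReverse call handles (the function name is illustrative, and the masks simply mirror the constants in the tests below; clang typically emits the final rotate as an llvm.fshl call):

#include <cstdint>

// Swap adjacent bytes, then adjacent 16-bit chunks, then the 32-bit halves.
// The last step is a rotate by 32, lowered to llvm.fshl.i64; with this change
// InstCombine folds the whole sequence to a single llvm.bswap.i64 call.
static uint64_t bswap64_by_hand(uint64_t x) {
  x = ((x >> 8) & 0x00FF00FF00FF00FFULL) | ((x << 8) & 0xFF00FF00FF00FF00ULL);
  x = ((x >> 16) & 0x0000FFFF0000FFFFULL) | ((x << 16) & 0xFFFF0000FFFF0000ULL);
  return (x << 32) | (x >> 32);
}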
diff --git a/llvm/test/Transforms/InstCombine/bswap.ll b/llvm/test/Transforms/InstCombine/bswap.ll index ba68e18cf7990..bb70b4e0c1be2 100644 --- a/llvm/test/Transforms/InstCombine/bswap.ll +++ b/llvm/test/Transforms/InstCombine/bswap.ll @@ -932,17 +932,7 @@ define i32 @PR50910(i64 %t0) { define i64 @PR60690_call_fshl(i64 %result) { ; CHECK-LABEL: @PR60690_call_fshl( -; CHECK-NEXT: [[AND_I:%.*]] = lshr i64 [[RESULT:%.*]], 8 -; CHECK-NEXT: [[SHR_I:%.*]] = and i64 [[AND_I]], 71777214294589695 -; CHECK-NEXT: [[AND1_I:%.*]] = shl i64 [[RESULT]], 8 -; CHECK-NEXT: [[SHL_I:%.*]] = and i64 [[AND1_I]], -71777214294589696 -; CHECK-NEXT: [[OR_I:%.*]] = or i64 [[SHR_I]], [[SHL_I]] -; CHECK-NEXT: [[AND_I7:%.*]] = shl i64 [[OR_I]], 16 -; CHECK-NEXT: [[SHL_I8:%.*]] = and i64 [[AND_I7]], -281470681808896 -; CHECK-NEXT: [[AND1_I9:%.*]] = lshr i64 [[OR_I]], 16 -; CHECK-NEXT: [[SHR_I10:%.*]] = and i64 [[AND1_I9]], 281470681808895 -; CHECK-NEXT: [[OR_I11:%.*]] = or i64 [[SHL_I8]], [[SHR_I10]] -; CHECK-NEXT: [[OR_I12:%.*]] = tail call i64 @llvm.fshl.i64(i64 [[OR_I11]], i64 [[OR_I11]], i64 32) +; CHECK-NEXT: [[OR_I12:%.*]] = call i64 @llvm.bswap.i64(i64 [[RESULT:%.*]]) ; CHECK-NEXT: ret i64 [[OR_I12]] ; %and.i = lshr i64 %result, 8 @@ -962,17 +952,7 @@ declare i64 @llvm.fshl.i64(i64, i64, i64) define i64 @PR60690_call_fshr(i64 %result) { ; CHECK-LABEL: @PR60690_call_fshr( -; CHECK-NEXT: [[AND_I:%.*]] = lshr i64 [[RESULT:%.*]], 8 -; CHECK-NEXT: [[SHR_I:%.*]] = and i64 [[AND_I]], 71777214294589695 -; CHECK-NEXT: [[AND1_I:%.*]] = shl i64 [[RESULT]], 8 -; CHECK-NEXT: [[SHL_I:%.*]] = and i64 [[AND1_I]], -71777214294589696 -; CHECK-NEXT: [[OR_I:%.*]] = or i64 [[SHR_I]], [[SHL_I]] -; CHECK-NEXT: [[AND_I7:%.*]] = shl i64 [[OR_I]], 16 -; CHECK-NEXT: [[SHL_I8:%.*]] = and i64 [[AND_I7]], -281470681808896 -; CHECK-NEXT: [[AND1_I9:%.*]] = lshr i64 [[OR_I]], 16 -; CHECK-NEXT: [[SHR_I10:%.*]] = and i64 [[AND1_I9]], 281470681808895 -; CHECK-NEXT: [[OR_I11:%.*]] = or i64 [[SHL_I8]], [[SHR_I10]] -; CHECK-NEXT: [[OR_I12:%.*]] = call i64 @llvm.fshl.i64(i64 [[OR_I11]], i64 [[OR_I11]], i64 32) +; CHECK-NEXT: [[OR_I12:%.*]] = call i64 @llvm.bswap.i64(i64 [[RESULT:%.*]]) ; CHECK-NEXT: ret i64 [[OR_I12]] ; %and.i = lshr i64 %result, 8 diff --git a/llvm/test/Transforms/InstCombine/fsh.ll b/llvm/test/Transforms/InstCombine/fsh.ll index 7d3f3948511d1..489f6e686680b 100644 --- a/llvm/test/Transforms/InstCombine/fsh.ll +++ b/llvm/test/Transforms/InstCombine/fsh.ll @@ -672,8 +672,9 @@ define i32 @fshl_mask_args_same1(i32 %a) { define i32 @fshl_mask_args_same2(i32 %a) { ; CHECK-LABEL: @fshl_mask_args_same2( -; CHECK-NEXT: [[T1:%.*]] = shl i32 [[A:%.*]], 8 -; CHECK-NEXT: [[T2:%.*]] = and i32 [[T1]], 65280 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[A:%.*]] to i16 +; CHECK-NEXT: [[REV:%.*]] = shl i16 [[TRUNC]], 8 +; CHECK-NEXT: [[T2:%.*]] = zext i16 [[REV]] to i32 ; CHECK-NEXT: ret i32 [[T2]] ; %t1 = and i32 %a, 255 @@ -683,8 +684,8 @@ define i32 @fshl_mask_args_same2(i32 %a) { define i32 @fshl_mask_args_same3(i32 %a) { ; CHECK-LABEL: @fshl_mask_args_same3( -; CHECK-NEXT: [[T2:%.*]] = shl i32 [[A:%.*]], 24 -; CHECK-NEXT: ret i32 [[T2]] +; CHECK-NEXT: [[REV:%.*]] = shl i32 [[A:%.*]], 24 +; CHECK-NEXT: ret i32 [[REV]] ; %t1 = and i32 %a, 255 %t2 = call i32 @llvm.fshl.i32(i32 %t1, i32 %t1, i32 24) From 11674147e40699202132440313032528dfbf624f Mon Sep 17 00:00:00 2001 From: Xiaodong Liu Date: Fri, 24 Mar 2023 11:08:21 +0800 Subject: [PATCH 504/691] [LoongArch] Enable LoopDataPrefetch pass Keep `EnableLoopDataPrefetch` option off for now because we 
need a few more TTIs and ISels. This patch is inspired by http://reviews.llvm.org/D17943. Reviewed By: SixWeining Differential Revision: https://reviews.llvm.org/D146600 --- .../LoongArch/LoongArchTargetMachine.cpp | 12 +++++++++ .../LoopDataPrefetch/LoongArch/basic.ll | 25 +++++++++++++++++++ .../LoopDataPrefetch/LoongArch/lit.local.cfg | 2 ++ 3 files changed, 39 insertions(+) create mode 100644 llvm/test/Transforms/LoopDataPrefetch/LoongArch/basic.ll create mode 100644 llvm/test/Transforms/LoopDataPrefetch/LoongArch/lit.local.cfg diff --git a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp index 933ba3b40ce40..504019c2a09e8 100644 --- a/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/MC/TargetRegistry.h" +#include "llvm/Transforms/Scalar.h" #include using namespace llvm; @@ -34,6 +35,11 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeLoongArchTarget() { initializeLoongArchDAGToDAGISelPass(*PR); } +static cl::opt + EnableLoopDataPrefetch("loongarch-enable-loop-data-prefetch", cl::Hidden, + cl::desc("Enable the loop data prefetch pass"), + cl::init(false)); + static std::string computeDataLayout(const Triple &TT) { if (TT.isArch64Bit()) return "e-m:e-p:64:64-i64:64-i128:128-n64-S128"; @@ -126,6 +132,12 @@ LoongArchTargetMachine::createPassConfig(PassManagerBase &PM) { } void LoongArchPassConfig::addIRPasses() { + // Run LoopDataPrefetch + // + // Run this before LSR to remove the multiplies involved in computing the + // pointer values N iterations ahead. + if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch) + addPass(createLoopDataPrefetchPass()); addPass(createAtomicExpandPass()); TargetPassConfig::addIRPasses(); diff --git a/llvm/test/Transforms/LoopDataPrefetch/LoongArch/basic.ll b/llvm/test/Transforms/LoopDataPrefetch/LoongArch/basic.ll new file mode 100644 index 0000000000000..55a2a2970d2d7 --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/LoongArch/basic.ll @@ -0,0 +1,25 @@ +;; Tag this 'XFAIL' because we need a few more TTIs and ISels. 
+; XFAIL: * +; RUN: opt --mtriple=loongarch64 --passes=loop-data-prefetch -loongarch-enable-loop-data-prefetch -S < %s | FileCheck %s + +define void @foo(ptr %a, ptr %b) { +entry: + br label %for.body + +; CHECK: for.body: +for.body: ; preds = %for.body, %entry + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds double, ptr %b, i64 %indvars.iv +; CHECK: call void @llvm.prefetch + %0 = load double, ptr %arrayidx, align 8 + %add = fadd double %0, 1.000000e+00 + %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %indvars.iv + store double %add, ptr %arrayidx2, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, 1600 + br i1 %exitcond, label %for.end, label %for.body + +; CHECK: for.end: +for.end: ; preds = %for.body + ret void +} diff --git a/llvm/test/Transforms/LoopDataPrefetch/LoongArch/lit.local.cfg b/llvm/test/Transforms/LoopDataPrefetch/LoongArch/lit.local.cfg new file mode 100644 index 0000000000000..2b5a4893e686f --- /dev/null +++ b/llvm/test/Transforms/LoopDataPrefetch/LoongArch/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'LoongArch' in config.root.targets: + config.unsupported = True From d8efbcf9dcbb413fa3d3e66173f4630989e5588c Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 23 Mar 2023 20:20:20 -0700 Subject: [PATCH 505/691] [AArch64] Add tests for umax(x, 1u) This patch adds tests for umax(x, 1u). This patch fixes: https://github.com/llvm/llvm-project/issues/60233 It turns out that commit 86b4d8645fc1b86693fef564cef68f24599c930f on Feb 8, 2023 already performs the instcombine transformation proposed in the issue, so the issue requires no change on the codegen side. --- llvm/test/CodeGen/AArch64/min-max.ll | 112 +++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/min-max.ll b/llvm/test/CodeGen/AArch64/min-max.ll index 63d5632b50821..8914406f1db73 100644 --- a/llvm/test/CodeGen/AArch64/min-max.ll +++ b/llvm/test/CodeGen/AArch64/min-max.ll @@ -428,6 +428,36 @@ define i8 @umaxi8(i8 %a, i8 %b) { ret i8 %c } +define i8 @umaxi8_1(i8 %a) { +; CHECK-ISEL-LABEL: umaxi8_1: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: and w8, w0, #0xff +; CHECK-ISEL-NEXT: tst w0, #0xfe +; CHECK-ISEL-NEXT: csinc w0, w8, wzr, ne +; CHECK-ISEL-NEXT: ret +; +; CHECK-ISEL-CSSC-LABEL: umaxi8_1: +; CHECK-ISEL-CSSC: // %bb.0: +; CHECK-ISEL-CSSC-NEXT: and w8, w0, #0xff +; CHECK-ISEL-CSSC-NEXT: umax w0, w8, #1 +; CHECK-ISEL-CSSC-NEXT: ret +; +; CHECK-GLOBAL-LABEL: umaxi8_1: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: and w8, w0, #0xff +; CHECK-GLOBAL-NEXT: cmp w8, #1 +; CHECK-GLOBAL-NEXT: csinc w0, w0, wzr, hi +; CHECK-GLOBAL-NEXT: ret +; +; CHECK-GLOBAL-CSSC-LABEL: umaxi8_1: +; CHECK-GLOBAL-CSSC: // %bb.0: +; CHECK-GLOBAL-CSSC-NEXT: and w8, w0, #0xff +; CHECK-GLOBAL-CSSC-NEXT: umax w0, w8, #1 +; CHECK-GLOBAL-CSSC-NEXT: ret + %c = call i8 @llvm.umax.i8(i8 %a, i8 1) + ret i8 %c +} + declare i16 @llvm.umax.i16(i16 %a, i16 %b) readnone define i16 @umaxi16(i16 %a, i16 %b) { @@ -463,6 +493,36 @@ define i16 @umaxi16(i16 %a, i16 %b) { ret i16 %c } +define i16 @umaxi16_1(i16 %a) { +; CHECK-ISEL-LABEL: umaxi16_1: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: and w8, w0, #0xffff +; CHECK-ISEL-NEXT: tst w0, #0xfffe +; CHECK-ISEL-NEXT: csinc w0, w8, wzr, ne +; CHECK-ISEL-NEXT: ret +; +; CHECK-ISEL-CSSC-LABEL: umaxi16_1: +; CHECK-ISEL-CSSC: // %bb.0: +; CHECK-ISEL-CSSC-NEXT: and w8, w0, #0xffff +; CHECK-ISEL-CSSC-NEXT: umax w0, w8, #1 +; CHECK-ISEL-CSSC-NEXT: ret +; 
+; CHECK-GLOBAL-LABEL: umaxi16_1: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: and w8, w0, #0xffff +; CHECK-GLOBAL-NEXT: cmp w8, #1 +; CHECK-GLOBAL-NEXT: csinc w0, w0, wzr, hi +; CHECK-GLOBAL-NEXT: ret +; +; CHECK-GLOBAL-CSSC-LABEL: umaxi16_1: +; CHECK-GLOBAL-CSSC: // %bb.0: +; CHECK-GLOBAL-CSSC-NEXT: and w8, w0, #0xffff +; CHECK-GLOBAL-CSSC-NEXT: umax w0, w8, #1 +; CHECK-GLOBAL-CSSC-NEXT: ret + %c = call i16 @llvm.umax.i16(i16 %a, i16 1) + ret i16 %c +} + declare i32 @llvm.umax.i32(i32 %a, i32 %b) readnone define i32 @umaxi32(i32 %a, i32 %b) { @@ -491,6 +551,32 @@ define i32 @umaxi32(i32 %a, i32 %b) { ret i32 %c } +define i32 @umaxi32_1(i32 %a) { +; CHECK-ISEL-LABEL: umaxi32_1: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: cmp w0, #1 +; CHECK-ISEL-NEXT: csinc w0, w0, wzr, hi +; CHECK-ISEL-NEXT: ret +; +; CHECK-ISEL-CSSC-LABEL: umaxi32_1: +; CHECK-ISEL-CSSC: // %bb.0: +; CHECK-ISEL-CSSC-NEXT: umax w0, w0, #1 +; CHECK-ISEL-CSSC-NEXT: ret +; +; CHECK-GLOBAL-LABEL: umaxi32_1: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: cmp w0, #1 +; CHECK-GLOBAL-NEXT: csinc w0, w0, wzr, hi +; CHECK-GLOBAL-NEXT: ret +; +; CHECK-GLOBAL-CSSC-LABEL: umaxi32_1: +; CHECK-GLOBAL-CSSC: // %bb.0: +; CHECK-GLOBAL-CSSC-NEXT: umax w0, w0, #1 +; CHECK-GLOBAL-CSSC-NEXT: ret + %c = call i32 @llvm.umax.i32(i32 %a, i32 1) + ret i32 %c +} + declare i64 @llvm.umax.i64(i64 %a, i64 %b) readnone define i64 @umaxi64(i64 %a, i64 %b) { @@ -519,6 +605,32 @@ define i64 @umaxi64(i64 %a, i64 %b) { ret i64 %c } +define i64 @umaxi64_1(i64 %a) { +; CHECK-ISEL-LABEL: umaxi64_1: +; CHECK-ISEL: // %bb.0: +; CHECK-ISEL-NEXT: cmp x0, #1 +; CHECK-ISEL-NEXT: csinc x0, x0, xzr, hi +; CHECK-ISEL-NEXT: ret +; +; CHECK-ISEL-CSSC-LABEL: umaxi64_1: +; CHECK-ISEL-CSSC: // %bb.0: +; CHECK-ISEL-CSSC-NEXT: umax x0, x0, #1 +; CHECK-ISEL-CSSC-NEXT: ret +; +; CHECK-GLOBAL-LABEL: umaxi64_1: +; CHECK-GLOBAL: // %bb.0: +; CHECK-GLOBAL-NEXT: cmp x0, #1 +; CHECK-GLOBAL-NEXT: csinc x0, x0, xzr, hi +; CHECK-GLOBAL-NEXT: ret +; +; CHECK-GLOBAL-CSSC-LABEL: umaxi64_1: +; CHECK-GLOBAL-CSSC: // %bb.0: +; CHECK-GLOBAL-CSSC-NEXT: umax x0, x0, #1 +; CHECK-GLOBAL-CSSC-NEXT: ret + %c = call i64 @llvm.umax.i64(i64 %a, i64 1) + ret i64 %c +} + declare <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b) readnone define <8 x i8> @umax8i8(<8 x i8> %a, <8 x i8> %b) { From cc86e6b0a88be33a8b74b5f0431424909dec9feb Mon Sep 17 00:00:00 2001 From: Xiang1 Zhang Date: Tue, 21 Mar 2023 17:33:54 +0800 Subject: [PATCH 506/691] [BugFix] Fix VSELECT ISel fail Reviewed By: Luo yuanke Differential Revision: https://reviews.llvm.org/D146683 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 ++++++++---- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 2 ++ llvm/test/CodeGen/X86/vselect-post-combine.ll | 24 +++++++++++++++++++ 3 files changed, 36 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/X86/vselect-post-combine.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cc722bcc8c2b3..20d7447802c8a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12210,7 +12210,8 @@ static bool isCompatibleLoad(SDValue N, unsigned ExtOpcode) { /// This function is called by the DAGCombiner when visiting sext/zext/aext /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 
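/// When called after legalization (see the CombineLevel parameter added below), /// this fold must bail out on an illegal VSELECT: DAG Combine 2 runs after the /// legalizer, so a VSELECT formed at that point can no longer be legalized and /// instruction selection would fail on it.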
static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, - SelectionDAG &DAG) { + SelectionDAG &DAG, + CombineLevel Level) { unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -12235,10 +12236,14 @@ static SDValue tryToFoldExtendSelectLoad(SDNode *N, const TargetLowering &TLI, else if (Opcode == ISD::ZERO_EXTEND) ExtLoadOpcode = ISD::ZEXTLOAD; + // An illegal VSELECT may fail instruction selection if it occurs after + // legalization (DAG Combine 2), so conservatively check the OperationAction. LoadSDNode *Load1 = cast<LoadSDNode>(Op1); LoadSDNode *Load2 = cast<LoadSDNode>(Op2); if (!TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load1->getMemoryVT()) || - !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT())) + !TLI.isLoadExtLegal(ExtLoadOpcode, VT, Load2->getMemoryVT()) || + (N0->getOpcode() == ISD::VSELECT && Level >= AfterLegalizeTypes && + TLI.getOperationAction(ISD::VSELECT, VT) != TargetLowering::Legal)) return SDValue(); SDValue Ext1 = DAG.getNode(Opcode, DL, VT, Op1); @@ -13106,7 +13111,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT)); } - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); @@ -13457,7 +13462,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SDValue V = widenAbs(N, DAG)) return V; - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); @@ -13618,7 +13623,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (SDValue NewCtPop = widenCtPop(N, DAG)) return NewCtPop; - if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG, Level)) return Res; return SDValue(); diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 5e90a94819b6b..dfac24935e244 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1039,6 +1039,8 @@ void X86DAGToDAGISel::PreprocessISelDAG() { break; assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!"); + assert(N->getValueType(0).getVectorElementType() != MVT::i16 && + "We can't replace VSELECT with BLENDV in vXi16!"); SDValue Blendv = CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), N->getOperand(0), N->getOperand(1), N->getOperand(2)); diff --git a/llvm/test/CodeGen/X86/vselect-post-combine.ll b/llvm/test/CodeGen/X86/vselect-post-combine.ll new file mode 100644 index 0000000000000..fdbc361e85d22 --- /dev/null +++ b/llvm/test/CodeGen/X86/vselect-post-combine.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX2 + +define ptr @test_mul(ptr %addr) { +; AVX2-LABEL: test_mul: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vmovdqa {{.*#+}} xmm0 = [255,0,0,0] +; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX2-NEXT: vpblendvb %xmm0, (%rdi), %xmm1, %xmm0 +; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX2-NEXT: vmovdqu %ymm0, 0 +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT:
retq +entry: + %vec0 = load <32 x i8>, ptr %addr + %vec1 = shufflevector <32 x i8> %vec0, <32 x i8> , <32 x i32> + %0 = bitcast <32 x i8> %vec1 to <4 x i64> + %shuffle = shufflevector <4 x i64> %0, <4 x i64> zeroinitializer, <2 x i32> + %1 = bitcast <2 x i64> %shuffle to <16 x i8> + %conv = zext <16 x i8> %1 to <16 x i16> + store <16 x i16> %conv, ptr null, align 1 + ret ptr null +} From abddb8359895a2040a3439850f5c8c9c61123947 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Thu, 23 Mar 2023 22:14:10 -0700 Subject: [PATCH 507/691] [lldb] Fix type of --apply-fixits (NFC) --- lldb/source/Commands/Options.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td index f11c95e5660e2..ea917f78841bb 100644 --- a/lldb/source/Commands/Options.td +++ b/lldb/source/Commands/Options.td @@ -371,7 +371,7 @@ let Command = "expression" in { Arg<"Language">, Desc<"Specifies the Language to use when parsing the " "expression. If not set the target.language setting is used.">; def expression_options_apply_fixits : Option<"apply-fixits", "X">, - Groups<[1,2]>, Arg<"Language">, Desc<"If true, simple fix-it hints will be " + Groups<[1,2]>, Arg<"Boolean">, Desc<"If true, simple fix-it hints will be " "automatically applied to the expression.">; def expression_options_description_verbosity : Option<"description-verbosity", "v">, Group<1>, From 1e4325f30c2494637626a978c54b41c8ca7ec0ff Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 23 Mar 2023 23:48:17 -0700 Subject: [PATCH 508/691] [X86] Precommit a test This patch precommits a test for: https://github.com/llvm/llvm-project/issues/61365 --- llvm/test/CodeGen/X86/select-constant-lea.ll | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 llvm/test/CodeGen/X86/select-constant-lea.ll diff --git a/llvm/test/CodeGen/X86/select-constant-lea.ll b/llvm/test/CodeGen/X86/select-constant-lea.ll new file mode 100644 index 0000000000000..e8472053353cc --- /dev/null +++ b/llvm/test/CodeGen/X86/select-constant-lea.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=BASE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=slow-3ops-lea | FileCheck %s --check-prefix=SLOWLEA3 + +define i32 @select_unsigned_lt_10_8_13j(i32 %0) { +; BASE-LABEL: select_unsigned_lt_10_8_13j: +; BASE: # %bb.0: +; BASE-NEXT: xorl %eax, %eax +; BASE-NEXT: cmpl $10, %edi +; BASE-NEXT: setae %al +; BASE-NEXT: leal (%rax,%rax,4), %eax +; BASE-NEXT: orl $8, %eax +; BASE-NEXT: retq +; +; SLOWLEA3-LABEL: select_unsigned_lt_10_8_13j: +; SLOWLEA3: # %bb.0: +; SLOWLEA3-NEXT: xorl %eax, %eax +; SLOWLEA3-NEXT: cmpl $10, %edi +; SLOWLEA3-NEXT: setae %al +; SLOWLEA3-NEXT: leal (%rax,%rax,4), %eax +; SLOWLEA3-NEXT: orl $8, %eax +; SLOWLEA3-NEXT: retq + %2 = icmp ult i32 %0, 10 + %3 = select i1 %2, i32 8, i32 13 + ret i32 %3 +} From d30bc9e91241d69410fe1a878a66438dd752014f Mon Sep 17 00:00:00 2001 From: Michael Platings Date: Tue, 14 Mar 2023 19:40:58 +0000 Subject: [PATCH 509/691] [Driver] Change multilib selection algorithm The new algorithm is: 1. Find all multilibs with flags that are a subset of the requested flags. 2. If more than one multilib matches, choose the last. In addition a new selection mechanism is permitted via an overload of MultilibSet::select() for which multiple multilibs are returned. This allows layering multilibs on top of each other. 
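As an illustration of the new behaviour (condensed from the unit test added at the end of this patch; the flag names are just examples):

  MultilibSet MS({Multilib("/a", {}, {}, {"x"}),
                  Multilib("/b", {}, {}, {"y"})});
  std::vector<Multilib> Sel = MS.select({"y", "x"}); // both match: {/a, /b}
  Multilib Best;
  MS.select({"y", "x"}, Best); // the single-result overload picks /b, the last match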
Since multilibs are now ordered within a list, they no longer need a Priority field. The new algorithm is different from the old algorithm, but in practice the old algorithm was always used in such a way that the effect is the same. The old algorithm was to find the set intersection of the requested flags (with the first character of each removed) with each multilib's flags (ditto), and for that intersection check whether the first character matched. However, ignoring the first characters, the requested flags were always a superset of all the multilibs' flags. Therefore the new algorithm can be used as a drop-in replacement. The exception is Fuchsia, which needs adjusting slightly to set both fexceptions and fno-exceptions flags. Differential Revision: https://reviews.llvm.org/D142905 --- clang/include/clang/Driver/Multilib.h | 17 +---- clang/include/clang/Driver/MultilibBuilder.h | 7 +- clang/lib/Driver/Multilib.cpp | 80 ++++++-------------- clang/lib/Driver/MultilibBuilder.cpp | 7 +- clang/lib/Driver/ToolChains/Fuchsia.cpp | 19 ++--- clang/lib/Driver/ToolChains/OHOS.cpp | 14 ++-- clang/unittests/Driver/MultilibTest.cpp | 37 +++++++-- 7 files changed, 79 insertions(+), 102 deletions(-) diff --git a/clang/include/clang/Driver/Multilib.h b/clang/include/clang/Driver/Multilib.h index feb12f3638d34..9d6f1d23696b8 100644 --- a/clang/include/clang/Driver/Multilib.h +++ b/clang/include/clang/Driver/Multilib.h @@ -36,14 +36,13 @@ class Multilib { std::string OSSuffix; std::string IncludeSuffix; flags_list Flags; - int Priority; public: /// GCCSuffix, OSSuffix & IncludeSuffix will be appended directly to the /// sysroot string so they must either be empty or begin with a '/' character. /// This is enforced with an assert in the constructor. Multilib(StringRef GCCSuffix = {}, StringRef OSSuffix = {}, - StringRef IncludeSuffix = {}, int Priority = 0, + StringRef IncludeSuffix = {}, const flags_list &Flags = flags_list()); /// Get the detected GCC installation path suffix for the multi-arch @@ -62,10 +61,6 @@ class Multilib { /// All elements begin with either '+' or '-' const flags_list &flags() const { return Flags; } - /// Returns the multilib priority. When more than one multilib matches flags, - /// the one with the highest priority is selected, with 0 being the default.
- int priority() const { return Priority; } - LLVM_DUMP_METHOD void dump() const; /// print summary of the Multilib void print(raw_ostream &OS) const; @@ -108,6 +103,9 @@ class MultilibSet { const_iterator begin() const { return Multilibs.begin(); } const_iterator end() const { return Multilibs.end(); } + /// Select compatible variants + multilib_list select(const Multilib::flags_list &Flags) const; + /// Pick the best multilib in the set, \returns false if none are compatible bool select(const Multilib::flags_list &Flags, Multilib &M) const; @@ -129,13 +127,6 @@ class MultilibSet { } const IncludeDirsFunc &filePathsCallback() const { return FilePathsCallback; } - -private: - /// Apply the filter to Multilibs and return the subset that remains - static multilib_list filterCopy(FilterCallback F, const multilib_list &Ms); - - /// Apply the filter to the multilib_list, removing those that don't match - static void filterInPlace(FilterCallback F, multilib_list &Ms); }; raw_ostream &operator<<(raw_ostream &OS, const MultilibSet &MS); diff --git a/clang/include/clang/Driver/MultilibBuilder.h b/clang/include/clang/Driver/MultilibBuilder.h index cf84c456152b1..f4875f2e03f8a 100644 --- a/clang/include/clang/Driver/MultilibBuilder.h +++ b/clang/include/clang/Driver/MultilibBuilder.h @@ -28,11 +28,10 @@ class MultilibBuilder { std::string OSSuffix; std::string IncludeSuffix; flags_list Flags; - int Priority; public: MultilibBuilder(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, int Priority = 0); + StringRef IncludeSuffix); /// Initializes GCCSuffix, OSSuffix & IncludeSuffix to the same value. MultilibBuilder(StringRef Suffix = {}); @@ -75,10 +74,6 @@ class MultilibBuilder { const flags_list &flags() const { return Flags; } flags_list &flags() { return Flags; } - /// Returns the multilib priority. When more than one multilib matches flags, - /// the one with the highest priority is selected, with 0 being the default. 
- int priority() const { return Priority; } - /// Add a flag to the flags list /// \p Flag must be a flag accepted by the driver with its leading '-' /// removed, diff --git a/clang/lib/Driver/Multilib.cpp b/clang/lib/Driver/Multilib.cpp index d1ab0c7b114e9..06bab74898616 100644 --- a/clang/lib/Driver/Multilib.cpp +++ b/clang/lib/Driver/Multilib.cpp @@ -26,10 +26,9 @@ using namespace driver; using namespace llvm::sys; Multilib::Multilib(StringRef GCCSuffix, StringRef OSSuffix, - StringRef IncludeSuffix, int Priority, - const flags_list &Flags) + StringRef IncludeSuffix, const flags_list &Flags) : GCCSuffix(GCCSuffix), OSSuffix(OSSuffix), IncludeSuffix(IncludeSuffix), - Flags(Flags), Priority(Priority) { + Flags(Flags) { assert(GCCSuffix.empty() || (StringRef(GCCSuffix).front() == '/' && GCCSuffix.size() > 1)); assert(OSSuffix.empty() || @@ -84,56 +83,36 @@ raw_ostream &clang::driver::operator<<(raw_ostream &OS, const Multilib &M) { } MultilibSet &MultilibSet::FilterOut(FilterCallback F) { - filterInPlace(F, Multilibs); + llvm::erase_if(Multilibs, F); return *this; } void MultilibSet::push_back(const Multilib &M) { Multilibs.push_back(M); } -static bool isFlagEnabled(StringRef Flag) { - char Indicator = Flag.front(); - assert(Indicator == '+' || Indicator == '-'); - return Indicator == '+'; +MultilibSet::multilib_list +MultilibSet::select(const Multilib::flags_list &Flags) const { + llvm::StringSet<> FlagSet; + for (const auto &Flag : Flags) + FlagSet.insert(Flag); + + multilib_list Result; + llvm::copy_if(Multilibs, std::back_inserter(Result), + [&FlagSet](const Multilib &M) { + for (const std::string &F : M.flags()) + if (!FlagSet.contains(F)) + return false; + return true; + }); + return Result; } -bool MultilibSet::select(const Multilib::flags_list &Flags, Multilib &M) const { - llvm::StringMap FlagSet; - - // Stuff all of the flags into the FlagSet such that a true mappend indicates - // the flag was enabled, and a false mappend indicates the flag was disabled. - for (StringRef Flag : Flags) - FlagSet[Flag.substr(1)] = isFlagEnabled(Flag); - - multilib_list Filtered = filterCopy([&FlagSet](const Multilib &M) { - for (StringRef Flag : M.flags()) { - llvm::StringMap::const_iterator SI = FlagSet.find(Flag.substr(1)); - if (SI != FlagSet.end()) - if (SI->getValue() != isFlagEnabled(Flag)) - return true; - } - return false; - }, Multilibs); - - if (Filtered.empty()) +bool MultilibSet::select(const Multilib::flags_list &Flags, + Multilib &Selected) const { + multilib_list Result = select(Flags); + if (Result.empty()) return false; - if (Filtered.size() == 1) { - M = Filtered[0]; - return true; - } - - // Sort multilibs by priority and select the one with the highest priority. - llvm::sort(Filtered, [](const Multilib &a, const Multilib &b) -> bool { - return a.priority() > b.priority(); - }); - - if (Filtered[0].priority() > Filtered[1].priority()) { - M = Filtered[0]; - return true; - } - - // TODO: We should consider returning llvm::Error rather than aborting. 
- assert(false && "More than one multilib with the same priority"); + Selected = Result.back(); + return true; } LLVM_DUMP_METHOD void MultilibSet::dump() const { @@ -145,17 +124,6 @@ void MultilibSet::print(raw_ostream &OS) const { OS << M << "\n"; } -MultilibSet::multilib_list MultilibSet::filterCopy(FilterCallback F, - const multilib_list &Ms) { - multilib_list Copy(Ms); - filterInPlace(F, Copy); - return Copy; -} - -void MultilibSet::filterInPlace(FilterCallback F, multilib_list &Ms) { - llvm::erase_if(Ms, F); -} - raw_ostream &clang::driver::operator<<(raw_ostream &OS, const MultilibSet &MS) { MS.print(OS); return OS; diff --git a/clang/lib/Driver/MultilibBuilder.cpp b/clang/lib/Driver/MultilibBuilder.cpp index 83ebc31d8eb99..f6351ae4b5278 100644 --- a/clang/lib/Driver/MultilibBuilder.cpp +++ b/clang/lib/Driver/MultilibBuilder.cpp @@ -41,9 +41,8 @@ static void normalizePathSegment(std::string &Segment) { } } -MultilibBuilder::MultilibBuilder(StringRef GCC, StringRef OS, StringRef Include, - int Priority) - : GCCSuffix(GCC), OSSuffix(OS), IncludeSuffix(Include), Priority(Priority) { +MultilibBuilder::MultilibBuilder(StringRef GCC, StringRef OS, StringRef Include) + : GCCSuffix(GCC), OSSuffix(OS), IncludeSuffix(Include) { normalizePathSegment(GCCSuffix); normalizePathSegment(OSSuffix); normalizePathSegment(IncludeSuffix); @@ -87,7 +86,7 @@ bool MultilibBuilder::isValid() const { } Multilib MultilibBuilder::makeMultilib() const { - return Multilib(GCCSuffix, OSSuffix, IncludeSuffix, Priority, Flags); + return Multilib(GCCSuffix, OSSuffix, IncludeSuffix, Flags); } MultilibSetBuilder &MultilibSetBuilder::Maybe(const MultilibBuilder &M) { diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 3a3f7043a795f..b8bb000391b91 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -263,33 +263,33 @@ Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple, Multilibs.push_back(Multilib()); // Use the noexcept variant with -fno-exceptions to avoid the extra overhead. - Multilibs.push_back(MultilibBuilder("noexcept", {}, {}, 1) + Multilibs.push_back(MultilibBuilder("noexcept", {}, {}) .flag("-fexceptions") .flag("+fno-exceptions") .makeMultilib()); // ASan has higher priority because we always want the instrumented version. - Multilibs.push_back(MultilibBuilder("asan", {}, {}, 2) + Multilibs.push_back(MultilibBuilder("asan", {}, {}) .flag("+fsanitize=address") .makeMultilib()); // Use the asan+noexcept variant with ASan and -fno-exceptions. - Multilibs.push_back(MultilibBuilder("asan+noexcept", {}, {}, 3) + Multilibs.push_back(MultilibBuilder("asan+noexcept", {}, {}) .flag("+fsanitize=address") .flag("-fexceptions") .flag("+fno-exceptions") .makeMultilib()); // HWASan has higher priority because we always want the instrumented // version. - Multilibs.push_back(MultilibBuilder("hwasan", {}, {}, 4) + Multilibs.push_back(MultilibBuilder("hwasan", {}, {}) .flag("+fsanitize=hwaddress") .makeMultilib()); // Use the hwasan+noexcept variant with HWASan and -fno-exceptions. - Multilibs.push_back(MultilibBuilder("hwasan+noexcept", {}, {}, 5) + Multilibs.push_back(MultilibBuilder("hwasan+noexcept", {}, {}) .flag("+fsanitize=hwaddress") .flag("-fexceptions") .flag("+fno-exceptions") .makeMultilib()); // Use Itanium C++ ABI for the compat multilib.
- Multilibs.push_back(MultilibBuilder("compat", {}, {}, 6) + Multilibs.push_back(MultilibBuilder("compat", {}, {}) .flag("+fc++-abi=itanium") .makeMultilib()); @@ -299,9 +299,10 @@ Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple, }); Multilib::flags_list Flags; - addMultilibFlag( - Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, true), - "fexceptions", Flags); + bool Exceptions = + Args.hasFlag(options::OPT_fexceptions, options::OPT_fno_exceptions, true); + addMultilibFlag(Exceptions, "fexceptions", Flags); + addMultilibFlag(!Exceptions, "fno-exceptions", Flags); addMultilibFlag(getSanitizerArgs(Args).needsAsanRt(), "fsanitize=address", Flags); addMultilibFlag(getSanitizerArgs(Args).needsHwasanRt(), "fsanitize=hwaddress", diff --git a/clang/lib/Driver/ToolChains/OHOS.cpp b/clang/lib/Driver/ToolChains/OHOS.cpp index 71a4ccd042ac8..bd0409d282084 100644 --- a/clang/lib/Driver/ToolChains/OHOS.cpp +++ b/clang/lib/Driver/ToolChains/OHOS.cpp @@ -39,14 +39,16 @@ static bool findOHOSMuslMultilibs(const Multilib::flags_list &Flags, // -mcpu=cortex-a7 // -mfloat-abi=soft -mfloat-abi=softfp -mfloat-abi=hard // -mfpu=neon-vfpv4 - Multilibs.push_back(Multilib("/a7_soft", {}, {}, 1, - {"+mcpu=cortex-a7", "+mfloat-abi=soft"})); + Multilibs.push_back( + Multilib("/a7_soft", {}, {}, {"+mcpu=cortex-a7", "+mfloat-abi=soft"})); - Multilibs.push_back(Multilib("/a7_softfp_neon-vfpv4", {}, {}, 1, - {"+mcpu=cortex-a7", "+mfloat-abi=softfp", "+mfpu=neon-vfpv4"})); + Multilibs.push_back( + Multilib("/a7_softfp_neon-vfpv4", {}, {}, + {"+mcpu=cortex-a7", "+mfloat-abi=softfp", "+mfpu=neon-vfpv4"})); - Multilibs.push_back(Multilib("/a7_hard_neon-vfpv4", {}, {}, 1, - {"+mcpu=cortex-a7", "+mfloat-abi=hard", "+mfpu=neon-vfpv4"})); + Multilibs.push_back( + Multilib("/a7_hard_neon-vfpv4", {}, {}, + {"+mcpu=cortex-a7", "+mfloat-abi=hard", "+mfpu=neon-vfpv4"})); if (Multilibs.select(Flags, Result.SelectedMultilib)) { Result.Multilibs = Multilibs; diff --git a/clang/unittests/Driver/MultilibTest.cpp b/clang/unittests/Driver/MultilibTest.cpp index 2e729a5051734..6a066f6b0f5a6 100644 --- a/clang/unittests/Driver/MultilibTest.cpp +++ b/clang/unittests/Driver/MultilibTest.cpp @@ -33,14 +33,14 @@ TEST(MultilibTest, OpEqReflexivity2) { } TEST(MultilibTest, OpEqReflexivity3) { - Multilib M1({}, {}, {}, 0, {"+foo"}); - Multilib M2({}, {}, {}, 0, {"+foo"}); + Multilib M1({}, {}, {}, {"+foo"}); + Multilib M2({}, {}, {}, {"+foo"}); ASSERT_TRUE(M1 == M2) << "Multilibs with the same flag should be the same"; } TEST(MultilibTest, OpEqInequivalence1) { - Multilib M1({}, {}, {}, 0, {"+foo"}); - Multilib M2({}, {}, {}, 0, {"-foo"}); + Multilib M1({}, {}, {}, {"+foo"}); + Multilib M2({}, {}, {}, {"-foo"}); ASSERT_FALSE(M1 == M2) << "Multilibs with conflicting flags are not the same"; ASSERT_FALSE(M2 == M1) << "Multilibs with conflicting flags are not the same (commuted)"; @@ -48,7 +48,7 @@ TEST(MultilibTest, OpEqInequivalence1) { TEST(MultilibTest, OpEqInequivalence2) { Multilib M1; - Multilib M2({}, {}, {}, 0, {"+foo"}); + Multilib M2({}, {}, {}, {"+foo"}); ASSERT_FALSE(M1 == M2) << "Flags make Multilibs different"; } @@ -124,7 +124,7 @@ TEST(MultilibTest, Construction2) { } TEST(MultilibTest, Construction3) { - Multilib M({}, {}, {}, 0, {"+f1", "+f2", "-f3"}); + Multilib M({}, {}, {}, {"+f1", "+f2", "-f3"}); for (Multilib::flags_list::const_iterator I = M.flags().begin(), E = M.flags().end(); I != E; ++I) { @@ -149,8 +149,8 @@ TEST(MultilibTest, SetPushback) { TEST(MultilibTest, SetPriority) { MultilibSet MS({ 
- Multilib("/foo", {}, {}, 1, {"+foo"}), - Multilib("/bar", {}, {}, 2, {"+bar"}), + Multilib("/foo", {}, {}, {"+foo"}), + Multilib("/bar", {}, {}, {"+bar"}), }); Multilib::flags_list Flags1 = {"+foo", "-bar"}; Multilib Selection1; @@ -166,3 +166,24 @@ TEST(MultilibTest, SetPriority) { ASSERT_TRUE(Selection2.gccSuffix() == "/bar") << "Selection picked " << Selection2 << " which was not expected"; } + +TEST(MultilibTest, SelectMultiple) { + MultilibSet MS({ + Multilib("/a", {}, {}, {"x"}), + Multilib("/b", {}, {}, {"y"}), + }); + std::vector Selection; + + Selection = MS.select({"x"}); + ASSERT_EQ(1u, Selection.size()); + EXPECT_EQ("/a", Selection[0].gccSuffix()); + + Selection = MS.select({"y"}); + ASSERT_EQ(1u, Selection.size()); + EXPECT_EQ("/b", Selection[0].gccSuffix()); + + Selection = MS.select({"y", "x"}); + ASSERT_EQ(2u, Selection.size()); + EXPECT_EQ("/a", Selection[0].gccSuffix()); + EXPECT_EQ("/b", Selection[1].gccSuffix()); +} From f957b8fe1efe34ac04d1b2e6381e44edcef056b3 Mon Sep 17 00:00:00 2001 From: Carlos Galvez Date: Thu, 23 Mar 2023 12:16:40 +0000 Subject: [PATCH 510/691] [clang-tidy][NFC] Improve naming convention in google-readability-avoid-underscore-in-googletest-name According to the Google docs, the convention is TEST(TestSuiteName, TestName). Apply that convention to the source code, test and documentation of the check. Differential Revision: https://reviews.llvm.org/D146713 --- .../AvoidUnderscoreInGoogletestNameCheck.cpp | 19 +- ...ty-avoid-underscore-in-googletest-name.rst | 20 +- .../avoid-underscore-in-googletest-name.cpp | 226 +++++++++--------- 3 files changed, 133 insertions(+), 132 deletions(-) diff --git a/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp b/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp index b903f2552b7e6..d522d6760af1d 100644 --- a/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp +++ b/clang-tools-extra/clang-tidy/google/AvoidUnderscoreInGoogletestNameCheck.cpp @@ -47,18 +47,19 @@ class AvoidUnderscoreInGoogletestNameCallback : public PPCallbacks { if (!isGoogletestTestMacro(MacroName) || !Args || Args->getNumMacroArguments() < 2) return; - const Token *TestCaseNameToken = Args->getUnexpArgument(0); + const Token *TestSuiteNameToken = Args->getUnexpArgument(0); const Token *TestNameToken = Args->getUnexpArgument(1); - if (!TestCaseNameToken || !TestNameToken) + if (!TestSuiteNameToken || !TestNameToken) return; - std::string TestCaseNameMaybeDisabled = PP->getSpelling(*TestCaseNameToken); - StringRef TestCaseName = TestCaseNameMaybeDisabled; - TestCaseName.consume_front(KDisabledTestPrefix); - if (TestCaseName.contains('_')) - Check->diag(TestCaseNameToken->getLocation(), - "avoid using \"_\" in test case name \"%0\" according to " + std::string TestSuiteNameMaybeDisabled = + PP->getSpelling(*TestSuiteNameToken); + StringRef TestSuiteName = TestSuiteNameMaybeDisabled; + TestSuiteName.consume_front(KDisabledTestPrefix); + if (TestSuiteName.contains('_')) + Check->diag(TestSuiteNameToken->getLocation(), + "avoid using \"_\" in test suite name \"%0\" according to " "Googletest FAQ") - << TestCaseName; + << TestSuiteName; std::string TestNameMaybeDisabled = PP->getSpelling(*TestNameToken); StringRef TestName = TestNameMaybeDisabled; diff --git a/clang-tools-extra/docs/clang-tidy/checks/google/readability-avoid-underscore-in-googletest-name.rst b/clang-tools-extra/docs/clang-tidy/checks/google/readability-avoid-underscore-in-googletest-name.rst index 
f2053b4d2fcd3..e667fd12222bb 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/google/readability-avoid-underscore-in-googletest-name.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/google/readability-avoid-underscore-in-googletest-name.rst @@ -3,8 +3,8 @@ google-readability-avoid-underscore-in-googletest-name ====================================================== -Checks whether there are underscores in googletest test and test case names in -test macros: +Checks whether there are underscores in googletest test suite names and test +names in test macros: - ``TEST`` - ``TEST_F`` @@ -18,17 +18,17 @@ For example: .. code-block:: c++ - TEST(TestCaseName, Illegal_TestName) {} - TEST(Illegal_TestCaseName, TestName) {} + TEST(TestSuiteName, Illegal_TestName) {} + TEST(Illegal_TestSuiteName, TestName) {} -would trigger the check. `Underscores are not allowed`_ in test names nor test -case names. +would trigger the check. `Underscores are not allowed`_ in test suite name nor +test names. -The ``DISABLED_`` prefix, which may be used to `disable individual tests`_, is -ignored when checking test names, but the rest of the rest of the test name is -still checked. +The ``DISABLED_`` prefix, which may be used to +`disable test suites and individual tests`_, is removed from the test suite name +and test name before checking for underscores. This check does not propose any fixes. .. _Underscores are not allowed: https://google.github.io/googletest/faq.html#why-should-test-suite-names-and-test-names-not-contain-underscore -.. _disable individual tests: https://google.github.io/googletest/advanced.html#temporarily-disabling-tests +.. _disable test suites and individual tests: https://google.github.io/googletest/advanced.html#temporarily-disabling-tests diff --git a/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp b/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp index 3ab5a6ffe383b..0e43735c2105c 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/google/avoid-underscore-in-googletest-name.cpp @@ -1,118 +1,118 @@ // RUN: %check_clang_tidy %s google-readability-avoid-underscore-in-googletest-name %t -#define TEST(test_case_name, test_name) void test_case_name##test_name() -#define TEST_F(test_case_name, test_name) void test_case_name##test_name() -#define TEST_P(test_case_name, test_name) void test_case_name##test_name() -#define TYPED_TEST(test_case_name, test_name) void test_case_name##test_name() -#define TYPED_TEST_P(test_case_name, test_name) void test_case_name##test_name() -#define FRIEND_TEST(test_case_name, test_name) void test_case_name##test_name() - -TEST(TestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST(TestCaseName, DISABLED_Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST(TestCaseName, Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST(Illegal_TestCaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" 
in test case name "Illegal_TestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST(Illegal_Test_CaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" in test case name "Illegal_Test_CaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST(Illegal_TestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" in test case name "Illegal_TestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -// CHECK-MESSAGES: :[[@LINE-2]]:28: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_F(TestCaseFixtureName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_F(TestCaseFixtureName, DISABLED_Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_F(TestCaseFixtureName, Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:29: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_F(Illegal_TestCaseFixtureName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test case name "Illegal_TestCaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_F(Illegal_TestCaseFixtureName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test case name "Illegal_TestCaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -// CHECK-MESSAGES: :[[@LINE-2]]:37: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_F(Illegal_Test_CaseFixtureName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test case name "Illegal_Test_CaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_P(ParameterizedTestCaseFixtureName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:42: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_P(ParameterizedTestCaseFixtureName, DISABLED_Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:42: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_P(ParameterizedTestCaseFixtureName, Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:42: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_P(Illegal_ParameterizedTestCaseFixtureName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test case name "Illegal_ParameterizedTestCaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TEST_P(Illegal_ParameterizedTestCaseFixtureName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test case 
name "Illegal_ParameterizedTestCaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -// CHECK-MESSAGES: :[[@LINE-2]]:50: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TEST_P(Illegal_Parameterized_TestCaseFixtureName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test case name "Illegal_Parameterized_TestCaseFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TYPED_TEST(TypedTestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST(TypedTestCaseName, DISABLED_Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST(TypedTestCaseName, Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:31: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TYPED_TEST(Illegal_TypedTestCaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test case name "Illegal_TypedTestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST(Illegal_TypedTestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test case name "Illegal_TypedTestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -// CHECK-MESSAGES: :[[@LINE-2]]:39: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TYPED_TEST(Illegal_Typed_TestCaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test case name "Illegal_Typed_TestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TYPED_TEST_P(TypeParameterizedTestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:45: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST_P(TypeParameterizedTestCaseName, DISABLED_Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:45: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST_P(TypeParameterizedTestCaseName, Illegal_Test_Name) {} -// CHECK-MESSAGES: :[[@LINE-1]]:45: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TYPED_TEST_P(Illegal_TypeParameterizedTestCaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test case name "Illegal_TypeParameterizedTestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -TYPED_TEST_P(Illegal_TypeParameterizedTestCaseName, Illegal_TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test case name "Illegal_TypeParameterizedTestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] -// CHECK-MESSAGES: 
:[[@LINE-2]]:53: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] - -TYPED_TEST_P(Illegal_Type_ParameterizedTestCaseName, TestName) {} -// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test case name "Illegal_Type_ParameterizedTestCaseName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +#define TEST(test_suite_name, test_name) void test_suite_name##test_name() +#define TEST_F(test_suite_name, test_name) void test_suite_name##test_name() +#define TEST_P(test_suite_name, test_name) void test_suite_name##test_name() +#define TYPED_TEST(test_suite_name, test_name) void test_suite_name##test_name() +#define TYPED_TEST_P(test_suite_name, test_name) void test_suite_name##test_name() +#define FRIEND_TEST(test_suite_name, test_name) void test_suite_name##test_name() + +TEST(TestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST(TestSuiteName, DISABLED_Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST(TestSuiteName, Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST(Illegal_TestSuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" in test suite name "Illegal_TestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST(Illegal_Test_SuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" in test suite name "Illegal_Test_SuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST(Illegal_TestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: avoid using "_" in test suite name "Illegal_TestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +// CHECK-MESSAGES: :[[@LINE-2]]:29: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST_F(TestSuiteFixtureName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:30: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_F(TestSuiteFixtureName, DISABLED_Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:30: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_F(TestSuiteFixtureName, Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:30: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST_F(Illegal_TestSuiteFixtureName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test suite name "Illegal_TestSuiteFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_F(Illegal_TestSuiteFixtureName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in 
test suite name "Illegal_TestSuiteFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +// CHECK-MESSAGES: :[[@LINE-2]]:38: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST_F(Illegal_Test_SuiteFixtureName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test suite name "Illegal_Test_SuiteFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST_P(ParameterizedTestSuiteFixtureName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:43: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_P(ParameterizedTestSuiteFixtureName, DISABLED_Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:43: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_P(ParameterizedTestSuiteFixtureName, Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:43: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST_P(Illegal_ParameterizedTestSuiteFixtureName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test suite name "Illegal_ParameterizedTestSuiteFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TEST_P(Illegal_ParameterizedTestSuiteFixtureName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test suite name "Illegal_ParameterizedTestSuiteFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +// CHECK-MESSAGES: :[[@LINE-2]]:51: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TEST_P(Illegal_Parameterized_TestSuiteFixtureName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:8: warning: avoid using "_" in test suite name "Illegal_Parameterized_TestSuiteFixtureName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST(TypedTestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST(TypedTestSuiteName, DISABLED_Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST(TypedTestSuiteName, Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:32: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST(Illegal_TypedTestSuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test suite name "Illegal_TypedTestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST(Illegal_TypedTestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test suite name "Illegal_TypedTestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +// CHECK-MESSAGES: 
:[[@LINE-2]]:40: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST(Illegal_Typed_TestSuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: avoid using "_" in test suite name "Illegal_Typed_TestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST_P(TypeParameterizedTestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:46: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST_P(TypeParameterizedTestSuiteName, DISABLED_Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:46: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST_P(TypeParameterizedTestSuiteName, Illegal_Test_Name) {} +// CHECK-MESSAGES: :[[@LINE-1]]:46: warning: avoid using "_" in test name "Illegal_Test_Name" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST_P(Illegal_TypeParameterizedTestSuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test suite name "Illegal_TypeParameterizedTestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +TYPED_TEST_P(Illegal_TypeParameterizedTestSuiteName, Illegal_TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test suite name "Illegal_TypeParameterizedTestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] +// CHECK-MESSAGES: :[[@LINE-2]]:54: warning: avoid using "_" in test name "Illegal_TestName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] + +TYPED_TEST_P(Illegal_Type_ParameterizedTestSuiteName, TestName) {} +// CHECK-MESSAGES: :[[@LINE-1]]:14: warning: avoid using "_" in test suite name "Illegal_Type_ParameterizedTestSuiteName" according to Googletest FAQ [google-readability-avoid-underscore-in-googletest-name] // Underscores are allowed to disable a test with the DISABLED_ prefix. 
// https://google.github.io/googletest/faq.html#why-should-test-suite-names-and-test-names-not-contain-underscore
-TEST(TestCaseName, TestName) {}
-TEST(TestCaseName, DISABLED_TestName) {}
-TEST(DISABLED_TestCaseName, TestName) {}
-TEST(DISABLED_TestCaseName, DISABLED_TestName) {}
-
-TEST_F(TestCaseFixtureName, TestName) {}
-TEST_F(TestCaseFixtureName, DISABLED_TestName) {}
-TEST_F(DISABLED_TestCaseFixtureName, TestName) {}
-TEST_F(DISABLED_TestCaseFixtureName, DISABLED_TestName) {}
-
-TEST_P(ParameterizedTestCaseFixtureName, TestName) {}
-TEST_P(ParameterizedTestCaseFixtureName, DISABLED_TestName) {}
-TEST_P(DISABLED_ParameterizedTestCaseFixtureName, TestName) {}
-TEST_P(DISABLED_ParameterizedTestCaseFixtureName, DISABLED_TestName) {}
-
-TYPED_TEST(TypedTestName, TestName) {}
-TYPED_TEST(TypedTestName, DISABLED_TestName) {}
-TYPED_TEST(DISABLED_TypedTestName, TestName) {}
-TYPED_TEST(DISABLED_TypedTestName, DISABLED_TestName) {}
-
-TYPED_TEST_P(TypeParameterizedTestName, TestName) {}
-TYPED_TEST_P(TypeParameterizedTestName, DISABLED_TestName) {}
-TYPED_TEST_P(DISABLED_TypeParameterizedTestName, TestName) {}
-TYPED_TEST_P(DISABLED_TypeParameterizedTestName, DISABLED_TestName) {}
-
-FRIEND_TEST(FriendTest, Is_NotChecked) {}
-FRIEND_TEST(Friend_Test, IsNotChecked) {}
-FRIEND_TEST(Friend_Test, Is_NotChecked) {}
+TEST(TestSuiteName, TestName) {}
+TEST(TestSuiteName, DISABLED_TestName) {}
+TEST(DISABLED_TestSuiteName, TestName) {}
+TEST(DISABLED_TestSuiteName, DISABLED_TestName) {}
+
+TEST_F(TestSuiteFixtureName, TestName) {}
+TEST_F(TestSuiteFixtureName, DISABLED_TestName) {}
+TEST_F(DISABLED_TestSuiteFixtureName, TestName) {}
+TEST_F(DISABLED_TestSuiteFixtureName, DISABLED_TestName) {}
+
+TEST_P(ParameterizedTestSuiteFixtureName, TestName) {}
+TEST_P(ParameterizedTestSuiteFixtureName, DISABLED_TestName) {}
+TEST_P(DISABLED_ParameterizedTestSuiteFixtureName, TestName) {}
+TEST_P(DISABLED_ParameterizedTestSuiteFixtureName, DISABLED_TestName) {}
+
+TYPED_TEST(TypedTestSuiteName, TestName) {}
+TYPED_TEST(TypedTestSuiteName, DISABLED_TestName) {}
+TYPED_TEST(DISABLED_TypedTestSuiteName, TestName) {}
+TYPED_TEST(DISABLED_TypedTestSuiteName, DISABLED_TestName) {}
+
+TYPED_TEST_P(TypeParameterizedTestSuiteName, TestName) {}
+TYPED_TEST_P(TypeParameterizedTestSuiteName, DISABLED_TestName) {}
+TYPED_TEST_P(DISABLED_TypeParameterizedTestSuiteName, TestName) {}
+TYPED_TEST_P(DISABLED_TypeParameterizedTestSuiteName, DISABLED_TestName) {}
+
+FRIEND_TEST(FriendTestSuite, Is_NotChecked) {}
+FRIEND_TEST(Friend_TestSuite, IsNotChecked) {}
+FRIEND_TEST(Friend_TestSuite, Is_NotChecked) {}

From b0cd5b2a476063b588c59325720c841d79ed3262 Mon Sep 17 00:00:00 2001
From: Tobias Gysi
Date: Fri, 24 Mar 2023 07:57:24 +0100
Subject: [PATCH 511/691] [mlir][llvm] Switch remaining LLVM dialect tests to opaque pointers.

The revision switches the remaining LLVM dialect tests to use opaque
pointers. Selected tests are copied to a postfixed test file for the time
being. A number of tests disappear once we fully switch to opaque pointers.
In particular, all tests that verify that a pointer element type matches
another type, as well as tests of recursive types.
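As a rough illustration of the rewrite applied throughout these tests (a
sketch, not a hunk from this patch; %size and the i32 element type are
placeholders), typed pointers encode the element type in the pointer type,
while opaque pointers carry it on the operation instead:

  // Typed-pointer style, now kept only in the *-typed-pointers.mlir copies:
  %0 = llvm.alloca %size x i32 : (i64) -> !llvm.ptr<i32>
  %1 = llvm.load %0 : !llvm.ptr<i32>

  // Opaque-pointer style used by the updated tests:
  %0 = llvm.alloca %size x i32 : (i64) -> !llvm.ptr
  %1 = llvm.load %0 : !llvm.ptr -> i32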
Part of https://discourse.llvm.org/t/rfc-switching-the-llvm-dialect-and-dialect-lowerings-to-opaque-pointers/68179 Reviewed By: Dinistro, zero9178 Differential Revision: https://reviews.llvm.org/D146726 --- mlir/test/Dialect/LLVMIR/callgraph.mlir | 27 +- .../LLVMIR/canonicalize-typed-pointers.mlir | 86 ++++++ mlir/test/Dialect/LLVMIR/canonicalize.mlir | 55 ++-- mlir/test/Dialect/LLVMIR/debuginfo.mlir | 4 +- .../dynamic-gep-index-typed-pointers.mlir | 12 + .../Dialect/LLVMIR/dynamic-gep-index.mlir | 6 +- mlir/test/Dialect/LLVMIR/func.mlir | 62 ++-- .../Dialect/LLVMIR/global-typed-pointers.mlir | 46 +++ mlir/test/Dialect/LLVMIR/global.mlir | 47 +-- .../LLVMIR/invalid-typed-pointers.mlir | 283 ++++++++++++++++++ mlir/test/Dialect/LLVMIR/invalid.mlir | 187 +++++------- .../Dialect/LLVMIR/layout-typed-pointers.mlir | 145 +++++++++ mlir/test/Dialect/LLVMIR/layout.mlir | 78 +---- .../Dialect/LLVMIR/nvvm-typed-pointers.mlir | 55 ++++ mlir/test/Dialect/LLVMIR/nvvm.mlir | 30 +- ...arameter-attrs-invalid-typed-pointers.mlir | 6 + .../LLVMIR/parameter-attrs-invalid.mlir | 5 - .../LLVMIR/types-invalid-typed-pointers.mlir | 42 +++ mlir/test/Dialect/LLVMIR/types-invalid.mlir | 17 +- .../Dialect/LLVMIR/types-typed-pointers.mlir | 118 ++++++++ mlir/test/Dialect/LLVMIR/types.mlir | 62 +--- 21 files changed, 991 insertions(+), 382 deletions(-) create mode 100644 mlir/test/Dialect/LLVMIR/canonicalize-typed-pointers.mlir create mode 100644 mlir/test/Dialect/LLVMIR/dynamic-gep-index-typed-pointers.mlir create mode 100644 mlir/test/Dialect/LLVMIR/global-typed-pointers.mlir create mode 100644 mlir/test/Dialect/LLVMIR/invalid-typed-pointers.mlir create mode 100644 mlir/test/Dialect/LLVMIR/layout-typed-pointers.mlir create mode 100644 mlir/test/Dialect/LLVMIR/nvvm-typed-pointers.mlir create mode 100644 mlir/test/Dialect/LLVMIR/parameter-attrs-invalid-typed-pointers.mlir create mode 100644 mlir/test/Dialect/LLVMIR/types-invalid-typed-pointers.mlir create mode 100644 mlir/test/Dialect/LLVMIR/types-typed-pointers.mlir diff --git a/mlir/test/Dialect/LLVMIR/callgraph.mlir b/mlir/test/Dialect/LLVMIR/callgraph.mlir index edb5b35d126a5..ca1044b8288c4 100644 --- a/mlir/test/Dialect/LLVMIR/callgraph.mlir +++ b/mlir/test/Dialect/LLVMIR/callgraph.mlir @@ -58,33 +58,32 @@ module attributes {"test.name" = "Invoke call"} { // CHECK-DAG: -- Call-Edge : // CHECK: -- SCCs -- - llvm.mlir.global external constant @_ZTIi() : !llvm.ptr + llvm.mlir.global external constant @_ZTIi() : !llvm.ptr llvm.func @foo(%arg0: i32) -> !llvm.struct<(i32, f64, i32)> - llvm.func @bar(!llvm.ptr, !llvm.ptr, !llvm.ptr) + llvm.func @bar(!llvm.ptr, !llvm.ptr, !llvm.ptr) llvm.func @__gxx_personality_v0(...) 
-> i32 llvm.func @invokeLandingpad() -> i32 attributes { personality = @__gxx_personality_v0 } { %0 = llvm.mlir.constant(0 : i32) : i32 %1 = llvm.mlir.constant(3 : i32) : i32 %2 = llvm.mlir.constant("\01") : !llvm.array<1 x i8> - %3 = llvm.mlir.null : !llvm.ptr> - %4 = llvm.mlir.null : !llvm.ptr - %5 = llvm.mlir.addressof @_ZTIi : !llvm.ptr> - %6 = llvm.bitcast %5 : !llvm.ptr> to !llvm.ptr - %7 = llvm.mlir.constant(1 : i32) : i32 - %8 = llvm.alloca %7 x i8 : (i32) -> !llvm.ptr - %9 = llvm.invoke @foo(%7) to ^bb2 unwind ^bb1 : (i32) -> !llvm.struct<(i32, f64, i32)> + %3 = llvm.mlir.null : !llvm.ptr + %4 = llvm.mlir.null : !llvm.ptr + %5 = llvm.mlir.addressof @_ZTIi : !llvm.ptr + %6 = llvm.mlir.constant(1 : i32) : i32 + %7 = llvm.alloca %6 x i8 : (i32) -> !llvm.ptr + %8 = llvm.invoke @foo(%6) to ^bb2 unwind ^bb1 : (i32) -> !llvm.struct<(i32, f64, i32)> ^bb1: - %10 = llvm.landingpad cleanup (catch %3 : !llvm.ptr>) (catch %6 : !llvm.ptr) (filter %2 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> - %11 = llvm.intr.eh.typeid.for %6 : (!llvm.ptr) -> i32 - llvm.resume %10 : !llvm.struct<(ptr, i32)> + %10 = llvm.landingpad cleanup (catch %3 : !llvm.ptr) (catch %5 : !llvm.ptr) (filter %2 : !llvm.array<1 x i8>) : !llvm.struct<(ptr, i32)> + %11 = llvm.intr.eh.typeid.for %5 : (!llvm.ptr) -> i32 + llvm.resume %10 : !llvm.struct<(ptr, i32)> ^bb2: - llvm.return %7 : i32 + llvm.return %6 : i32 ^bb3: - llvm.invoke @bar(%8, %6, %4) to ^bb2 unwind ^bb1 : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () + llvm.invoke @bar(%7, %5, %4) to ^bb2 unwind ^bb1 : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> () ^bb4: llvm.return %0 : i32 diff --git a/mlir/test/Dialect/LLVMIR/canonicalize-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/canonicalize-typed-pointers.mlir new file mode 100644 index 0000000000000..2ae9727482fc3 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/canonicalize-typed-pointers.mlir @@ -0,0 +1,86 @@ +// RUN: mlir-opt --pass-pipeline='builtin.module(llvm.func(canonicalize{test-convergence}))' %s -split-input-file | FileCheck %s + +// CHECK-LABEL: fold_bitcast +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_bitcast(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr + llvm.return %c : !llvm.ptr +} + +// CHECK-LABEL: fold_bitcast2 +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_bitcast2(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr + %d = llvm.bitcast %c : !llvm.ptr to !llvm.ptr + llvm.return %d : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: fold_addrcast +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_addrcast(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr + llvm.return %c : !llvm.ptr +} + +// CHECK-LABEL: fold_addrcast2 +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_addrcast2(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr + %d = llvm.addrspacecast %c : !llvm.ptr to !llvm.ptr + llvm.return %d : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: fold_gep +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: llvm.return %[[a0]] +llvm.func @fold_gep(%x : !llvm.ptr) -> !llvm.ptr { + %c0 = arith.constant 0 : i32 + %c = llvm.getelementptr %x[%c0] : (!llvm.ptr, i32) -> !llvm.ptr + llvm.return %c : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: fold_gep_canon +// CHECK-SAME: %[[a0:arg[0-9]+]] +// CHECK-NEXT: %[[RES:.*]] = llvm.getelementptr %[[a0]][2] +// 
CHECK-NEXT: llvm.return %[[RES]] +llvm.func @fold_gep_canon(%x : !llvm.ptr) -> !llvm.ptr { + %c2 = arith.constant 2 : i32 + %c = llvm.getelementptr %x[%c2] : (!llvm.ptr, i32) -> !llvm.ptr + llvm.return %c : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: load_dce +// CHECK-NEXT: llvm.return +llvm.func @load_dce(%x : !llvm.ptr) { + %0 = llvm.load %x : !llvm.ptr + llvm.return +} + +llvm.mlir.global external @fp() : !llvm.ptr + +// CHECK-LABEL: addr_dce +// CHECK-NEXT: llvm.return +llvm.func @addr_dce(%x : !llvm.ptr) { + %0 = llvm.mlir.addressof @fp : !llvm.ptr> + llvm.return +} + +// CHECK-LABEL: alloca_dce +// CHECK-NEXT: llvm.return +llvm.func @alloca_dce() { + %c1_i64 = arith.constant 1 : i64 + %0 = llvm.alloca %c1_i64 x i32 : (i64) -> !llvm.ptr + llvm.return +} diff --git a/mlir/test/Dialect/LLVMIR/canonicalize.mlir b/mlir/test/Dialect/LLVMIR/canonicalize.mlir index 7fa7684f5ace0..6b2cac14f2985 100644 --- a/mlir/test/Dialect/LLVMIR/canonicalize.mlir +++ b/mlir/test/Dialect/LLVMIR/canonicalize.mlir @@ -37,8 +37,8 @@ llvm.func @no_fold_extractvalue(%arr: !llvm.array<4 x f32>) -> f32 { %3 = llvm.extractvalue %2[0, 0] : !llvm.array<4 x !llvm.array<4 x f32>> llvm.return %3 : f32 - } + // ----- // CHECK-LABEL: fold_unrelated_extractvalue @@ -56,18 +56,18 @@ llvm.func @fold_unrelated_extractvalue(%arr: !llvm.array<4 x f32>) -> f32 { // CHECK-LABEL: fold_bitcast // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: llvm.return %[[a0]] -llvm.func @fold_bitcast(%x : !llvm.ptr) -> !llvm.ptr { - %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr - llvm.return %c : !llvm.ptr +llvm.func @fold_bitcast(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr + llvm.return %c : !llvm.ptr } // CHECK-LABEL: fold_bitcast2 // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: llvm.return %[[a0]] -llvm.func @fold_bitcast2(%x : !llvm.ptr) -> !llvm.ptr { - %c = llvm.bitcast %x : !llvm.ptr to !llvm.ptr - %d = llvm.bitcast %c : !llvm.ptr to !llvm.ptr - llvm.return %d : !llvm.ptr +llvm.func @fold_bitcast2(%x : i32) -> i32 { + %c = llvm.bitcast %x : i32 to f32 + %d = llvm.bitcast %c : f32 to i32 + llvm.return %d : i32 } // ----- @@ -75,18 +75,18 @@ llvm.func @fold_bitcast2(%x : !llvm.ptr) -> !llvm.ptr { // CHECK-LABEL: fold_addrcast // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: llvm.return %[[a0]] -llvm.func @fold_addrcast(%x : !llvm.ptr) -> !llvm.ptr { - %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr - llvm.return %c : !llvm.ptr +llvm.func @fold_addrcast(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr + llvm.return %c : !llvm.ptr } // CHECK-LABEL: fold_addrcast2 // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: llvm.return %[[a0]] -llvm.func @fold_addrcast2(%x : !llvm.ptr) -> !llvm.ptr { - %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr - %d = llvm.addrspacecast %c : !llvm.ptr to !llvm.ptr - llvm.return %d : !llvm.ptr +llvm.func @fold_addrcast2(%x : !llvm.ptr) -> !llvm.ptr { + %c = llvm.addrspacecast %x : !llvm.ptr to !llvm.ptr<5> + %d = llvm.addrspacecast %c : !llvm.ptr<5> to !llvm.ptr + llvm.return %d : !llvm.ptr } // ----- @@ -94,10 +94,10 @@ llvm.func @fold_addrcast2(%x : !llvm.ptr) -> !llvm.ptr { // CHECK-LABEL: fold_gep // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: llvm.return %[[a0]] -llvm.func @fold_gep(%x : !llvm.ptr) -> !llvm.ptr { +llvm.func @fold_gep(%x : !llvm.ptr) -> !llvm.ptr { %c0 = arith.constant 0 : i32 - %c = llvm.getelementptr %x[%c0] : (!llvm.ptr, i32) -> !llvm.ptr - llvm.return %c : !llvm.ptr + %c = llvm.getelementptr %x[%c0] : (!llvm.ptr, i32) 
-> !llvm.ptr, i8 + llvm.return %c : !llvm.ptr } // CHECK-LABEL: fold_gep_neg @@ -114,13 +114,12 @@ llvm.func @fold_gep_neg(%x : !llvm.ptr) -> !llvm.ptr { // CHECK-SAME: %[[a0:arg[0-9]+]] // CHECK-NEXT: %[[RES:.*]] = llvm.getelementptr %[[a0]][2] // CHECK-NEXT: llvm.return %[[RES]] -llvm.func @fold_gep_canon(%x : !llvm.ptr) -> !llvm.ptr { +llvm.func @fold_gep_canon(%x : !llvm.ptr) -> !llvm.ptr { %c2 = arith.constant 2 : i32 - %c = llvm.getelementptr %x[%c2] : (!llvm.ptr, i32) -> !llvm.ptr - llvm.return %c : !llvm.ptr + %c = llvm.getelementptr %x[%c2] : (!llvm.ptr, i32) -> !llvm.ptr, i8 + llvm.return %c : !llvm.ptr } - // ----- // Check that LLVM constants participate in cross-dialect constant folding. The @@ -142,17 +141,17 @@ llvm.func @llvm_constant() -> i32 { // CHECK-LABEL: load_dce // CHECK-NEXT: llvm.return -llvm.func @load_dce(%x : !llvm.ptr) { - %0 = llvm.load %x : !llvm.ptr +llvm.func @load_dce(%x : !llvm.ptr) { + %0 = llvm.load %x : !llvm.ptr -> i8 llvm.return } -llvm.mlir.global external @fp() : !llvm.ptr +llvm.mlir.global external @fp() : !llvm.ptr // CHECK-LABEL: addr_dce // CHECK-NEXT: llvm.return -llvm.func @addr_dce(%x : !llvm.ptr) { - %0 = llvm.mlir.addressof @fp : !llvm.ptr> +llvm.func @addr_dce(%x : !llvm.ptr) { + %0 = llvm.mlir.addressof @fp : !llvm.ptr llvm.return } @@ -160,6 +159,6 @@ llvm.func @addr_dce(%x : !llvm.ptr) { // CHECK-NEXT: llvm.return llvm.func @alloca_dce() { %c1_i64 = arith.constant 1 : i64 - %0 = llvm.alloca %c1_i64 x i32 : (i64) -> !llvm.ptr + %0 = llvm.alloca %c1_i64 x i32 : (i64) -> !llvm.ptr llvm.return } diff --git a/mlir/test/Dialect/LLVMIR/debuginfo.mlir b/mlir/test/Dialect/LLVMIR/debuginfo.mlir index 7aaef0d31bb9d..f7517b2f23108 100644 --- a/mlir/test/Dialect/LLVMIR/debuginfo.mlir +++ b/mlir/test/Dialect/LLVMIR/debuginfo.mlir @@ -134,10 +134,10 @@ llvm.func @addr(%arg: i64) { // CHECK: %[[ALLOC:.*]] = llvm.alloca %allocCount = llvm.mlir.constant(1 : i32) : i32 - %alloc = llvm.alloca %allocCount x i64 : (i32) -> !llvm.ptr + %alloc = llvm.alloca %allocCount x i64 : (i32) -> !llvm.ptr // CHECK: llvm.intr.dbg.declare #[[VAR0]] = %[[ALLOC]] - llvm.intr.dbg.declare #var0 = %alloc : !llvm.ptr + llvm.intr.dbg.declare #var0 = %alloc : !llvm.ptr llvm.return } diff --git a/mlir/test/Dialect/LLVMIR/dynamic-gep-index-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/dynamic-gep-index-typed-pointers.mlir new file mode 100644 index 0000000000000..9e14b1db3432b --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/dynamic-gep-index-typed-pointers.mlir @@ -0,0 +1,12 @@ +// RUN: mlir-opt %s | FileCheck %s + +module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>>} { + // CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr>, %[[ARG1:.+]]: i32) + llvm.func @foo(%arg0: !llvm.ptr, array<4 x i32>)>>, %arg1: i32) { + // CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i32) + %0 = llvm.mlir.constant(0 : i32) : i32 + // CHECK: llvm.getelementptr %[[ARG0]][%[[C0]], 1, %[[ARG1]]] + %1 = "llvm.getelementptr"(%arg0, %0, %arg1) {rawConstantIndices = array} : (!llvm.ptr, array<4 x i32>)>>, i32, i32) -> !llvm.ptr + llvm.return + } +} diff --git a/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir b/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir index 9e14b1db3432b..f5808134ea026 100644 --- a/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir +++ 
b/mlir/test/Dialect/LLVMIR/dynamic-gep-index.mlir @@ -1,12 +1,12 @@ // RUN: mlir-opt %s | FileCheck %s module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>, #dlti.dl_entry : vector<2xi32>>>} { - // CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr>, %[[ARG1:.+]]: i32) - llvm.func @foo(%arg0: !llvm.ptr, array<4 x i32>)>>, %arg1: i32) { + // CHECK: llvm.func @foo(%[[ARG0:.+]]: !llvm.ptr, %[[ARG1:.+]]: i32) + llvm.func @foo(%arg0: !llvm.ptr, %arg1: i32) { // CHECK: %[[C0:.+]] = llvm.mlir.constant(0 : i32) %0 = llvm.mlir.constant(0 : i32) : i32 // CHECK: llvm.getelementptr %[[ARG0]][%[[C0]], 1, %[[ARG1]]] - %1 = "llvm.getelementptr"(%arg0, %0, %arg1) {rawConstantIndices = array} : (!llvm.ptr, array<4 x i32>)>>, i32, i32) -> !llvm.ptr + %1 = "llvm.getelementptr"(%arg0, %0, %arg1) {elem_type = !llvm.struct<"my_struct", (struct<"sub_struct", (i32, i8)>, array<4 x i32>)>, rawConstantIndices = array} : (!llvm.ptr, i32, i32) -> !llvm.ptr llvm.return } } diff --git a/mlir/test/Dialect/LLVMIR/func.mlir b/mlir/test/Dialect/LLVMIR/func.mlir index 5cc7d75b627fa..50f6c6a0e56f5 100644 --- a/mlir/test/Dialect/LLVMIR/func.mlir +++ b/mlir/test/Dialect/LLVMIR/func.mlir @@ -33,10 +33,10 @@ module { // GENERIC-SAME: () -> () }) {sym_name = "baz", function_type = !llvm.func} : () -> () - // CHECK: llvm.func @qux(!llvm.ptr {llvm.noalias}, i64) + // CHECK: llvm.func @qux(!llvm.ptr {llvm.noalias}, i64) // CHECK: attributes {xxx = {yyy = 42 : i64}} "llvm.func"() ({ - }) {sym_name = "qux", function_type = !llvm.func, i64)>, + }) {sym_name = "qux", function_type = !llvm.func, arg_attrs = [{llvm.noalias}, {}], xxx = {yyy = 42}} : () -> () // CHECK: llvm.func @roundtrip1() @@ -71,56 +71,56 @@ module { // CHECK: llvm.func @roundtrip8() -> i32 llvm.func @roundtrip8() -> i32 attributes {} - // CHECK: llvm.func @roundtrip9(!llvm.ptr {llvm.noalias}) - llvm.func @roundtrip9(!llvm.ptr {llvm.noalias}) + // CHECK: llvm.func @roundtrip9(!llvm.ptr {llvm.noalias}) + llvm.func @roundtrip9(!llvm.ptr {llvm.noalias}) - // CHECK: llvm.func @roundtrip10(!llvm.ptr {llvm.noalias}) - llvm.func @roundtrip10(%arg0: !llvm.ptr {llvm.noalias}) + // CHECK: llvm.func @roundtrip10(!llvm.ptr {llvm.noalias}) + llvm.func @roundtrip10(%arg0: !llvm.ptr {llvm.noalias}) - // CHECK: llvm.func @roundtrip11(%{{.*}}: !llvm.ptr {llvm.noalias}) { - llvm.func @roundtrip11(%arg0: !llvm.ptr {llvm.noalias}) { + // CHECK: llvm.func @roundtrip11(%{{.*}}: !llvm.ptr {llvm.noalias}) { + llvm.func @roundtrip11(%arg0: !llvm.ptr {llvm.noalias}) { llvm.return } - // CHECK: llvm.func @roundtrip12(%{{.*}}: !llvm.ptr {llvm.noalias}) + // CHECK: llvm.func @roundtrip12(%{{.*}}: !llvm.ptr {llvm.noalias}) // CHECK: attributes {foo = 42 : i32} - llvm.func @roundtrip12(%arg0: !llvm.ptr {llvm.noalias}) + llvm.func @roundtrip12(%arg0: !llvm.ptr {llvm.noalias}) attributes {foo = 42 : i32} { llvm.return } - // CHECK: llvm.func @byvalattr(%{{.*}}: !llvm.ptr {llvm.byval = i32}) - llvm.func @byvalattr(%arg0: !llvm.ptr {llvm.byval = i32}) { + // CHECK: llvm.func @byvalattr(%{{.*}}: !llvm.ptr {llvm.byval = i32}) + llvm.func @byvalattr(%arg0: !llvm.ptr {llvm.byval = i32}) { llvm.return } - // CHECK: llvm.func @sretattr(%{{.*}}: !llvm.ptr {llvm.sret = i32}) - // LOCINFO: llvm.func @sretattr(%{{.*}}: !llvm.ptr {llvm.sret = i32} loc("some_source_loc")) - llvm.func 
@sretattr(%arg0: !llvm.ptr {llvm.sret = i32} loc("some_source_loc")) { + // CHECK: llvm.func @sretattr(%{{.*}}: !llvm.ptr {llvm.sret = i32}) + // LOCINFO: llvm.func @sretattr(%{{.*}}: !llvm.ptr {llvm.sret = i32} loc("some_source_loc")) + llvm.func @sretattr(%arg0: !llvm.ptr {llvm.sret = i32} loc("some_source_loc")) { llvm.return } - // CHECK: llvm.func @nestattr(%{{.*}}: !llvm.ptr {llvm.nest}) - llvm.func @nestattr(%arg0: !llvm.ptr {llvm.nest}) { + // CHECK: llvm.func @nestattr(%{{.*}}: !llvm.ptr {llvm.nest}) + llvm.func @nestattr(%arg0: !llvm.ptr {llvm.nest}) { llvm.return } - // CHECK: llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) - llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) - // CHECK: llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) - llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) - // CHECK: llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) - llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) - // CHECK: llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) - llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) - // CHECK: llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) - llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) + // CHECK: llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) + llvm.func @llvm_noalias_decl(!llvm.ptr {llvm.noalias}) + // CHECK: llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) + llvm.func @byrefattr_decl(!llvm.ptr {llvm.byref = i32}) + // CHECK: llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) + llvm.func @byvalattr_decl(!llvm.ptr {llvm.byval = i32}) + // CHECK: llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) + llvm.func @sretattr_decl(!llvm.ptr {llvm.sret = i32}) + // CHECK: llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) + llvm.func @nestattr_decl(!llvm.ptr {llvm.nest}) // CHECK: llvm.func @noundefattr_decl(i32 {llvm.noundef}) llvm.func @noundefattr_decl(i32 {llvm.noundef}) - // CHECK: llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4 : i64}) - llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4}) - // CHECK: llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) - llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) + // CHECK: llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4 : i64}) + llvm.func @llvm_align_decl(!llvm.ptr {llvm.align = 4}) + // CHECK: llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) + llvm.func @inallocaattr_decl(!llvm.ptr {llvm.inalloca = i32}) // CHECK: llvm.func @variadic(...) 
diff --git a/mlir/test/Dialect/LLVMIR/global-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/global-typed-pointers.mlir new file mode 100644 index 0000000000000..56d720cc866b6 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/global-typed-pointers.mlir @@ -0,0 +1,46 @@ +// RUN: mlir-opt -split-input-file -verify-diagnostics %s | FileCheck %s + +// CHECK: llvm.mlir.global internal @global(42 : i64) {addr_space = 0 : i32} : i64 +llvm.mlir.global internal @global(42 : i64) : i64 + +// CHECK: llvm.mlir.global internal constant @".string"("foobar") +llvm.mlir.global internal constant @".string"("foobar") : !llvm.array<6 x i8> + +func.func @references() { + // CHECK: llvm.mlir.addressof @global : !llvm.ptr + %0 = llvm.mlir.addressof @global : !llvm.ptr + + // CHECK: llvm.mlir.addressof @".string" : !llvm.ptr> + %1 = llvm.mlir.addressof @".string" : !llvm.ptr> + + llvm.return +} + +// ----- + +llvm.mlir.global internal @foo(0: i32) : i32 + +func.func @bar() { + // expected-error @+1 {{the type must be a pointer to the type of the referenced global}} + llvm.mlir.addressof @foo : !llvm.ptr + llvm.return +} + +// ----- + +llvm.func @foo() + +llvm.func @bar() { + // expected-error @+1 {{the type must be a pointer to the type of the referenced function}} + llvm.mlir.addressof @foo : !llvm.ptr + llvm.return +} + +// ----- + +llvm.mlir.global internal @g(32 : i64) {addr_space = 3: i32} : i64 +func.func @mismatch_addr_space() { + // expected-error @+1 {{pointer address space must match address space of the referenced global}} + llvm.mlir.addressof @g : !llvm.ptr + llvm.return +} diff --git a/mlir/test/Dialect/LLVMIR/global.mlir b/mlir/test/Dialect/LLVMIR/global.mlir index 2f0850834a0ef..aff116db5dcca 100644 --- a/mlir/test/Dialect/LLVMIR/global.mlir +++ b/mlir/test/Dialect/LLVMIR/global.mlir @@ -66,17 +66,14 @@ llvm.mlir.global external @has_addr_space(32 : i64) {addr_space = 3: i32} : i64 // CHECK-LABEL: references func.func @references() { - // CHECK: llvm.mlir.addressof @global : !llvm.ptr - %0 = llvm.mlir.addressof @global : !llvm.ptr - - // CHECK: llvm.mlir.addressof @".string" : !llvm.ptr> - %1 = llvm.mlir.addressof @".string" : !llvm.ptr> + // CHECK: llvm.mlir.addressof @".string" : !llvm.ptr + %0 = llvm.mlir.addressof @".string" : !llvm.ptr // CHECK: llvm.mlir.addressof @global : !llvm.ptr - %2 = llvm.mlir.addressof @global : !llvm.ptr + %1 = llvm.mlir.addressof @global : !llvm.ptr // CHECK: llvm.mlir.addressof @has_addr_space : !llvm.ptr<3> - %3 = llvm.mlir.addressof @has_addr_space : !llvm.ptr<3> + %2 = llvm.mlir.addressof @has_addr_space : !llvm.ptr<3> llvm.return } @@ -164,7 +161,7 @@ func.func @foo() { // The attribute parser will consume the first colon-type, so we put two of // them to trigger the attribute type mismatch error. 
// expected-error @+1 {{invalid kind of attribute specified}} - llvm.mlir.addressof "foo" : i64 : !llvm.ptr> + llvm.mlir.addressof "foo" : i64 : !llvm.ptr llvm.return } @@ -172,27 +169,7 @@ func.func @foo() { func.func @foo() { // expected-error @+1 {{must reference a global defined by 'llvm.mlir.global'}} - llvm.mlir.addressof @foo : !llvm.ptr> - llvm.return -} - -// ----- - -llvm.mlir.global internal @foo(0: i32) : i32 - -func.func @bar() { - // expected-error @+1 {{the type must be a pointer to the type of the referenced global}} - llvm.mlir.addressof @foo : !llvm.ptr - llvm.return -} - -// ----- - -llvm.func @foo() - -llvm.func @bar() { - // expected-error @+1 {{the type must be a pointer to the type of the referenced function}} - llvm.mlir.addressof @foo : !llvm.ptr + llvm.mlir.addressof @foo : !llvm.ptr llvm.return } @@ -224,23 +201,15 @@ llvm.mlir.global internal @g(43 : i64) : i64 { llvm.mlir.global internal @g(32 : i64) {addr_space = 3: i32} : i64 func.func @mismatch_addr_space_implicit_global() { // expected-error @+1 {{pointer address space must match address space of the referenced global}} - llvm.mlir.addressof @g : !llvm.ptr + llvm.mlir.addressof @g : !llvm.ptr llvm.return } // ----- llvm.mlir.global internal @g(32 : i64) {addr_space = 3: i32} : i64 -func.func @mismatch_addr_space() { - // expected-error @+1 {{pointer address space must match address space of the referenced global}} - llvm.mlir.addressof @g : !llvm.ptr - llvm.return -} -// ----- -llvm.mlir.global internal @g(32 : i64) {addr_space = 3: i32} : i64 - -func.func @mismatch_addr_space_opaque() { +func.func @mismatch_addr_space() { // expected-error @+1 {{pointer address space must match address space of the referenced global}} llvm.mlir.addressof @g : !llvm.ptr<4> llvm.return diff --git a/mlir/test/Dialect/LLVMIR/invalid-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/invalid-typed-pointers.mlir new file mode 100644 index 0000000000000..033b84d04ef87 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/invalid-typed-pointers.mlir @@ -0,0 +1,283 @@ +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -verify-diagnostics + +func.func @alloca_ptr_type_attr_non_opaque_ptr(%sz : i64) { + // expected-error@below {{unexpected 'elem_type' attribute when non-opaque pointer type is used}} + "llvm.alloca"(%sz) { elem_type = i32 } : (i64) -> !llvm.ptr +} + +// ----- + +func.func @gep_missing_input_type(%pos : i64, %base : !llvm.ptr) { + // expected-error@+1 {{2 operands present, but expected 0}} + llvm.getelementptr %base[%pos] : () -> (!llvm.ptr) +} + +// ----- + +func.func @gep_missing_result_type(%pos : i64, %base : !llvm.ptr) { + // expected-error@+1 {{op requires one result}} + llvm.getelementptr %base[%pos] : (!llvm.ptr, i64) -> () +} + +// ----- + +func.func @gep_non_function_type(%pos : i64, %base : !llvm.ptr) { + // expected-error@+1 {{invalid kind of type specified}} + llvm.getelementptr %base[%pos] : !llvm.ptr +} + +// ----- + +func.func @gep_too_few_dynamic(%base : !llvm.ptr) { + // expected-error@+1 {{expected as many dynamic indices as specified in 'rawConstantIndices'}} + %1 = "llvm.getelementptr"(%base) {rawConstantIndices = array} : (!llvm.ptr) -> !llvm.ptr +} + +// ----- + +func.func @call_variadic(%callee : !llvm.ptr>, %arg : i8) { + // expected-error@+1 {{indirect calls to variadic functions are not supported}} + llvm.call %callee(%arg) : !llvm.ptr>, (i8) -> (i8) + llvm.return +} + +// ----- + +func.func @indirect_callee_arg_mismatch(%arg0 : i32, %callee : !llvm.ptr>) { + // expected-error@+1 
{{'llvm.call' op operand type mismatch for operand 0: 'i32' != 'i8'}} + "llvm.call"(%callee, %arg0) : (!llvm.ptr>, i32) -> () + llvm.return +} + +// ----- + +func.func @indirect_callee_return_mismatch(%callee : !llvm.ptr>) { + // expected-error@+1 {{'llvm.call' op result type mismatch: 'i32' != 'i8'}} + "llvm.call"(%callee) : (!llvm.ptr>) -> (i32) + llvm.return +} + +// ----- + +func.func @atomicrmw_mismatched_operands(%f32_ptr : !llvm.ptr, %i32 : i32) { + // expected-error@+1 {{expected LLVM IR element type for operand #0 to match type for operand #1}} + %0 = "llvm.atomicrmw"(%f32_ptr, %i32) {bin_op=11, ordering=1} : (!llvm.ptr, i32) -> i32 + llvm.return +} + +// ----- + +func.func @cmpxchg_expected_ptr(%f32 : f32) { + // expected-error@+1 {{op operand #0 must be LLVM pointer to integer or LLVM pointer type}} + %0 = "llvm.cmpxchg"(%f32, %f32, %f32) {success_ordering=2,failure_ordering=2} : (f32, f32, f32) -> !llvm.struct<(f32, i1)> + llvm.return +} + +// ----- + +func.func @cmpxchg_mismatched_operands(%i64_ptr : !llvm.ptr, %i32 : i32) { + // expected-error@+1 {{expected LLVM IR element type for operand #0 to match type for all other operands}} + %0 = "llvm.cmpxchg"(%i64_ptr, %i32, %i32) {success_ordering=2,failure_ordering=2} : (!llvm.ptr, i32, i32) -> !llvm.struct<(i32, i1)> + llvm.return +} + +// ----- + +llvm.func @foo(i32) -> i32 +llvm.func @__gxx_personality_v0(...) -> i32 + +llvm.func @bad_landingpad(%arg0: !llvm.ptr>) -> i32 attributes { personality = @__gxx_personality_v0} { + %0 = llvm.mlir.constant(3 : i32) : i32 + %1 = llvm.mlir.constant(2 : i32) : i32 + %2 = llvm.invoke @foo(%1) to ^bb1 unwind ^bb2 : (i32) -> i32 +^bb1: // pred: ^bb0 + llvm.return %1 : i32 +^bb2: // pred: ^bb0 + // expected-error@+1 {{clause #0 is not a known constant - null, addressof, bitcast}} + %3 = llvm.landingpad cleanup (catch %1 : i32) (catch %arg0 : !llvm.ptr>) : !llvm.struct<(ptr, i32)> + llvm.return %0 : i32 +} + +// ----- + +llvm.func @foo(i32) -> i32 +llvm.func @__gxx_personality_v0(...) -> i32 + +llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personality_v0} { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.alloca %0 x !llvm.ptr : (i32) -> !llvm.ptr> + // expected-note@+1 {{global addresses expected as operand to bitcast used in clauses for landingpad}} + %2 = llvm.bitcast %1 : !llvm.ptr> to !llvm.ptr + %3 = llvm.invoke @foo(%0) to ^bb1 unwind ^bb2 : (i32) -> i32 +^bb1: // pred: ^bb0 + llvm.return %0 : i32 +^bb2: // pred: ^bb0 + // expected-error@+1 {{constant clauses expected}} + %5 = llvm.landingpad (catch %2 : !llvm.ptr) : !llvm.struct<(ptr, i32)> + llvm.return %0 : i32 +} + +// ----- + +llvm.func @foo(i32) -> i32 +llvm.func @__gxx_personality_v0(...) -> i32 + +llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personality_v0} { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.invoke @foo(%0) to ^bb1 unwind ^bb2 : (i32) -> i32 +^bb1: // pred: ^bb0 + llvm.return %0 : i32 +^bb2: // pred: ^bb0 + // expected-error@+1 {{landingpad instruction expects at least one clause or cleanup attribute}} + %2 = llvm.landingpad : !llvm.struct<(ptr, i32)> + llvm.return %0 : i32 +} + +// ----- + +llvm.func @foo(i32) -> i32 +llvm.func @__gxx_personality_v0(...) 
-> i32 + +llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personality_v0 } { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.invoke @foo(%0) to ^bb1 unwind ^bb2 : (i32) -> i32 +^bb1: // pred: ^bb0 + llvm.return %0 : i32 +^bb2: // pred: ^bb0 + %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + // expected-error@+1 {{'llvm.resume' op expects landingpad value as operand}} + llvm.resume %0 : i32 +} + +// ----- + +llvm.func @foo(i32) -> i32 + +llvm.func @caller(%arg0: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + %1 = llvm.invoke @foo(%0) to ^bb1 unwind ^bb2 : (i32) -> i32 +^bb1: // pred: ^bb0 + llvm.return %0 : i32 +^bb2: // pred: ^bb0 + // expected-error@+1 {{llvm.landingpad needs to be in a function with a personality}} + %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + llvm.resume %2 : !llvm.struct<(ptr, i32)> +} + +// ----- + +llvm.func @wmmaLoadOp_invalid_mem_space(%arg0: !llvm.ptr<5>, %arg1: i32) { + // expected-error@+1 {{'nvvm.wmma.load' op expected source pointer in memory space 0, 1, 3}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : (!llvm.ptr<5>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + llvm.return +} + +// ----- + +llvm.func @wmmaLoadOp_invalid_AOp(%arg0: !llvm.ptr<3>, %arg1: i32) { + // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + llvm.return +} + +// ----- + +llvm.func @wmmaLoadOp_invalid_BOp(%arg0: !llvm.ptr<3>, %arg1: i32) { + // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + + llvm.return +} + +// ----- + +llvm.func @wmmaLoadOp_invalid_COp(%arg0: !llvm.ptr<3>, %arg1: i32) { + // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 4 elements of type 'vector<2xf16>'}} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> + + llvm.return +} + +// ----- + +llvm.func @wmmaStoreOp_invalid_mem_space(%arg0: !llvm.ptr<5>, %arg1: i32, + %arg2: vector<2 x f16>, %arg3: vector<2 x f16>, + %arg4: vector<2 x f16>, %arg5: vector<2 xf16>) { + // expected-error@+1 {{'nvvm.wmma.store' op expected operands to be a source pointer in memory space 0, 1, 3}} + nvvm.wmma.store %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 + {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : !llvm.ptr<5>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> + llvm.return +} + +// ----- + +llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { + // expected-error@+1 
{{'nvvm.ldmatrix' op expected source pointer in memory space 3}} + %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 + llvm.return +} + +// ----- + +llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { + // expected-error@+1 {{'nvvm.ldmatrix' op expected num attribute to be 1, 2 or 4}} + %l = nvvm.ldmatrix %arg0 {num = 3 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 + llvm.return +} + +// ----- + +llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { + // expected-error@+1 {{'nvvm.ldmatrix' op expected destination type is i32}} + %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32)> + llvm.return +} + +// ----- + +llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { + // expected-error@+1 {{'nvvm.ldmatrix' op expected destination type is a structure of 4 elements of type i32}} + %l = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> + llvm.return +} + +// ----- + +func.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { + // expected-error @below {{expected byte size to be either 4, 8 or 16.}} + nvvm.cp.async.shared.global %arg0, %arg1, 32 : !llvm.ptr, !llvm.ptr + return +} + +// ----- + +func.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { + // expected-error @below {{bypass l1 is only support for 16 bytes copy.}} + nvvm.cp.async.shared.global %arg0, %arg1, 8 {bypass_l1} : !llvm.ptr, !llvm.ptr + return +} + +// ----- + +func.func @gep_struct_variable(%arg0: !llvm.ptr>, %arg1: i32, %arg2: i32) { + // expected-error @below {{op expected index 1 indexing a struct to be constant}} + llvm.getelementptr %arg0[%arg1, %arg1] : (!llvm.ptr>, i32, i32) -> !llvm.ptr + return +} + +// ----- + +func.func @gep_out_of_bounds(%ptr: !llvm.ptr)>>, %idx: i64) { + // expected-error @below {{index 2 indexing a struct is out of bounds}} + llvm.getelementptr %ptr[%idx, 1, 3] : (!llvm.ptr)>>, i64) -> !llvm.ptr + return +} diff --git a/mlir/test/Dialect/LLVMIR/invalid.mlir b/mlir/test/Dialect/LLVMIR/invalid.mlir index 3e019144a199b..c3af84e55b881 100644 --- a/mlir/test/Dialect/LLVMIR/invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/invalid.mlir @@ -64,7 +64,7 @@ func.func @alloca_missing_input_result_type(%size : i64) { func.func @alloca_missing_input_type() { // expected-error@+1 {{expected trailing function type with one argument and one result}} - llvm.alloca %size x i32 : () -> (!llvm.ptr) + llvm.alloca %size x i32 : () -> (!llvm.ptr) } // ----- @@ -78,14 +78,14 @@ func.func @alloca_missing_result_type() { func.func @alloca_non_function_type() { // expected-error@+1 {{expected trailing function type with one argument and one result}} - llvm.alloca %size x i32 : !llvm.ptr + llvm.alloca %size x i32 : !llvm.ptr } // ----- func.func @alloca_non_integer_alignment() { // expected-error@+1 {{expected integer alignment}} - llvm.alloca %size x i32 {alignment = 3.0} : !llvm.ptr + llvm.alloca %size x i32 {alignment = 3.0} : !llvm.ptr } // ----- @@ -97,44 +97,37 @@ func.func @alloca_opaque_ptr_no_type(%sz : i64) { // ----- -func.func @alloca_ptr_type_attr_non_opaque_ptr(%sz : i64) { - // expected-error@below {{unexpected 'elem_type' attribute when non-opaque pointer type is used}} - "llvm.alloca"(%sz) { elem_type = i32 } : (i64) -> !llvm.ptr -} - -// ----- - -func.func @gep_missing_input_result_type(%pos : i64, %base : !llvm.ptr) { +func.func @gep_missing_input_result_type(%pos : i64, %base : !llvm.ptr) { // expected-error@+1 {{2 operands present, but expected 0}} llvm.getelementptr %base[%pos] 
: () -> () } // ----- -func.func @gep_missing_input_type(%pos : i64, %base : !llvm.ptr) { +func.func @gep_missing_input_type(%pos : i64, %base : !llvm.ptr) { // expected-error@+1 {{2 operands present, but expected 0}} - llvm.getelementptr %base[%pos] : () -> (!llvm.ptr) + llvm.getelementptr %base[%pos] : () -> (!llvm.ptr) } // ----- -func.func @gep_missing_result_type(%pos : i64, %base : !llvm.ptr) { +func.func @gep_missing_result_type(%pos : i64, %base : !llvm.ptr) { // expected-error@+1 {{op requires one result}} - llvm.getelementptr %base[%pos] : (!llvm.ptr, i64) -> () + llvm.getelementptr %base[%pos] : (!llvm.ptr, i64) -> () } // ----- -func.func @gep_non_function_type(%pos : i64, %base : !llvm.ptr) { +func.func @gep_non_function_type(%pos : i64, %base : !llvm.ptr) { // expected-error@+1 {{invalid kind of type specified}} - llvm.getelementptr %base[%pos] : !llvm.ptr + llvm.getelementptr %base[%pos] : !llvm.ptr } // ----- -func.func @gep_too_few_dynamic(%base : !llvm.ptr) { +func.func @gep_too_few_dynamic(%base : !llvm.ptr) { // expected-error@+1 {{expected as many dynamic indices as specified in 'rawConstantIndices'}} - %1 = "llvm.getelementptr"(%base) {rawConstantIndices = array} : (!llvm.ptr) -> !llvm.ptr + %1 = "llvm.getelementptr"(%base) {elem_type = f32, rawConstantIndices = array} : (!llvm.ptr) -> !llvm.ptr } // ----- @@ -302,14 +295,6 @@ func.func @call_unknown_symbol() { // ----- -func.func @call_variadic(%callee : !llvm.ptr>, %arg : i8) { - // expected-error@+1 {{indirect calls to variadic functions are not supported}} - llvm.call %callee(%arg) : !llvm.ptr>, (i8) -> (i8) - llvm.return -} - -// ----- - func.func private @standard_func_callee() func.func @call_non_llvm() { @@ -346,14 +331,6 @@ func.func @callee_arg_mismatch(%arg0 : i32) { // ----- -func.func @indirect_callee_arg_mismatch(%arg0 : i32, %callee : !llvm.ptr>) { - // expected-error@+1 {{'llvm.call' op operand type mismatch for operand 0: 'i32' != 'i8'}} - "llvm.call"(%callee, %arg0) : (!llvm.ptr>, i32) -> () - llvm.return -} - -// ----- - llvm.func @callee_func() -> (i8) func.func @callee_return_mismatch() { @@ -364,14 +341,6 @@ func.func @callee_return_mismatch() { // ----- -func.func @indirect_callee_return_mismatch(%callee : !llvm.ptr>) { - // expected-error@+1 {{'llvm.call' op result type mismatch: 'i32' != 'i8'}} - "llvm.call"(%callee) : (!llvm.ptr>) -> (i32) - llvm.return -} - -// ----- - func.func @call_too_many_results(%callee : !llvm.ptr) { // expected-error@+1 {{expected function with 0 or 1 result}} llvm.call %callee() : !llvm.ptr, () -> (i32, i32) @@ -406,14 +375,14 @@ llvm.func @func_result_mismatch(%arg0: f32) -> i32 { func.func @constant_wrong_type() { // expected-error@+1 {{only supports integer, float, string or elements attributes}} - llvm.mlir.constant(@constant_wrong_type) : !llvm.ptr> + llvm.mlir.constant(@constant_wrong_type) : !llvm.ptr } // ----- func.func @constant_wrong_type_string() { // expected-error@below {{expected array type of 3 i8 elements for the string constant}} - llvm.mlir.constant("foo") : !llvm.ptr + llvm.mlir.constant("foo") : !llvm.ptr } // ----- @@ -671,47 +640,39 @@ func.func @atomicrmw_expected_ptr(%f32 : f32) { // ----- -func.func @atomicrmw_mismatched_operands(%f32_ptr : !llvm.ptr, %i32 : i32) { - // expected-error@+1 {{expected LLVM IR element type for operand #0 to match type for operand #1}} - %0 = "llvm.atomicrmw"(%f32_ptr, %i32) {bin_op=11, ordering=1} : (!llvm.ptr, i32) -> i32 - llvm.return -} - -// ----- - -func.func @atomicrmw_mismatched_operands(%f32_ptr : 
!llvm.ptr, %f32 : f32) { +func.func @atomicrmw_mismatched_operands(%f32_ptr : !llvm.ptr, %f32 : f32) { // expected-error@+1 {{op failed to verify that result #0 and operand #1 have the same type}} - %0 = "llvm.atomicrmw"(%f32_ptr, %f32) {bin_op=11, ordering=1} : (!llvm.ptr, f32) -> i32 + %0 = "llvm.atomicrmw"(%f32_ptr, %f32) {bin_op=11, ordering=1} : (!llvm.ptr, f32) -> i32 llvm.return } // ----- -func.func @atomicrmw_expected_float(%i32_ptr : !llvm.ptr, %i32 : i32) { +func.func @atomicrmw_expected_float(%i32_ptr : !llvm.ptr, %i32 : i32) { // expected-error@+1 {{expected LLVM IR floating point type}} - %0 = llvm.atomicrmw fadd %i32_ptr, %i32 unordered : !llvm.ptr, i32 + %0 = llvm.atomicrmw fadd %i32_ptr, %i32 unordered : !llvm.ptr, i32 llvm.return } // ----- -func.func @atomicrmw_unexpected_xchg_type(%i1_ptr : !llvm.ptr, %i1 : i1) { +func.func @atomicrmw_unexpected_xchg_type(%i1_ptr : !llvm.ptr, %i1 : i1) { // expected-error@+1 {{unexpected LLVM IR type for 'xchg' bin_op}} - %0 = llvm.atomicrmw xchg %i1_ptr, %i1 unordered : !llvm.ptr, i1 + %0 = llvm.atomicrmw xchg %i1_ptr, %i1 unordered : !llvm.ptr, i1 llvm.return } // ----- -func.func @atomicrmw_expected_int(%f32_ptr : !llvm.ptr, %f32 : f32) { +func.func @atomicrmw_expected_int(%f32_ptr : !llvm.ptr, %f32 : f32) { // expected-error@+1 {{expected LLVM IR integer type}} - %0 = llvm.atomicrmw max %f32_ptr, %f32 unordered : !llvm.ptr, f32 + %0 = llvm.atomicrmw max %f32_ptr, %f32 unordered : !llvm.ptr, f32 llvm.return } // ----- -func.func @cmpxchg_expected_ptr(%f32_ptr : !llvm.ptr, %f32 : f32) { +func.func @cmpxchg_expected_ptr(%f32 : f32) { // expected-error@+1 {{op operand #0 must be LLVM pointer to integer or LLVM pointer type}} %0 = "llvm.cmpxchg"(%f32, %f32, %f32) {success_ordering=2,failure_ordering=2} : (f32, f32, f32) -> !llvm.struct<(f32, i1)> llvm.return @@ -719,14 +680,6 @@ func.func @cmpxchg_expected_ptr(%f32_ptr : !llvm.ptr, %f32 : f32) { // ----- -func.func @cmpxchg_mismatched_operands(%i64_ptr : !llvm.ptr, %i32 : i32) { - // expected-error@+1 {{expected LLVM IR element type for operand #0 to match type for all other operands}} - %0 = "llvm.cmpxchg"(%i64_ptr, %i32, %i32) {success_ordering=2,failure_ordering=2} : (!llvm.ptr, i32, i32) -> !llvm.struct<(i32, i1)> - llvm.return -} - -// ----- - func.func @cmpxchg_mismatched_value_operands(%ptr : !llvm.ptr, %i32 : i32, %i64 : i64) { // expected-error@+1 {{op failed to verify that operand #1 and operand #2 have the same type}} %0 = "llvm.cmpxchg"(%ptr, %i32, %i64) {success_ordering=2,failure_ordering=2} : (!llvm.ptr, i32, i64) -> !llvm.struct<(i32, i1)> @@ -743,41 +696,41 @@ func.func @cmpxchg_mismatched_result(%ptr : !llvm.ptr, %i64 : i64) { // ----- -func.func @cmpxchg_unexpected_type(%i1_ptr : !llvm.ptr, %i1 : i1) { +func.func @cmpxchg_unexpected_type(%i1_ptr : !llvm.ptr, %i1 : i1) { // expected-error@+1 {{unexpected LLVM IR type}} - %0 = llvm.cmpxchg %i1_ptr, %i1, %i1 monotonic monotonic : !llvm.ptr, i1 + %0 = llvm.cmpxchg %i1_ptr, %i1, %i1 monotonic monotonic : !llvm.ptr, i1 llvm.return } // ----- -func.func @cmpxchg_at_least_monotonic_success(%i32_ptr : !llvm.ptr, %i32 : i32) { +func.func @cmpxchg_at_least_monotonic_success(%i32_ptr : !llvm.ptr, %i32 : i32) { // expected-error@+1 {{ordering must be at least 'monotonic'}} - %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 unordered monotonic : !llvm.ptr, i32 + %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 unordered monotonic : !llvm.ptr, i32 llvm.return } // ----- -func.func @cmpxchg_at_least_monotonic_failure(%i32_ptr : !llvm.ptr, %i32 : i32) { 
+func.func @cmpxchg_at_least_monotonic_failure(%i32_ptr : !llvm.ptr, %i32 : i32) { // expected-error@+1 {{ordering must be at least 'monotonic'}} - %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 monotonic unordered : !llvm.ptr, i32 + %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 monotonic unordered : !llvm.ptr, i32 llvm.return } // ----- -func.func @cmpxchg_failure_release(%i32_ptr : !llvm.ptr, %i32 : i32) { +func.func @cmpxchg_failure_release(%i32_ptr : !llvm.ptr, %i32 : i32) { // expected-error@+1 {{failure ordering cannot be 'release' or 'acq_rel'}} - %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 acq_rel release : !llvm.ptr, i32 + %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 acq_rel release : !llvm.ptr, i32 llvm.return } // ----- -func.func @cmpxchg_failure_acq_rel(%i32_ptr : !llvm.ptr, %i32 : i32) { +func.func @cmpxchg_failure_acq_rel(%i32_ptr : !llvm.ptr, %i32 : i32) { // expected-error@+1 {{failure ordering cannot be 'release' or 'acq_rel'}} - %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 acq_rel acq_rel : !llvm.ptr, i32 + %0 = llvm.cmpxchg %i32_ptr, %i32, %i32 acq_rel acq_rel : !llvm.ptr, i32 llvm.return } @@ -786,7 +739,7 @@ func.func @cmpxchg_failure_acq_rel(%i32_ptr : !llvm.ptr, %i32 : i32) { llvm.func @foo(i32) -> i32 llvm.func @__gxx_personality_v0(...) -> i32 -llvm.func @bad_landingpad(%arg0: !llvm.ptr>) -> i32 attributes { personality = @__gxx_personality_v0} { +llvm.func @bad_landingpad(%arg0: !llvm.ptr) -> i32 attributes { personality = @__gxx_personality_v0} { %0 = llvm.mlir.constant(3 : i32) : i32 %1 = llvm.mlir.constant(2 : i32) : i32 %2 = llvm.invoke @foo(%1) to ^bb1 unwind ^bb2 : (i32) -> i32 @@ -794,7 +747,7 @@ llvm.func @bad_landingpad(%arg0: !llvm.ptr>) -> i32 attributes { persona llvm.return %1 : i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{clause #0 is not a known constant - null, addressof, bitcast}} - %3 = llvm.landingpad cleanup (catch %1 : i32) (catch %arg0 : !llvm.ptr>) : !llvm.struct<(ptr, i32)> + %3 = llvm.landingpad cleanup (catch %1 : i32) (catch %arg0 : !llvm.ptr) : !llvm.struct<(ptr, i32)> llvm.return %0 : i32 } @@ -805,15 +758,15 @@ llvm.func @__gxx_personality_v0(...) 
-> i32 llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personality_v0} { %0 = llvm.mlir.constant(1 : i32) : i32 - %1 = llvm.alloca %0 x !llvm.ptr : (i32) -> !llvm.ptr> + %1 = llvm.alloca %0 x !llvm.ptr : (i32) -> !llvm.ptr // expected-note@+1 {{global addresses expected as operand to bitcast used in clauses for landingpad}} - %2 = llvm.bitcast %1 : !llvm.ptr> to !llvm.ptr + %2 = llvm.bitcast %1 : !llvm.ptr to !llvm.ptr %3 = llvm.invoke @foo(%0) to ^bb1 unwind ^bb2 : (i32) -> i32 ^bb1: // pred: ^bb0 llvm.return %0 : i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{constant clauses expected}} - %5 = llvm.landingpad (catch %2 : !llvm.ptr) : !llvm.struct<(ptr, i32)> + %5 = llvm.landingpad (catch %2 : !llvm.ptr) : !llvm.struct<(ptr, i32)> llvm.return %0 : i32 } @@ -829,7 +782,7 @@ llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personali llvm.return %0 : i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{landingpad instruction expects at least one clause or cleanup attribute}} - %2 = llvm.landingpad : !llvm.struct<(ptr, i32)> + %2 = llvm.landingpad : !llvm.struct<(ptr, i32)> llvm.return %0 : i32 } @@ -844,7 +797,7 @@ llvm.func @caller(%arg0: i32) -> i32 attributes { personality = @__gxx_personali ^bb1: // pred: ^bb0 llvm.return %0 : i32 ^bb2: // pred: ^bb0 - %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> // expected-error@+1 {{'llvm.resume' op expects landingpad value as operand}} llvm.resume %0 : i32 } @@ -860,8 +813,8 @@ llvm.func @caller(%arg0: i32) -> i32 { llvm.return %0 : i32 ^bb2: // pred: ^bb0 // expected-error@+1 {{llvm.landingpad needs to be in a function with a personality}} - %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> - llvm.resume %2 : !llvm.struct<(ptr, i32)> + %2 = llvm.landingpad cleanup : !llvm.struct<(ptr, i32)> + llvm.resume %2 : !llvm.struct<(ptr, i32)> } // ----- @@ -1056,55 +1009,55 @@ module { // ----- -llvm.func @wmmaLoadOp_invalid_mem_space(%arg0: !llvm.ptr, %arg1: i32) { +llvm.func @wmmaLoadOp_invalid_mem_space(%arg0: !llvm.ptr<5>, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected source pointer in memory space 0, 1, 3}} %0 = nvvm.wmma.load %arg0, %arg1 {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + : (!llvm.ptr<5>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } // ----- -llvm.func @wmmaLoadOp_invalid_AOp(%arg0: !llvm.ptr, %arg1: i32) { +llvm.func @wmmaLoadOp_invalid_AOp(%arg0: !llvm.ptr<3>, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } // ----- -llvm.func @wmmaLoadOp_invalid_BOp(%arg0: !llvm.ptr, %arg1: i32) { +llvm.func @wmmaLoadOp_invalid_BOp(%arg0: !llvm.ptr<3>, %arg1: i32) { // 
expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 8 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> + : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>, vector<2xf16>)> llvm.return } // ----- -llvm.func @wmmaLoadOp_invalid_COp(%arg0: !llvm.ptr, %arg1: i32) { +llvm.func @wmmaLoadOp_invalid_COp(%arg0: !llvm.ptr<3>, %arg1: i32) { // expected-error@+1 {{'nvvm.wmma.load' op expected destination type is a structure of 4 elements of type 'vector<2xf16>'}} %0 = nvvm.wmma.load %arg0, %arg1 {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : (!llvm.ptr) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> + : (!llvm.ptr<3>) -> !llvm.struct<(vector<2xf16>, vector<2xf16>)> llvm.return } // ----- -llvm.func @wmmaStoreOp_invalid_mem_space(%arg0: !llvm.ptr, %arg1: i32, +llvm.func @wmmaStoreOp_invalid_mem_space(%arg0: !llvm.ptr<5>, %arg1: i32, %arg2: vector<2 x f16>, %arg3: vector<2 x f16>, %arg4: vector<2 x f16>, %arg5: vector<2 xf16>) { // expected-error@+1 {{'nvvm.wmma.store' op expected operands to be a source pointer in memory space 0, 1, 3}} nvvm.wmma.store %arg0, %arg1, %arg2, %arg3, %arg4, %arg5 {eltype = #nvvm.mma_type, k = 16 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : !llvm.ptr, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> + : !llvm.ptr<5>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16>, vector<2 x f16> llvm.return } @@ -1208,33 +1161,33 @@ llvm.func @gpu_wmma_mma_op_invalid_result(%arg0: vector<2 x f16>, %arg1: vector< // ----- -llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { +llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { // expected-error@+1 {{'nvvm.ldmatrix' op expected source pointer in memory space 3}} - %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 + %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 llvm.return } // ----- -llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { +llvm.func @wmmald_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected num attribute to be 1, 2 or 4}} - %l = nvvm.ldmatrix %arg0 {num = 3 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 + %l = nvvm.ldmatrix %arg0 {num = 3 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> i32 llvm.return } // ----- -llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { +llvm.func @wmmald_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected destination type is i32}} - %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32)> + %l = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> !llvm.struct<(i32)> llvm.return } // ----- -llvm.func @wmmald_matrix(%arg0: !llvm.ptr) { +llvm.func @wmmald_matrix(%arg0: !llvm.ptr<3>) { // expected-error@+1 {{'nvvm.ldmatrix' op expected destination type is a structure of 4 elements of type i32}} - %l = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> + %l = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> !llvm.struct<(i32, 
i32)> llvm.return } @@ -1278,33 +1231,33 @@ func.func @bitcast(%arg0: vector<2x3xf32>) { // ----- -func.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { +func.func @cp_async(%arg0: !llvm.ptr<3>, %arg1: !llvm.ptr<1>) { // expected-error @below {{expected byte size to be either 4, 8 or 16.}} - nvvm.cp.async.shared.global %arg0, %arg1, 32 : !llvm.ptr, !llvm.ptr + nvvm.cp.async.shared.global %arg0, %arg1, 32 : !llvm.ptr<3>, !llvm.ptr<1> return } // ----- -func.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { +func.func @cp_async(%arg0: !llvm.ptr<3>, %arg1: !llvm.ptr<1>) { // expected-error @below {{bypass l1 is only support for 16 bytes copy.}} - nvvm.cp.async.shared.global %arg0, %arg1, 8 {bypass_l1} : !llvm.ptr, !llvm.ptr + nvvm.cp.async.shared.global %arg0, %arg1, 8 {bypass_l1} : !llvm.ptr<3>, !llvm.ptr<1> return } // ----- -func.func @gep_struct_variable(%arg0: !llvm.ptr>, %arg1: i32, %arg2: i32) { +func.func @gep_struct_variable(%arg0: !llvm.ptr, %arg1: i32, %arg2: i32) { // expected-error @below {{op expected index 1 indexing a struct to be constant}} - llvm.getelementptr %arg0[%arg1, %arg1] : (!llvm.ptr>, i32, i32) -> !llvm.ptr + llvm.getelementptr %arg0[%arg1, %arg1] : (!llvm.ptr, i32, i32) -> !llvm.ptr, !llvm.struct<(i32)> return } // ----- -func.func @gep_out_of_bounds(%ptr: !llvm.ptr)>>, %idx: i64) { +func.func @gep_out_of_bounds(%ptr: !llvm.ptr, %idx: i64) { // expected-error @below {{index 2 indexing a struct is out of bounds}} - llvm.getelementptr %ptr[%idx, 1, 3] : (!llvm.ptr)>>, i64) -> !llvm.ptr + llvm.getelementptr %ptr[%idx, 1, 3] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.struct<(i32, struct<(i32, f32)>)> return } @@ -1321,8 +1274,8 @@ func.func @non_splat_shuffle_on_scalable_vector(%arg0: vector<[4]xf32>) { llvm.mlir.global internal @side_effecting_global() : !llvm.struct<(i8)> { %0 = llvm.mlir.constant(1 : i64) : i64 // expected-error@below {{ops with side effects not allowed in global initializers}} - %1 = llvm.alloca %0 x !llvm.struct<(i8)> : (i64) -> !llvm.ptr> - %2 = llvm.load %1 : !llvm.ptr> + %1 = llvm.alloca %0 x !llvm.struct<(i8)> : (i64) -> !llvm.ptr + %2 = llvm.load %1 : !llvm.ptr -> !llvm.struct<(i8)> llvm.return %2 : !llvm.struct<(i8)> } diff --git a/mlir/test/Dialect/LLVMIR/layout-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/layout-typed-pointers.mlir new file mode 100644 index 0000000000000..5cf1ed03e64c8 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/layout-typed-pointers.mlir @@ -0,0 +1,145 @@ +// RUN: mlir-opt --test-data-layout-query --split-input-file --verify-diagnostics %s | FileCheck %s + +module { + // CHECK: @no_spec + func.func @no_spec() { + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr> + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: 
alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 0 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr<5> + return + } +} + +// ----- + +module attributes { dlti.dl_spec = #dlti.dl_spec< + #dlti.dl_entry, dense<[32, 32, 64]> : vector<3xi32>>, + #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi32>>, + #dlti.dl_entry, dense<[32, 64, 64]> : vector<3xi32>>, + #dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32> +>} { + // CHECK: @spec + func.func @spec() { + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr> + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 64 + // CHECK: preferred = 8 + // CHECK: size = 8 + "test.data_layout_query"() : () -> !llvm.ptr + // CHECK: alignment = 4 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr<3> + // CHECK: alignment = 8 + // CHECK: alloca_memory_space = 5 + // CHECK: bitsize = 32 + // CHECK: preferred = 8 + // CHECK: size = 4 + "test.data_layout_query"() : () -> !llvm.ptr<4> + return + } +} + +// ----- + +// expected-error@below {{unexpected layout attribute for pointer to 'i32'}} +module attributes { dlti.dl_spec = #dlti.dl_spec< + #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi32>> +>} { + func.func @pointer() { + return + } +} + +// ----- + +// expected-error@below {{expected layout attribute for '!llvm.ptr' to be a dense integer elements attribute with 3 or 4 elements}} +module attributes { dlti.dl_spec = #dlti.dl_spec< + #dlti.dl_entry, dense<[64.0, 64.0, 64.0]> : vector<3xf32>> +>} { + func.func @pointer() { + return + } +} + +// ----- + +// expected-error@below {{preferred alignment is expected to be at least as large as ABI alignment}} +module attributes { dlti.dl_spec = #dlti.dl_spec< + #dlti.dl_entry, dense<[64, 64, 32]> : vector<3xi32>> +>} { + func.func @pointer() { + return + } +} diff --git a/mlir/test/Dialect/LLVMIR/layout.mlir b/mlir/test/Dialect/LLVMIR/layout.mlir index e5c8c0bd86db1..d6e2013cc86ca 100644 --- a/mlir/test/Dialect/LLVMIR/layout.mlir +++ b/mlir/test/Dialect/LLVMIR/layout.mlir @@ -3,42 +3,13 @@ module { // CHECK: @no_spec func.func @no_spec() { + "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 8 // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 8 - // CHECK: 
alloca_memory_space = 0 - // CHECK: bitsize = 64 - // CHECK: preferred = 8 - // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 8 - // CHECK: alloca_memory_space = 0 - // CHECK: bitsize = 64 - // CHECK: preferred = 8 - // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 8 - // CHECK: alloca_memory_space = 0 - // CHECK: bitsize = 64 - // CHECK: preferred = 8 - // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr> - // CHECK: alignment = 8 - // CHECK: alloca_memory_space = 0 - // CHECK: bitsize = 64 - // CHECK: preferred = 8 - // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 8 - // CHECK: alloca_memory_space = 0 - // CHECK: bitsize = 64 - // CHECK: preferred = 8 - // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr + "test.data_layout_query"() : () -> !llvm.ptr<3> // CHECK: alignment = 8 // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 @@ -52,8 +23,8 @@ module { // ----- module attributes { dlti.dl_spec = #dlti.dl_spec< - #dlti.dl_entry, dense<[32, 32, 64]> : vector<3xi32>>, - #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi32>>, + #dlti.dl_entry : vector<3xi32>>, + #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi32>>, #dlti.dl_entry, dense<[32, 64, 64]> : vector<3xi32>>, #dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32> >} { @@ -64,37 +35,19 @@ module attributes { dlti.dl_spec = #dlti.dl_spec< // CHECK: bitsize = 32 // CHECK: preferred = 8 // CHECK: size = 4 - "test.data_layout_query"() : () -> !llvm.ptr + "test.data_layout_query"() : () -> !llvm.ptr // CHECK: alignment = 4 // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 // CHECK: preferred = 8 // CHECK: size = 4 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 4 - // CHECK: alloca_memory_space = 5 - // CHECK: bitsize = 32 - // CHECK: preferred = 8 - // CHECK: size = 4 - "test.data_layout_query"() : () -> !llvm.ptr - // CHECK: alignment = 4 - // CHECK: alloca_memory_space = 5 - // CHECK: bitsize = 32 - // CHECK: preferred = 8 - // CHECK: size = 4 - "test.data_layout_query"() : () -> !llvm.ptr> - // CHECK: alignment = 4 - // CHECK: alloca_memory_space = 5 - // CHECK: bitsize = 32 - // CHECK: preferred = 8 - // CHECK: size = 4 - "test.data_layout_query"() : () -> !llvm.ptr + "test.data_layout_query"() : () -> !llvm.ptr<3> // CHECK: alignment = 8 // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 64 // CHECK: preferred = 8 // CHECK: size = 8 - "test.data_layout_query"() : () -> !llvm.ptr + "test.data_layout_query"() : () -> !llvm.ptr<5> // CHECK: alignment = 4 // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 @@ -113,20 +66,9 @@ module attributes { dlti.dl_spec = #dlti.dl_spec< // ----- -// expected-error@below {{unexpected layout attribute for pointer to 'i32'}} -module attributes { dlti.dl_spec = #dlti.dl_spec< - #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi32>> ->} { - func.func @pointer() { - return - } -} - -// ----- - -// expected-error@below {{expected layout attribute for '!llvm.ptr' to be a dense integer elements attribute with 3 or 4 elements}} +// expected-error@below {{expected layout attribute for '!llvm.ptr' to be a dense integer elements attribute with 3 or 4 elements}} module attributes { dlti.dl_spec = #dlti.dl_spec< - #dlti.dl_entry, dense<[64.0, 64.0, 64.0]> : vector<3xf32>> + #dlti.dl_entry : vector<3xf32>> >} { func.func @pointer() { return @@ -137,7 +79,7 @@ module attributes { dlti.dl_spec = #dlti.dl_spec< // 
expected-error@below {{preferred alignment is expected to be at least as large as ABI alignment}} module attributes { dlti.dl_spec = #dlti.dl_spec< - #dlti.dl_entry, dense<[64, 64, 32]> : vector<3xi32>> + #dlti.dl_entry : vector<3xi32>> >} { func.func @pointer() { return diff --git a/mlir/test/Dialect/LLVMIR/nvvm-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/nvvm-typed-pointers.mlir new file mode 100644 index 0000000000000..5fbadd1dc414e --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/nvvm-typed-pointers.mlir @@ -0,0 +1,55 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics | FileCheck %s + +// CHECK-LABEL: @nvvm_wmma_load_tf32 +func.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { + // CHECK: nvvm.wmma.load {{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + %0 = nvvm.wmma.load %arg0, %arg1 + {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} + : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> + llvm.return %0 : !llvm.struct<(i32, i32, i32, i32)> +} + +// CHECK-LABEL: @cp_async +llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { +// CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 + nvvm.cp.async.shared.global %arg0, %arg1, 16 : !llvm.ptr, !llvm.ptr +// CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 {bypass_l1} + nvvm.cp.async.shared.global %arg0, %arg1, 16 {bypass_l1} : !llvm.ptr, !llvm.ptr +// CHECK: nvvm.cp.async.commit.group + nvvm.cp.async.commit.group +// CHECK: nvvm.cp.async.wait.group 0 + nvvm.cp.async.wait.group 0 + llvm.return +} + +// CHECK-LABEL: llvm.func @ld_matrix +llvm.func @ld_matrix(%arg0: !llvm.ptr) { + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 1 : i32} : (!llvm.ptr) -> i32 + %l1 = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 2 : i32} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> + %l2 = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 4 : i32} : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> + %l4 = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> + llvm.return +} + +// CHECK-LABEL: llvm.func @redux_sync +llvm.func @redux_sync(%value : i32, %offset : i32) -> i32 { + // CHECK: nvvm.redux.sync add %{{.*}} + %r1 = nvvm.redux.sync add %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync max %{{.*}} + %r2 = nvvm.redux.sync max %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync min %{{.*}} + %r3 = nvvm.redux.sync min %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync umax %{{.*}} + %r5 = nvvm.redux.sync umax %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync umin %{{.*}} + %r6 = nvvm.redux.sync umin %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync and %{{.*}} + %r7 = nvvm.redux.sync and %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync or %{{.*}} + %r8 = nvvm.redux.sync or %value, %offset : i32 -> i32 + // CHECK: nvvm.redux.sync xor %{{.*}} + %r9 = nvvm.redux.sync xor %value, %offset : i32 -> i32 + llvm.return %r1 : i32 +} diff --git a/mlir/test/Dialect/LLVMIR/nvvm.mlir b/mlir/test/Dialect/LLVMIR/nvvm.mlir index 6596b8503d7a5..c7c83d29638c4 100644 --- a/mlir/test/Dialect/LLVMIR/nvvm.mlir +++ 
b/mlir/test/Dialect/LLVMIR/nvvm.mlir @@ -266,11 +266,11 @@ func.func @nvvm_mma_m16n8k32_s4_s4(%a0 : i32, %a1 : i32, } // CHECK-LABEL: @nvvm_wmma_load_tf32 -func.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { +func.func @nvvm_wmma_load_tf32(%arg0: !llvm.ptr, %arg1 : i32) -> !llvm.struct<(i32, i32, i32, i32)> { // CHECK: nvvm.wmma.load {{.*}} {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} %0 = nvvm.wmma.load %arg0, %arg1 {eltype = #nvvm.mma_type, frag = #nvvm.mma_frag, k = 8 : i32, layout = #nvvm.mma_layout, m = 16 : i32, n = 16 : i32} - : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> + : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> llvm.return %0 : !llvm.struct<(i32, i32, i32, i32)> } @@ -288,11 +288,11 @@ func.func @nvvm_wmma_mma(%0 : i32, %1 : i32, %2 : i32, %3 : i32, %4 : i32, %5 : } // CHECK-LABEL: @cp_async -llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { +llvm.func @cp_async(%arg0: !llvm.ptr<3>, %arg1: !llvm.ptr<1>) { // CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 - nvvm.cp.async.shared.global %arg0, %arg1, 16 : !llvm.ptr, !llvm.ptr + nvvm.cp.async.shared.global %arg0, %arg1, 16 : !llvm.ptr<3>, !llvm.ptr<1> // CHECK: nvvm.cp.async.shared.global %{{.*}}, %{{.*}}, 16 {bypass_l1} - nvvm.cp.async.shared.global %arg0, %arg1, 16 {bypass_l1} : !llvm.ptr, !llvm.ptr + nvvm.cp.async.shared.global %arg0, %arg1, 16 {bypass_l1} : !llvm.ptr<3>, !llvm.ptr<1> // CHECK: nvvm.cp.async.commit.group nvvm.cp.async.commit.group // CHECK: nvvm.cp.async.wait.group 0 @@ -301,18 +301,18 @@ llvm.func @cp_async(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { } // CHECK-LABEL: llvm.func @ld_matrix -llvm.func @ld_matrix(%arg0: !llvm.ptr) { - // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 1 : i32} : (!llvm.ptr) -> i32 - %l1 = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> i32 - // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 2 : i32} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> - %l2 = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32)> - // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 4 : i32} : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> - %l4 = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr) -> !llvm.struct<(i32, i32, i32, i32)> +llvm.func @ld_matrix(%arg0: !llvm.ptr<3>) { + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 1 : i32} : (!llvm.ptr<3>) -> i32 + %l1 = nvvm.ldmatrix %arg0 {num = 1 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> i32 + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 2 : i32} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + %l2 = nvvm.ldmatrix %arg0 {num = 2 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32)> + // CHECK: nvvm.ldmatrix %{{.*}} {layout = #nvvm.mma_layout, num = 4 : i32} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> + %l4 = nvvm.ldmatrix %arg0 {num = 4 : i32, layout = #nvvm.mma_layout} : (!llvm.ptr<3>) -> !llvm.struct<(i32, i32, i32, i32)> llvm.return } // CHECK-LABEL: llvm.func @redux_sync -llvm.func @redux_sync(%value : i32, %offset : i32) -> i32 { +llvm.func @redux_sync(%value : i32, %offset : i32) -> i32 { // CHECK: nvvm.redux.sync add %{{.*}} %r1 = nvvm.redux.sync add %value, %offset : i32 -> i32 // CHECK: nvvm.redux.sync max %{{.*}} @@ -324,9 +324,9 @@ llvm.func @redux_sync(%value : 
i32, %offset : i32) -> i32 { // CHECK: nvvm.redux.sync umin %{{.*}} %r6 = nvvm.redux.sync umin %value, %offset : i32 -> i32 // CHECK: nvvm.redux.sync and %{{.*}} - %r7 = nvvm.redux.sync and %value, %offset : i32 -> i32 + %r7 = nvvm.redux.sync and %value, %offset : i32 -> i32 // CHECK: nvvm.redux.sync or %{{.*}} - %r8 = nvvm.redux.sync or %value, %offset : i32 -> i32 + %r8 = nvvm.redux.sync or %value, %offset : i32 -> i32 // CHECK: nvvm.redux.sync xor %{{.*}} %r9 = nvvm.redux.sync xor %value, %offset : i32 -> i32 llvm.return %r1 : i32 diff --git a/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid-typed-pointers.mlir new file mode 100644 index 0000000000000..65411ff41e285 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid-typed-pointers.mlir @@ -0,0 +1,6 @@ +// RUN: mlir-opt %s -split-input-file -verify-diagnostics + +// Argument attributes + +// expected-error@below {{"llvm.sret" attribute attached to LLVM pointer argument of different type}} +llvm.func @invalid_sret_attr_type(%0 : !llvm.ptr {llvm.sret = !llvm.struct<(i32)>}) diff --git a/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid.mlir b/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid.mlir index 72bf45052ef13..d7ee6097b3600 100644 --- a/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/parameter-attrs-invalid.mlir @@ -47,11 +47,6 @@ llvm.func @invalid_sret_arg_type(%0 : i32 {llvm.sret = !llvm.struct<(i32)>}) // ----- -// expected-error@below {{"llvm.sret" attribute attached to LLVM pointer argument of different type}} -llvm.func @invalid_sret_attr_type(%0 : !llvm.ptr {llvm.sret = !llvm.struct<(i32)>}) - -// ----- - // expected-error@below {{"llvm.byval" attribute attached to non-pointer LLVM type}} llvm.func @invalid_byval_arg_type(%0 : i32 {llvm.byval = !llvm.struct<(i32)>}) diff --git a/mlir/test/Dialect/LLVMIR/types-invalid-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/types-invalid-typed-pointers.mlir new file mode 100644 index 0000000000000..475fadede8fbf --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/types-invalid-typed-pointers.mlir @@ -0,0 +1,42 @@ +// RUN: mlir-opt --allow-unregistered-dialect -split-input-file -verify-diagnostics %s + +func.func @void_pointer() { + // expected-error @+1 {{invalid pointer element type}} + "some.op"() : () -> !llvm.ptr +} + +// ----- + +func.func @repeated_struct_name() { + "some.op"() : () -> !llvm.struct<"a", (ptr>)> + // expected-error @+1 {{identified type already used with a different body}} + "some.op"() : () -> !llvm.struct<"a", (i32)> +} + +// ----- + +func.func @dynamic_vector() { + // expected-error @+1 {{expected '? x x ' or ' x '}} + "some.op"() : () -> !llvm.vec> +} + +// ----- + +func.func @dynamic_scalable_vector() { + // expected-error @+1 {{expected '? x x ' or ' x '}} + "some.op"() : () -> !llvm.vec> +} + +// ----- + +func.func @unscalable_vector() { + // expected-error @+1 {{expected '? 
x x ' or ' x '}} + "some.op"() : () -> !llvm.vec<4x4 x ptr> +} + +// ----- + +func.func @zero_vector() { + // expected-error @+1 {{the number of vector elements must be positive}} + "some.op"() : () -> !llvm.vec<0 x ptr> +} diff --git a/mlir/test/Dialect/LLVMIR/types-invalid.mlir b/mlir/test/Dialect/LLVMIR/types-invalid.mlir index d8ac523b86d93..fce100e6a865c 100644 --- a/mlir/test/Dialect/LLVMIR/types-invalid.mlir +++ b/mlir/test/Dialect/LLVMIR/types-invalid.mlir @@ -21,15 +21,8 @@ func.func @function_taking_function() { // ----- -func.func @void_pointer() { - // expected-error @+1 {{invalid pointer element type}} - "some.op"() : () -> !llvm.ptr -} - -// ----- - func.func @repeated_struct_name() { - "some.op"() : () -> !llvm.struct<"a", (ptr>)> + "some.op"() : () -> !llvm.struct<"a", (ptr)> // expected-error @+1 {{identified type already used with a different body}} "some.op"() : () -> !llvm.struct<"a", (i32)> } @@ -113,28 +106,28 @@ func.func @identified_struct_with_void() { func.func @dynamic_vector() { // expected-error @+1 {{expected '? x x ' or ' x '}} - "some.op"() : () -> !llvm.vec> + "some.op"() : () -> !llvm.vec } // ----- func.func @dynamic_scalable_vector() { // expected-error @+1 {{expected '? x x ' or ' x '}} - "some.op"() : () -> !llvm.vec> + "some.op"() : () -> !llvm.vec } // ----- func.func @unscalable_vector() { // expected-error @+1 {{expected '? x x ' or ' x '}} - "some.op"() : () -> !llvm.vec<4x4 x ptr> + "some.op"() : () -> !llvm.vec<4x4 x ptr> } // ----- func.func @zero_vector() { // expected-error @+1 {{the number of vector elements must be positive}} - "some.op"() : () -> !llvm.vec<0 x ptr> + "some.op"() : () -> !llvm.vec<0 x ptr> } // ----- diff --git a/mlir/test/Dialect/LLVMIR/types-typed-pointers.mlir b/mlir/test/Dialect/LLVMIR/types-typed-pointers.mlir new file mode 100644 index 0000000000000..2d63f379c2ee7 --- /dev/null +++ b/mlir/test/Dialect/LLVMIR/types-typed-pointers.mlir @@ -0,0 +1,118 @@ +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file | mlir-opt -allow-unregistered-dialect | FileCheck %s + +// CHECK-LABEL: @ptr +func.func @ptr() { + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr> + "some.op"() : () -> !llvm.ptr> + // CHECK: !llvm.ptr>>>> + "some.op"() : () -> !llvm.ptr>>>> + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr, 9> + "some.op"() : () -> !llvm.ptr, 9> + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr<42> + "some.op"() : () -> !llvm.ptr<42> + return +} + +// CHECK-LABEL: @vec +func.func @vec() { + // CHECK: vector<4xi32> + "some.op"() : () -> vector<4xi32> + // CHECK: vector<4xf32> + "some.op"() : () -> vector<4xf32> + // CHECK: !llvm.vec + "some.op"() : () -> !llvm.vec + // CHECK: !llvm.vec + "some.op"() : () -> !llvm.vec + // CHECK: !llvm.vec<4 x ptr> + "some.op"() : () -> !llvm.vec<4 x ptr> + return +} + +// CHECK-LABEL: @array +func.func @array() { + // CHECK: !llvm.array<10 x i32> + "some.op"() : () -> !llvm.array<10 x i32> + // CHECK: !llvm.array<8 x f32> + "some.op"() : () -> !llvm.array<8 x f32> + // CHECK: !llvm.array<10 x ptr> + "some.op"() : () -> !llvm.array<10 x ptr> + // CHECK: !llvm.array<10 x array<4 x f32>> + "some.op"() : () -> !llvm.array<10 x array<4 x f32>> + return +} + +// CHECK-LABEL: @identified_struct +func.func @identified_struct() { + // CHECK: 
!llvm.struct<"empty", ()> + "some.op"() : () -> !llvm.struct<"empty", ()> + // CHECK: !llvm.struct<"opaque", opaque> + "some.op"() : () -> !llvm.struct<"opaque", opaque> + // CHECK: !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr>)> + "some.op"() : () -> !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr>)> + // CHECK: !llvm.struct<"self-recursive", (ptr>)> + "some.op"() : () -> !llvm.struct<"self-recursive", (ptr>)> + // CHECK: !llvm.struct<"unpacked", (i32)> + "some.op"() : () -> !llvm.struct<"unpacked", (i32)> + // CHECK: !llvm.struct<"packed", packed (i32)> + "some.op"() : () -> !llvm.struct<"packed", packed (i32)> + // CHECK: !llvm.struct<"name with spaces and !^$@$#", packed (i32)> + "some.op"() : () -> !llvm.struct<"name with spaces and !^$@$#", packed (i32)> + + // CHECK: !llvm.struct<"mutually-a", (ptr, 3>)>>)> + "some.op"() : () -> !llvm.struct<"mutually-a", (ptr, 3>)>>)> + // CHECK: !llvm.struct<"mutually-b", (ptr>)>, 3>)> + "some.op"() : () -> !llvm.struct<"mutually-b", (ptr>)>, 3>)> + // CHECK: !llvm.struct<"referring-another", (ptr>)> + "some.op"() : () -> !llvm.struct<"referring-another", (ptr>)> + + // CHECK: !llvm.struct<"struct-of-arrays", (array<10 x i32>)> + "some.op"() : () -> !llvm.struct<"struct-of-arrays", (array<10 x i32>)> + // CHECK: !llvm.array<10 x struct<"array-of-structs", (i32)>> + "some.op"() : () -> !llvm.array<10 x struct<"array-of-structs", (i32)>> + // CHECK: !llvm.ptr> + "some.op"() : () -> !llvm.ptr> + return +} + +// CHECK-LABEL: @ptr_elem_interface +// CHECK-COUNT-3: !llvm.ptr +// CHECK: llvm.mlir.undef : !llvm.ptr +func.func @ptr_elem_interface(%arg0: !llvm.ptr) { + %0 = llvm.load %arg0 : !llvm.ptr + llvm.store %0, %arg0 : !llvm.ptr + llvm.mlir.undef : !llvm.ptr + return +} + +// ----- + +// Check that type aliases can be used inside LLVM dialect types. Note that +// currently they are _not_ printed back as this would require +// DialectAsmPrinter to have a mechanism for querying the presence and +// usability of an alias outside of its `printType` method. 
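// (Editor note, inferred from the CHECK lines below: aliases such as !qux
// are accepted by the parser but print back fully expanded, so
// !llvm.struct<(i32, f32, !qux)> round-trips as
// !llvm.struct<(i32, f32, struct<(i64)>)>.)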
+ +!baz = i64 +!qux = !llvm.struct<(!baz)> + +!rec = !llvm.struct<"a", (ptr>)> + +// CHECK: aliases +llvm.func @aliases() { + // CHECK: !llvm.struct<(i32, f32, struct<(i64)>)> + "some.op"() : () -> !llvm.struct<(i32, f32, !qux)> + // CHECK: !llvm.struct<"a", (ptr>)> + "some.op"() : () -> !rec + llvm.return +} diff --git a/mlir/test/Dialect/LLVMIR/types.mlir b/mlir/test/Dialect/LLVMIR/types.mlir index 54c44a6aa58ab..42352ce697f02 100644 --- a/mlir/test/Dialect/LLVMIR/types.mlir +++ b/mlir/test/Dialect/LLVMIR/types.mlir @@ -57,26 +57,14 @@ func.func @integer() { // CHECK-LABEL: @ptr func.func @ptr() { - // CHECK: !llvm.ptr - "some.op"() : () -> !llvm.ptr - // CHECK: !llvm.ptr - "some.op"() : () -> !llvm.ptr - // CHECK: !llvm.ptr> - "some.op"() : () -> !llvm.ptr> - // CHECK: !llvm.ptr>>>> - "some.op"() : () -> !llvm.ptr>>>> - // CHECK: !llvm.ptr - "some.op"() : () -> !llvm.ptr - // CHECK: !llvm.ptr - "some.op"() : () -> !llvm.ptr - // CHECK: !llvm.ptr - "some.op"() : () -> !llvm.ptr - // CHECK: !llvm.ptr, 9> - "some.op"() : () -> !llvm.ptr, 9> // CHECK: !llvm.ptr "some.op"() : () -> !llvm.ptr + // CHECK: !llvm.ptr + "some.op"() : () -> !llvm.ptr<0> // CHECK: !llvm.ptr<42> "some.op"() : () -> !llvm.ptr<42> + // CHECK: !llvm.ptr, 9> + "some.op"() : () -> !llvm.ptr, 9> return } @@ -90,8 +78,8 @@ func.func @vec() { "some.op"() : () -> !llvm.vec // CHECK: !llvm.vec "some.op"() : () -> !llvm.vec - // CHECK: !llvm.vec<4 x ptr> - "some.op"() : () -> !llvm.vec<4 x ptr> + // CHECK: !llvm.vec<4 x ptr> + "some.op"() : () -> !llvm.vec<4 x ptr> return } @@ -101,8 +89,8 @@ func.func @array() { "some.op"() : () -> !llvm.array<10 x i32> // CHECK: !llvm.array<8 x f32> "some.op"() : () -> !llvm.array<8 x f32> - // CHECK: !llvm.array<10 x ptr> - "some.op"() : () -> !llvm.array<10 x ptr> + // CHECK: !llvm.array<10 x ptr<4>> + "some.op"() : () -> !llvm.array<10 x ptr<4>> // CHECK: !llvm.array<10 x array<4 x f32>> "some.op"() : () -> !llvm.array<10 x array<4 x f32>> return @@ -147,30 +135,22 @@ func.func @identified_struct() { "some.op"() : () -> !llvm.struct<"empty", ()> // CHECK: !llvm.struct<"opaque", opaque> "some.op"() : () -> !llvm.struct<"opaque", opaque> - // CHECK: !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr>)> - "some.op"() : () -> !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr>)> - // CHECK: !llvm.struct<"self-recursive", (ptr>)> - "some.op"() : () -> !llvm.struct<"self-recursive", (ptr>)> + // CHECK: !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr)> + "some.op"() : () -> !llvm.struct<"long", (i32, struct<(i32, i1)>, f32, ptr)> // CHECK: !llvm.struct<"unpacked", (i32)> "some.op"() : () -> !llvm.struct<"unpacked", (i32)> // CHECK: !llvm.struct<"packed", packed (i32)> "some.op"() : () -> !llvm.struct<"packed", packed (i32)> // CHECK: !llvm.struct<"name with spaces and !^$@$#", packed (i32)> "some.op"() : () -> !llvm.struct<"name with spaces and !^$@$#", packed (i32)> - - // CHECK: !llvm.struct<"mutually-a", (ptr, 3>)>>)> - "some.op"() : () -> !llvm.struct<"mutually-a", (ptr, 3>)>>)> - // CHECK: !llvm.struct<"mutually-b", (ptr>)>, 3>)> - "some.op"() : () -> !llvm.struct<"mutually-b", (ptr>)>, 3>)> - // CHECK: !llvm.struct<"referring-another", (ptr>)> - "some.op"() : () -> !llvm.struct<"referring-another", (ptr>)> - + // CHECK: !llvm.struct<"outer", (struct<"nested", ()>)> + "some.op"() : () -> !llvm.struct<"outer", (struct<"nested", ()>)> + // CHECK: !llvm.struct<"referring-another", (ptr)> + "some.op"() : () -> !llvm.struct<"referring-another", (ptr)> // CHECK: 
!llvm.struct<"struct-of-arrays", (array<10 x i32>)> "some.op"() : () -> !llvm.struct<"struct-of-arrays", (array<10 x i32>)> // CHECK: !llvm.array<10 x struct<"array-of-structs", (i32)>> "some.op"() : () -> !llvm.array<10 x struct<"array-of-structs", (i32)>> - // CHECK: !llvm.ptr> - "some.op"() : () -> !llvm.ptr> return } @@ -180,16 +160,6 @@ func.func @verbose() { return } -// CHECK-LABEL: @ptr_elem_interface -// CHECK-COUNT-3: !llvm.ptr -// CHECK: llvm.mlir.undef : !llvm.ptr -func.func @ptr_elem_interface(%arg0: !llvm.ptr) { - %0 = llvm.load %arg0 : !llvm.ptr - llvm.store %0, %arg0 : !llvm.ptr - llvm.mlir.undef : !llvm.ptr - return -} - // ----- // Check that type aliases can be used inside LLVM dialect types. Note that @@ -200,13 +170,9 @@ func.func @ptr_elem_interface(%arg0: !llvm.ptr) { !baz = i64 !qux = !llvm.struct<(!baz)> -!rec = !llvm.struct<"a", (ptr>)> - // CHECK: aliases llvm.func @aliases() { // CHECK: !llvm.struct<(i32, f32, struct<(i64)>)> "some.op"() : () -> !llvm.struct<(i32, f32, !qux)> - // CHECK: !llvm.struct<"a", (ptr>)> - "some.op"() : () -> !rec llvm.return } From 3048c9e15498161572483e3d4b81c9c37bf2770f Mon Sep 17 00:00:00 2001 From: Dmitry Chernenkov Date: Thu, 23 Mar 2023 16:20:42 +0000 Subject: [PATCH 512/691] Revert "Recommit [Modules] Remove unnecessary check when generating name lookup table in ASTWriter" This reverts commit 25557aa38a0dab76f5b7a4518942f69d879693c0. --- clang/include/clang/Serialization/ASTWriter.h | 1 + clang/lib/Serialization/ASTWriter.cpp | 9 ++- clang/test/Modules/pr61065.cppm | 55 ------------------- 3 files changed, 9 insertions(+), 56 deletions(-) delete mode 100644 clang/test/Modules/pr61065.cppm diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index d31fa38b93825..09ee1744e8945 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -514,6 +514,7 @@ class ASTWriter : public ASTDeserializationListener, void WriteTypeAbbrevs(); void WriteType(QualType T); + bool isLookupResultExternal(StoredDeclsList &Result, DeclContext *DC); bool isLookupResultEntirelyExternal(StoredDeclsList &Result, DeclContext *DC); void GenerateNameLookupTable(const DeclContext *DC, diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 3e40812a9a0ba..31e44b52929f4 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3849,6 +3849,12 @@ class ASTDeclContextNameLookupTrait { } // namespace +bool ASTWriter::isLookupResultExternal(StoredDeclsList &Result, + DeclContext *DC) { + return Result.hasExternalDecls() && + DC->hasNeedToReconcileExternalVisibleStorage(); +} + bool ASTWriter::isLookupResultEntirelyExternal(StoredDeclsList &Result, DeclContext *DC) { for (auto *D : Result.getLookupResult()) @@ -3891,7 +3897,8 @@ ASTWriter::GenerateNameLookupTable(const DeclContext *ConstDC, // don't need to write an entry for the name at all. If we can't // write out a lookup set without performing more deserialization, // just skip this entry. - if (isLookupResultEntirelyExternal(Result, DC)) + if (isLookupResultExternal(Result, DC) && + isLookupResultEntirelyExternal(Result, DC)) continue; // We also skip empty results. 
If any of the results could be external and diff --git a/clang/test/Modules/pr61065.cppm b/clang/test/Modules/pr61065.cppm deleted file mode 100644 index 44fa3679974ad..0000000000000 --- a/clang/test/Modules/pr61065.cppm +++ /dev/null @@ -1,55 +0,0 @@ -// From https://github.com/llvm/llvm-project/issues/61065 -// RUN: rm -rf %t -// RUN: mkdir -p %t -// RUN: split-file %s %t -// -// RUN: %clang_cc1 -std=c++20 %t/a.cppm -emit-module-interface -o %t/a.pcm -// RUN: %clang_cc1 -std=c++20 %t/b.cppm -emit-module-interface -o %t/b.pcm \ -// RUN: -fprebuilt-module-path=%t -// RUN: %clang_cc1 -std=c++20 %t/c.cppm -emit-module-interface -o %t/c.pcm \ -// RUN: -fprebuilt-module-path=%t -// RUN: %clang_cc1 -std=c++20 %t/d.cpp -fsyntax-only -verify -fprebuilt-module-path=%t - -//--- a.cppm -export module a; - -struct base { - base(int) {} -}; - -export struct a : base { - using base::base; -}; - -//--- b.cppm -export module b; - -import a; - -a b() { - return a(1); -} - -//--- c.cppm -export module c; - -import a; -import b; - -struct noncopyable { - noncopyable(noncopyable const &) = delete; - noncopyable() = default; -}; - -export struct c { - noncopyable c0; - a c1 = 43; - c() = default; -}; - -//--- d.cpp -// expected-no-diagnostics -import c; -void d() { - c _; -} From 558b33c5aa7ec37d313a9f348c1ccc718d8242d8 Mon Sep 17 00:00:00 2001 From: luxufan Date: Fri, 24 Mar 2023 16:26:32 +0800 Subject: [PATCH 513/691] [NFC] Regenerate test NewGVN/metadata-nonnull.ll --- .../Transforms/NewGVN/metadata-nonnull.ll | 57 ++++++++++--------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/llvm/test/Transforms/NewGVN/metadata-nonnull.ll b/llvm/test/Transforms/NewGVN/metadata-nonnull.ll index 4b050666d54e9..d76d46edc0a59 100644 --- a/llvm/test/Transforms/NewGVN/metadata-nonnull.ll +++ b/llvm/test/Transforms/NewGVN/metadata-nonnull.ll @@ -1,12 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt %s -passes=newgvn -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" define ptr @test1(ptr %v0, ptr %v1) { -; CHECK-LABEL: @test1( +; CHECK-LABEL: define ptr @test1 +; CHECK-SAME: (ptr [[V0:%.*]], ptr [[V1:%.*]]) { ; CHECK-NEXT: top: -; CHECK-NEXT: [[V2:%.*]] = load ptr, ptr [[V0:%[a-z0-9]+]], align 8, !nonnull !0 -; CHECK-NEXT: store ptr [[V2]], ptr [[V1:%.*]] +; CHECK-NEXT: [[V2:%.*]] = load ptr, ptr [[V0]], align 8, !nonnull !0 +; CHECK-NEXT: store ptr [[V2]], ptr [[V1]], align 8 ; CHECK-NEXT: ret ptr [[V2]] ; top: @@ -18,11 +20,11 @@ top: ; FIXME: could propagate nonnull to first load? 
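; (Editor note on the FIXME above, hedged: when the two loads are merged,
; !nonnull survives only if both copies carry it, since such metadata is
; intersected during CSE (see llvm::combineMetadataForCSE). Propagating
; !nonnull to the first load would nevertheless be sound in straight-line
; code like this, because the annotated second load of the same,
; unclobbered pointer is guaranteed to execute.)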
define ptr @test2(ptr %v0, ptr %v1) { -; CHECK-LABEL: @test2( +; CHECK-LABEL: define ptr @test2 +; CHECK-SAME: (ptr [[V0:%.*]], ptr [[V1:%.*]]) { ; CHECK-NEXT: top: -; CHECK-NEXT: [[V2:%.*]] = load ptr, ptr [[V0:%[a-z0-9]+]] -; CHECK-NOT: !nonnull -; CHECK-NEXT: store ptr [[V2]], ptr [[V1:%.*]] +; CHECK-NEXT: [[V2:%.*]] = load ptr, ptr [[V0]], align 8 +; CHECK-NEXT: store ptr [[V2]], ptr [[V1]], align 8 ; CHECK-NEXT: ret ptr [[V2]] ; top: @@ -35,10 +37,10 @@ top: declare void @use1(ptr %a) readonly define ptr @test3(ptr %v0) { -; CHECK-LABEL: @test3( +; CHECK-LABEL: define ptr @test3 +; CHECK-SAME: (ptr [[V0:%.*]]) { ; CHECK-NEXT: top: -; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[V0:%[a-z0-9]+]] -; CHECK-NOT: !nonnull +; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[V0]], align 8 ; CHECK-NEXT: call void @use1(ptr [[V1]]) ; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: @@ -61,10 +63,10 @@ bb2: } define ptr @test4(ptr %v0) { -; CHECK-LABEL: @test4( +; CHECK-LABEL: define ptr @test4 +; CHECK-SAME: (ptr [[V0:%.*]]) { ; CHECK-NEXT: top: -; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[V0:%[a-z0-9]+]] -; CHECK-NOT: !nonnull +; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[V0]], align 8 ; CHECK-NEXT: call void @use1(ptr [[V1]]) ; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: @@ -87,9 +89,10 @@ bb2: } define ptr @test5(ptr %v0) { -; CHECK-LABEL: @test5( +; CHECK-LABEL: define ptr @test5 +; CHECK-SAME: (ptr [[V0:%.*]]) { ; CHECK-NEXT: top: -; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[V0:%[a-z0-9]+]], align 8, !nonnull !0 +; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[V0]], align 8, !nonnull !0 ; CHECK-NEXT: call void @use1(ptr [[V1]]) ; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: @@ -112,18 +115,17 @@ bb2: } define ptr @test6(ptr %v0, ptr %v1) { -; CHECK-LABEL: @test6( +; CHECK-LABEL: define ptr @test6 +; CHECK-SAME: (ptr [[V0:%.*]], ptr [[V1:%.*]]) { ; CHECK-NEXT: top: ; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[V2:%.*]] = load ptr, ptr [[V0:%[a-z0-9]+]], align 8, !nonnull !0 -; CHECK-NEXT: store ptr [[V2]], ptr [[V1:%.*]] +; CHECK-NEXT: [[V2:%.*]] = load ptr, ptr [[V0]], align 8, !nonnull !0 +; CHECK-NEXT: store ptr [[V2]], ptr [[V1]], align 8 ; CHECK-NEXT: ret ptr [[V2]] ; CHECK: bb2: -; CHECK-NEXT: [[V4:%.*]] = load ptr, ptr [[V0]] -; CHECK-NOT: !nonnull -; CHECK-NEXT: store ptr [[V4]], ptr [[V1]] -; CHECK-NOT: !nonnull +; CHECK-NEXT: [[V4:%.*]] = load ptr, ptr [[V0]], align 8 +; CHECK-NEXT: store ptr [[V4]], ptr [[V1]], align 8 ; CHECK-NEXT: ret ptr [[V4]] ; top: @@ -145,18 +147,17 @@ bb2: declare void @use2(ptr %a) define ptr @test7(ptr %v0) { -; CHECK-LABEL: @test7( +; CHECK-LABEL: define ptr @test7 +; CHECK-SAME: (ptr [[V0:%.*]]) { ; CHECK-NEXT: top: -; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[V0:%[a-z0-9]+]], align 8, !nonnull !0 +; CHECK-NEXT: [[V1:%.*]] = load ptr, ptr [[V0]], align 8, !nonnull !0 ; CHECK-NEXT: call void @use2(ptr [[V1]]) ; CHECK-NEXT: br i1 undef, label [[BB1:%.*]], label [[BB2:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[V2:%.*]] = load ptr, ptr [[V0]] -; CHECK-NOT: !nonnull +; CHECK-NEXT: [[V2:%.*]] = load ptr, ptr [[V0]], align 8 ; CHECK-NEXT: ret ptr [[V2]] ; CHECK: bb2: -; CHECK-NEXT: [[V3:%.*]] = load ptr, ptr [[V0]] -; CHECK-NOT: !nonnull +; CHECK-NEXT: [[V3:%.*]] = load ptr, ptr [[V0]], align 8 ; CHECK-NEXT: ret ptr [[V3]] ; top: From 09aa3f7bb54bf7f35809d3abfdf9f6a679ba1003 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 
24 Mar 2023 10:23:50 +0200 Subject: [PATCH 514/691] [LLD] [test] Add a missing REQUIRES: x86 in a new test This test somewhat unconventionally assembles both aarch64 and x86 object files. This fixes test failures in build configurations with the aarch64 target enabled but x86 target disabled. --- lld/test/COFF/gaps-fill.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/test/COFF/gaps-fill.test b/lld/test/COFF/gaps-fill.test index 17cd9cbc86ab3..f49a601cc568a 100644 --- a/lld/test/COFF/gaps-fill.test +++ b/lld/test/COFF/gaps-fill.test @@ -1,4 +1,4 @@ -# REQUIRES: aarch64 +# REQUIRES: aarch64, x86 # RUN: split-file %s %t.dir # RUN: llvm-mc -filetype=obj -triple=aarch64-windows %t.dir/arm64-dllmain.s -o %t.dir/arm64-dllmain.obj From a94d2bd5b564dafd6f4239444fc52f4dead0b990 Mon Sep 17 00:00:00 2001 From: Johannes de Fine Licht Date: Fri, 24 Mar 2023 09:27:27 +0100 Subject: [PATCH 515/691] [MLIR][LLVM] Add debug output to the LLVM inliner. This revealed a test case that wasn't hitting the intended branch because the inlinees had no function definition. Depends on D146628 Reviewed By: gysit Differential Revision: https://reviews.llvm.org/D146633 --- mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp | 61 +++++++++++++++++---- mlir/test/Dialect/LLVMIR/inlining.mlir | 29 ++++++++-- 2 files changed, 74 insertions(+), 16 deletions(-) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp index 8a399b9a5d030..23dd22b9cbd03 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMInlining.cpp @@ -15,6 +15,9 @@ #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Matchers.h" #include "mlir/Transforms/InliningUtils.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "llvm-inliner" using namespace mlir; @@ -134,9 +137,17 @@ struct LLVMInlinerInterface : public DialectInlinerInterface { if (!wouldBeCloned) return false; auto callOp = dyn_cast(call); + if (!callOp) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline: call is not an LLVM::CallOp\n"); + return false; + } auto funcOp = dyn_cast(callable); - if (!callOp || !funcOp) + if (!funcOp) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline: callable is not an LLVM::LLVMFuncOp\n"); return false; + } if (auto attrs = funcOp.getArgAttrs()) { for (Attribute attr : *attrs) { auto attrDict = cast(attr); @@ -144,16 +155,25 @@ struct LLVMInlinerInterface : public DialectInlinerInterface { if (attr.getName() == LLVM::LLVMDialect::getByValAttrName()) continue; // TODO: Handle all argument attributes; + LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName() + << ": unhandled argument attribute \"" + << attr.getName() << "\"\n"); return false; } } } // TODO: Handle result attributes; - if (funcOp.getResAttrs()) + if (funcOp.getResAttrs()) { + LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName() + << ": unhandled result attribute\n"); return false; + } // TODO: Handle exceptions. - if (funcOp.getPersonality()) + if (funcOp.getPersonality()) { + LLVM_DEBUG(llvm::dbgs() << "Cannot inline " << funcOp.getSymName() + << ": unhandled function personality\n"); return false; + } if (funcOp.getPassthrough()) { // TODO: Used attributes should not be passthrough. 
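// (Editor note: judging from the tests added below, the set initialized
// here holds the passthrough attributes that block inlining, i.e.
// "noinline", "optnone", "noduplicate", "presplitcoroutine",
// "returns_twice" and "strictfp". The LLVM_DEBUG traces introduced by
// this patch print under DEBUG_TYPE "llvm-inliner", so in an
// assertions-enabled build they can be observed with
// `mlir-opt --debug-only=llvm-inliner`.)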
DenseSet<StringAttr> disallowed( @@ -167,7 +187,14 @@ struct LLVMInlinerInterface : public DialectInlinerInterface { auto stringAttr = dyn_cast<StringAttr>(attr); if (!stringAttr) return false; - return disallowed.contains(stringAttr); + if (disallowed.contains(stringAttr)) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline " << funcOp.getSymName() + << ": found disallowed function attribute " + << stringAttr << "\n"); + return true; + } + return false; })) return false; } @@ -185,14 +212,28 @@ struct LLVMInlinerInterface : public DialectInlinerInterface { // Some attributes on memory operations require handling during // inlining. Since this is not yet implemented, refuse to inline memory // operations that have any of these attributes. - if (auto iface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) - if (iface.getAliasScopesOrNull() || iface.getNoAliasScopesOrNull()) + if (auto iface = dyn_cast<LLVM::AliasAnalysisOpInterface>(op)) { + if (iface.getAliasScopesOrNull() || iface.getNoAliasScopesOrNull()) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline: unhandled alias analysis metadata\n"); return false; - if (auto iface = dyn_cast<LLVM::AccessGroupOpInterface>(op)) - if (iface.getAccessGroupsOrNull()) + } + } + if (auto iface = dyn_cast<LLVM::AccessGroupOpInterface>(op)) { + if (iface.getAccessGroupsOrNull()) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline: unhandled access group metadata\n"); return false; - return isa(op); + } + } + if (!isa(op)) { + LLVM_DEBUG(llvm::dbgs() + << "Cannot inline: unhandled side effecting operation \"" + << op->getName() << "\"\n"); + return false; + } + return true; } /// Handle the given inlined return by replacing it with a branch. This diff --git a/mlir/test/Dialect/LLVMIR/inlining.mlir b/mlir/test/Dialect/LLVMIR/inlining.mlir index cefb8d5e461d4..e6dc047fd42b9 100644 --- a/mlir/test/Dialect/LLVMIR/inlining.mlir +++ b/mlir/test/Dialect/LLVMIR/inlining.mlir @@ -160,12 +160,29 @@ llvm.func @caller() { // ----- -llvm.func @callee_noinline() attributes { passthrough = ["noinline"] } -llvm.func @callee_optnone() attributes { passthrough = ["optnone"] } -llvm.func @callee_noduplicate() attributes { passthrough = ["noduplicate"] } -llvm.func @callee_presplitcoroutine() attributes { passthrough = ["presplitcoroutine"] } -llvm.func @callee_returns_twice() attributes { passthrough = ["returns_twice"] } -llvm.func @callee_strictfp() attributes { passthrough = ["strictfp"] } +llvm.func @callee_noinline() attributes { passthrough = ["noinline"] } { + llvm.return +} + +llvm.func @callee_optnone() attributes { passthrough = ["optnone"] } { + llvm.return +} + +llvm.func @callee_noduplicate() attributes { passthrough = ["noduplicate"] } { + llvm.return +} + +llvm.func @callee_presplitcoroutine() attributes { passthrough = ["presplitcoroutine"] } { + llvm.return +} + +llvm.func @callee_returns_twice() attributes { passthrough = ["returns_twice"] } { + llvm.return +} + +llvm.func @callee_strictfp() attributes { passthrough = ["strictfp"] } { + llvm.return +} // CHECK-LABEL: llvm.func @caller // CHECK-NEXT: llvm.call @callee_noinline From 4a5bc791f38a5156bdba87a0572642b1bf3521e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Thu, 23 Mar 2023 00:44:18 +0200 Subject: [PATCH 516/691] [CMake] Respect variables for specifying host tools even without LLVM_USE_HOST_TOOLS set When LLVM_NATIVE_TOOL_DIR was introduced in d3da9067d143f3d4ce59b6d9ab4606a8ef1dc937 / D131052, it consisted of refactoring a couple of cases of manual logic for tools in clang-tools-extra/clang-tidy, clang-tools-extra/pseudo/include and mlir/tools/mlir-linalg-ods-gen.
The former two had the same consistent behaviour while the latter was slightly different, so the refactoring would end up slightly adjusting one or the other. The difference was that the clang-tools-extra tools respected the external variable for setting the tool name, regardless of the LLVM_USE_HOST_TOOLS variable, while the mlir-linalg-ods-gen tool only checked its external variable if LLVM_USE_HOST_TOOLS was set. LLVM_USE_HOST_TOOLS is supposed to be enabled automatically whenever cross-compiling, so this shouldn't have been an issue. In https://github.com/llvm/llvm-project/issues/60784, it seems that some users do cross-compile LLVM without CMake knowing about it (without CMAKE_CROSSCOMPILING being set). In these cases, their build broke, as the variables for pointing to external host tools were no longer respected. The fact that CMAKE_CROSSCOMPILING wasn't set stems from a non-obvious behaviour of CMake; CMAKE_CROSSCOMPILING isn't supposed to be set by the user (and if it is, it gets overridden), but one has to set CMAKE_SYSTEM_NAME to indicate that one is cross-compiling, even if the target OS is the same as the current host. Skip the checks for LLVM_USE_HOST_TOOLS and always respect the variables for pointing to external tools (both the old tool-specific variables, and the new LLVM_NATIVE_TOOL_DIR), if they're set. This makes the logic within setup_host_tool more exactly match the logic for the clang-tools-extra tools from before the refactoring in d3da9067d143f3d4ce59b6d9ab4606a8ef1dc937. This makes the behaviour consistent with that of the tablegen executables, which also respect the externally set variables regardless of LLVM_USE_HOST_TOOLS. This fixes https://github.com/llvm/llvm-project/issues/60784. Differential Revision: https://reviews.llvm.org/D146666 --- llvm/cmake/modules/AddLLVM.cmake | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index cb9254c023e6c..be68f0dde6372 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -2418,7 +2418,7 @@ endfunction() function(setup_host_tool tool_name setting_name exe_var_name target_var_name) set(${setting_name}_DEFAULT "${tool_name}") - if(LLVM_USE_HOST_TOOLS AND LLVM_NATIVE_TOOL_DIR) + if(LLVM_NATIVE_TOOL_DIR) if(EXISTS "${LLVM_NATIVE_TOOL_DIR}/${tool_name}${LLVM_HOST_EXECUTABLE_SUFFIX}") set(${setting_name}_DEFAULT "${LLVM_NATIVE_TOOL_DIR}/${tool_name}${LLVM_HOST_EXECUTABLE_SUFFIX}") endif() @@ -2427,14 +2427,12 @@ function(setup_host_tool tool_name setting_name exe_var_name target_var_name) set(${setting_name} "${${setting_name}_DEFAULT}" CACHE STRING "Host ${tool_name} executable.
Saves building if cross-compiling.") - if(LLVM_USE_HOST_TOOLS) - if(NOT ${setting_name} STREQUAL "${tool_name}") - set(exe_name ${${setting_name}}) - set(target_name ${${setting_name}}) - else() - build_native_tool(${tool_name} exe_name DEPENDS ${tool_name}) - set(target_name ${exe_name}) - endif() + if(NOT ${setting_name} STREQUAL "${tool_name}") + set(exe_name ${${setting_name}}) + set(target_name ${${setting_name}}) + elseif(LLVM_USE_HOST_TOOLS) + build_native_tool(${tool_name} exe_name DEPENDS ${tool_name}) + set(target_name ${exe_name}) else() set(exe_name $) set(target_name ${tool_name}) From 0858b5369b816fd0d3ec95ef4eef6e790ce8187b Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Fri, 24 Mar 2023 15:42:23 +0700 Subject: [PATCH 517/691] [Test] Regenerate checks in test file --- .../LoopPredication/predicate-exits.ll | 79 ++++++++++++------- 1 file changed, 50 insertions(+), 29 deletions(-) diff --git a/llvm/test/Transforms/LoopPredication/predicate-exits.ll b/llvm/test/Transforms/LoopPredication/predicate-exits.ll index fb3c64a031926..e41f0efc75b7d 100644 --- a/llvm/test/Transforms/LoopPredication/predicate-exits.ll +++ b/llvm/test/Transforms/LoopPredication/predicate-exits.ll @@ -16,7 +16,7 @@ define i32 @test1(ptr %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP3]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -27,7 +27,7 @@ define i32 @test1(ptr %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] -; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -97,7 +97,7 @@ define i32 @test_non_canonical(ptr %array, i32 %length, i1 %cond_0) { ; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP3]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) 
@llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -108,7 +108,7 @@ define i32 @test_non_canonical(ptr %array, i32 %length, i1 %cond_0) { ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] -; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -181,7 +181,7 @@ define i32 @test_two_range_checks(ptr %array, i32 %length.1, i32 %length.2, i32 ; CHECK-NEXT: [[TMP5:%.*]] = freeze i1 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP5]], [[TMP3]] ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP6]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -192,14 +192,14 @@ define i32 @test_two_range_checks(ptr %array, i32 %length.1, i32 %length.2, i32 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH_1]] -; CHECK-NEXT: br i1 true, label [[GUARDED:%.*]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED:%.*]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET2]] ; CHECK: guarded: ; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH_2]] -; CHECK-NEXT: br i1 true, label [[GUARDED2]], label [[DEOPT3:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED2]], label [[DEOPT3:%.*]], !prof [[PROF0]] ; CHECK: deopt3: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -272,7 +272,7 @@ define i32 @test_unanalyzeable_exit(ptr %array, i32 %length, i32 %n, i1 %cond_0) ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) 
@llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -284,7 +284,7 @@ define i32 @test_unanalyzeable_exit(ptr %array, i32 %length, i32 %n, i1 %cond_0) ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, ptr @G, align 4 ; CHECK-NEXT: [[UNKNOWN:%.*]] = icmp eq i32 [[VOL]], 0 -; CHECK-NEXT: br i1 [[UNKNOWN]], label [[GUARDED2]], label [[DEOPT3:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[UNKNOWN]], label [[GUARDED2]], label [[DEOPT3:%.*]], !prof [[PROF0]] ; CHECK: deopt3: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -353,7 +353,7 @@ define i32 @test_unanalyzeable_exit2(ptr %array, i32 %length, i32 %n, i1 %cond_0 ; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP3]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -364,7 +364,7 @@ define i32 @test_unanalyzeable_exit2(ptr %array, i32 %length, i32 %n, i1 %cond_0 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] -; CHECK-NEXT: br i1 true, label [[GUARDED:%.*]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED:%.*]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -372,7 +372,7 @@ define i32 @test_unanalyzeable_exit2(ptr %array, i32 %length, i32 %n, i1 %cond_0 ; CHECK: guarded: ; CHECK-NEXT: [[VOL:%.*]] = load volatile i32, ptr @G, align 4 ; CHECK-NEXT: [[UNKNOWN:%.*]] = icmp eq i32 [[VOL]], 0 -; CHECK-NEXT: br i1 [[UNKNOWN]], label [[GUARDED2]], label [[DEOPT3:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[UNKNOWN]], label [[GUARDED2]], label [[DEOPT3:%.*]], !prof [[PROF0]] ; CHECK: deopt3: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -445,7 +445,7 @@ define i32 @test_unanalyzeable_latch(ptr %array, i32 %length, i32 %n, i1 %cond_0 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) 
@llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -456,7 +456,7 @@ define i32 @test_unanalyzeable_latch(ptr %array, i32 %length, i32 %n, i1 %cond_0 ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -523,7 +523,7 @@ define i32 @provably_taken(ptr %array, i1 %cond_0) { ; CHECK-NEXT: [[TMP0:%.*]] = freeze i1 false ; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[TMP0]], [[COND_0:%.*]] ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP1]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -534,7 +534,7 @@ define i32 @provably_taken(ptr %array, i1 %cond_0) { ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], 198 -; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -598,7 +598,7 @@ define i32 @provably_not_taken(ptr %array, i1 %cond_0) { ; CHECK-NEXT: [[TMP0:%.*]] = freeze i1 true ; CHECK-NEXT: [[TMP1:%.*]] = and i1 [[TMP0]], [[COND_0:%.*]] ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP1]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -609,7 +609,7 @@ define i32 @provably_not_taken(ptr %array, i1 %cond_0) { ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], 205 -; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) 
@llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -680,7 +680,7 @@ define i32 @unswitch_exit_form(ptr %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP3]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt.loopexit: ; CHECK-NEXT: br label [[DEOPT]] ; CHECK: deopt: @@ -697,7 +697,7 @@ define i32 @unswitch_exit_form(ptr %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] -; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT_LOOPEXIT]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT_LOOPEXIT]], !prof [[PROF0]] ; CHECK: guarded: ; CHECK-NEXT: [[I_I64:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: [[ARRAY_I_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i64 [[I_I64]] @@ -763,7 +763,7 @@ define i32 @swapped_wb(ptr %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] ; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[COND_0:%.*]] ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[WIDENABLE_COND]], [[TMP3]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -774,7 +774,7 @@ define i32 @swapped_wb(ptr %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] -; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -841,7 +841,7 @@ define i32 @trivial_wb(ptr %array, i32 %length, i32 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]] ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[TMP3:%.*]] = and i1 [[TMP2]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[TMP3]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) 
@llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -852,7 +852,7 @@ define i32 @trivial_wb(ptr %array, i32 %length, i32 %n) { ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] -; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -915,7 +915,7 @@ define i32 @todo_unconditional_latch(ptr %array, i32 %length, i1 %cond_0) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[WIDENABLE_COND:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[COND_0:%.*]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -926,7 +926,7 @@ define i32 @todo_unconditional_latch(ptr %array, i32 %length, i1 %cond_0) { ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH:%.*]] -; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[WITHIN_BOUNDS]], label [[GUARDED]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -993,7 +993,7 @@ define i32 @wb_in_loop(ptr %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[TMP5:%.*]] = freeze i1 [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = and i1 [[TMP5]], [[TMP3]] ; CHECK-NEXT: [[EXIPLICIT_GUARD_COND:%.*]] = and i1 [[TMP6]], [[WIDENABLE_COND]] -; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof !0 +; CHECK-NEXT: br i1 [[EXIPLICIT_GUARD_COND]], label [[LOOP_PREHEADER:%.*]], label [[DEOPT:%.*]], !prof [[PROF0]] ; CHECK: deopt: ; CHECK-NEXT: [[DEOPTRET:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] ; CHECK-NEXT: ret i32 [[DEOPTRET]] @@ -1004,7 +1004,7 @@ define i32 @wb_in_loop(ptr %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], [[GUARDED2]] ], [ 0, [[LOOP_PREHEADER]] ] ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS:%.*]] = icmp ult i32 [[I]], [[LENGTH]] -; CHECK-NEXT: br i1 true, label [[GUARDED:%.*]], label [[DEOPT2:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED:%.*]], label [[DEOPT2:%.*]], !prof [[PROF0]] ; CHECK: deopt2: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET2:%.*]] = call i32 (...) 
@llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -1013,7 +1013,7 @@ define i32 @wb_in_loop(ptr %array, i32 %length, i32 %n, i1 %cond_0) { ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[WITHIN_BOUNDS2:%.*]] = icmp ult i32 [[I]], [[LENGTH]] ; CHECK-NEXT: [[WB_COND:%.*]] = and i1 [[WITHIN_BOUNDS2]], true -; CHECK-NEXT: br i1 true, label [[GUARDED2]], label [[DEOPT3:%.*]], !prof !0 +; CHECK-NEXT: br i1 true, label [[GUARDED2]], label [[DEOPT3:%.*]], !prof [[PROF0]] ; CHECK: deopt3: ; CHECK-NEXT: call void @unknown() ; CHECK-NEXT: [[DEOPTRET3:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"() ] @@ -1083,6 +1083,27 @@ exit: } define void @test_memssa() { +; CHECK-LABEL: @test_memssa( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: [[TMP:%.*]] = call i1 @llvm.experimental.widenable.condition() +; CHECK-NEXT: br i1 [[TMP]], label [[BB3:%.*]], label [[BB2:%.*]] +; CHECK: bb2: +; CHECK-NEXT: unreachable +; CHECK: bb3: +; CHECK-NEXT: br label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP7:%.*]], [[BB6:%.*]] ], [ 0, [[BB3]] ] +; CHECK-NEXT: br i1 true, label [[BB10:%.*]], label [[BB6]] +; CHECK: bb6: +; CHECK-NEXT: [[TMP7]] = add nuw nsw i32 [[TMP5]], 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[TMP7]], undef +; CHECK-NEXT: br i1 [[TMP8]], label [[BB4]], label [[BB9:%.*]] +; CHECK: bb9: +; CHECK-NEXT: ret void +; CHECK: bb10: +; CHECK-NEXT: ret void +; bb: %tmp = call i1 @llvm.experimental.widenable.condition() %tmp1 = call i1 @llvm.experimental.widenable.condition() From 6a7d04c6b44de91dd1188d9e614b6e681d378b0e Mon Sep 17 00:00:00 2001 From: luxufan Date: Fri, 24 Mar 2023 16:35:56 +0800 Subject: [PATCH 518/691] [NFC] Regenerate test of InstCombine/load-combine-metadata-dominance.ll --- .../load-combine-metadata-dominance.ll | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/load-combine-metadata-dominance.ll b/llvm/test/Transforms/InstCombine/load-combine-metadata-dominance.ll index 882d775291e59..4949b00f5ffcf 100644 --- a/llvm/test/Transforms/InstCombine/load-combine-metadata-dominance.ll +++ b/llvm/test/Transforms/InstCombine/load-combine-metadata-dominance.ll @@ -1,13 +1,19 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 ; RUN: opt -passes=instcombine -S < %s | FileCheck %s target datalayout = "e-m:e-p:64:64:64-i64:64-f80:128-n8:16:32:64-S128" ; Check that nonnull metadata is propagated from dominating load. -; CHECK-LABEL: @combine_metadata_dominance1( -; CHECK-LABEL: bb1: -; CHECK: load ptr, ptr %p, align 8, !nonnull !0 -; CHECK-NOT: load ptr, ptr %p define void @combine_metadata_dominance1(ptr %p) { +; CHECK-LABEL: define void @combine_metadata_dominance1 +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB1:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 8, !nonnull !0 +; CHECK-NEXT: store i32 0, ptr [[A]], align 4 +; CHECK-NEXT: ret void +; entry: %a = load ptr, ptr %p, !nonnull !0 br label %bb1 @@ -23,9 +29,18 @@ declare i32 @use(ptr, i32) readonly ; Check that nonnull from the dominated load does not get propagated. ; There are some cases where it would be safe to keep it. 
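; A minimal sketch of the hazard (not part of the original test): given
;   %a = load ptr, ptr %p               ; dominating load, no !nonnull
;   ...
;   %b = load ptr, ptr %p, !nonnull !0  ; dominated load
; combining the loads by reusing %a must drop !nonnull, because keeping it on
; the surviving load would assert a guarantee on paths that only ever
; executed %a, where that guarantee was never established.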
-; CHECK-LABEL: @combine_metadata_dominance2( -; CHECK-NOT: nonnull define void @combine_metadata_dominance2(ptr %p, i1 %c1) { +; CHECK-LABEL: define void @combine_metadata_dominance2 +; CHECK-SAME: (ptr [[P:%.*]], i1 [[C1:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C1]], label [[BB1:%.*]], label [[BB2:%.*]] +; CHECK: bb1: +; CHECK-NEXT: [[A:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: store i32 0, ptr [[A]], align 4 +; CHECK-NEXT: ret void +; CHECK: bb2: +; CHECK-NEXT: ret void +; entry: %a = load ptr, ptr %p br i1 %c1, label %bb1, label %bb2 From cf60d3f1a688671c8eb7859bf0572c403c3c0cca Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Thu, 23 Mar 2023 17:39:48 +0000 Subject: [PATCH 519/691] [mlir][arith] Extend the `floordivsi` converter This patch extends the `createConst` method so that it can generate constant vectors (it can already generate scalars). This change is required to be able to apply the converter for `arith.floordivsi` (i.e. `FloorDivSIOpConverter`) to vectors. While `arith.floordivsi` is my main motivation for this change, this patch should also allow other Arith ops to be converted in vector cases. In my example, the Linalg vectorizer updates `arith.floordivsi` to operate on vectors and hence the need for this change. Differential Revision: https://reviews.llvm.org/D146741 --- .../mlir/Dialect/Arith/Transforms/Passes.td | 1 + .../Dialect/Arith/Transforms/ExpandOps.cpp | 12 ++++++-- mlir/test/Dialect/Arith/expand-ops.mlir | 28 +++++++++++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td index ee561e655965f..c5b80346bd52f 100644 --- a/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/Arith/Transforms/Passes.td @@ -31,6 +31,7 @@ def ArithBufferize : Pass<"arith-bufferize", "ModuleOp"> { def ArithExpandOps : Pass<"arith-expand"> { let summary = "Legalize Arith ops to be convertible to LLVM."; let constructor = "mlir::arith::createArithExpandOpsPass()"; + let dependentDialects = ["vector::VectorDialect"]; } def ArithUnsignedWhenEquivalent : Pass<"arith-unsigned-when-equivalent"> { diff --git a/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp b/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp index b70110cbce913..8f34531937c5c 100644 --- a/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp @@ -9,6 +9,7 @@ #include "mlir/Dialect/Arith/Transforms/Passes.h" #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/IR/TypeUtilities.h" #include "mlir/Transforms/DialectConversion.h" @@ -24,8 +25,15 @@ using namespace mlir; /// Create an integer or index constant. 
static Value createConst(Location loc, Type type, int value, PatternRewriter &rewriter) { - return rewriter.create( - loc, rewriter.getIntegerAttr(type, value)); + + auto elTy = getElementTypeOrSelf(type); + auto constantAttr = rewriter.getIntegerAttr(elTy, value); + + if (auto vecTy = llvm::dyn_cast(type)) + return rewriter.create( + loc, vecTy, DenseElementsAttr::get(vecTy, constantAttr)); + + return rewriter.create(loc, constantAttr); } namespace { diff --git a/mlir/test/Dialect/Arith/expand-ops.mlir b/mlir/test/Dialect/Arith/expand-ops.mlir index 3d55c2068f24e..7b7eb4003956a 100644 --- a/mlir/test/Dialect/Arith/expand-ops.mlir +++ b/mlir/test/Dialect/Arith/expand-ops.mlir @@ -114,6 +114,34 @@ func.func @floordivi_index(%arg0: index, %arg1: index) -> (index) { // ----- +// Test floor divide with vector +// CHECK-LABEL: func.func @floordivi_vec( +// CHECK-SAME: %[[VAL_0:.*]]: vector<4xi32>, +// CHECK-SAME: %[[VAL_1:.*]]: vector<4xi32>) -> vector<4xi32> { +func.func @floordivi_vec(%arg0: vector<4xi32>, %arg1: vector<4xi32>) -> (vector<4xi32>) { + %res = arith.floordivsi %arg0, %arg1 : vector<4xi32> + return %res : vector<4xi32> +// CHECK: %[[VAL_2:.*]] = arith.constant dense<1> : vector<4xi32> +// CHECK: %[[VAL_3:.*]] = arith.constant dense<0> : vector<4xi32> +// CHECK: %[[VAL_4:.*]] = arith.constant dense<-1> : vector<4xi32> +// CHECK: %[[VAL_5:.*]] = arith.cmpi slt, %[[VAL_1]], %[[VAL_3]] : vector<4xi32> +// CHECK: %[[VAL_6:.*]] = arith.select %[[VAL_5]], %[[VAL_2]], %[[VAL_4]] : vector<4xi1>, vector<4xi32> +// CHECK: %[[VAL_7:.*]] = arith.subi %[[VAL_6]], %[[VAL_0]] : vector<4xi32> +// CHECK: %[[VAL_8:.*]] = arith.divsi %[[VAL_7]], %[[VAL_1]] : vector<4xi32> +// CHECK: %[[VAL_9:.*]] = arith.subi %[[VAL_4]], %[[VAL_8]] : vector<4xi32> +// CHECK: %[[VAL_10:.*]] = arith.divsi %[[VAL_0]], %[[VAL_1]] : vector<4xi32> +// CHECK: %[[VAL_11:.*]] = arith.cmpi slt, %[[VAL_0]], %[[VAL_3]] : vector<4xi32> +// CHECK: %[[VAL_12:.*]] = arith.cmpi sgt, %[[VAL_0]], %[[VAL_3]] : vector<4xi32> +// CHECK: %[[VAL_13:.*]] = arith.cmpi slt, %[[VAL_1]], %[[VAL_3]] : vector<4xi32> +// CHECK: %[[VAL_14:.*]] = arith.cmpi sgt, %[[VAL_1]], %[[VAL_3]] : vector<4xi32> +// CHECK: %[[VAL_15:.*]] = arith.andi %[[VAL_11]], %[[VAL_14]] : vector<4xi1> +// CHECK: %[[VAL_16:.*]] = arith.andi %[[VAL_12]], %[[VAL_13]] : vector<4xi1> +// CHECK: %[[VAL_17:.*]] = arith.ori %[[VAL_15]], %[[VAL_16]] : vector<4xi1> +// CHECK: %[[VAL_18:.*]] = arith.select %[[VAL_17]], %[[VAL_9]], %[[VAL_10]] : vector<4xi1>, vector<4xi32> +} + +// ----- + // Test ceil divide with unsigned integer // CHECK-LABEL: func @ceildivui // CHECK-SAME: ([[ARG0:%.+]]: i32, [[ARG1:%.+]]: i32) -> i32 { From 410a239996725e68b431e7e3e43e270290be93d2 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 23 Mar 2023 10:59:59 +0000 Subject: [PATCH 520/691] [RISCV] Add test for shuffles that could be done as vmerges Reviewed By: reames Differential Revision: https://reviews.llvm.org/D146710 --- .../RISCV/rvv/fixed-vectors-int-shuffles.ll | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 7b28ce7ad654c..9a1cfef9bbc83 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -644,3 +644,129 @@ entry: %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <4 x i32> ret <4 x i16> %5 } + +define <8 x i8> @merge_start_into_end(<8 x i8> %v, <8 x i8> %w) { +; 
CHECK-LABEL: merge_start_into_end: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: li a0, 240 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vadd.vi v8, v11, -4 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> + ret <8 x i8> %res +} + +define <8 x i8> @merge_start_into_end_non_contiguous(<8 x i8> %v, <8 x i8> %w) { +; CHECK-LABEL: merge_start_into_end_non_contiguous: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: li a0, 144 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vadd.vi v8, v11, -4 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> + ret <8 x i8> %res +} + +define <8 x i8> @merge_end_into_end(<8 x i8> %v, <8 x i8> %w) { +; CHECK-LABEL: merge_end_into_end: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 15 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: ret + %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> + ret <8 x i8> %res +} + +define <8 x i8> @merge_start_into_middle(<8 x i8> %v, <8 x i8> %w) { +; CHECK-LABEL: merge_start_into_middle: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: li a0, 30 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> + ret <8 x i8> %res +} + +define <8 x i8> @merge_start_into_start(<8 x i8> %v, <8 x i8> %w) { +; CHECK-LABEL: merge_start_into_start: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 240 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: ret + %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> + ret <8 x i8> %res +} + +define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) { +; CHECK-LABEL: merge_slidedown: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 1 +; CHECK-NEXT: li a0, 195 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> + ret <8 x i8> %res +} + +; This should slide %v down by 2 and %w up by 1 before merging them +define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) { +; CHECK-LABEL: merge_non_contiguous_slideup_slidedown: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v11 +; CHECK-NEXT: vadd.vi v12, v11, 2 +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: li a0, 234 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vadd.vi v8, v11, -1 +; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> + ret <8 x i8> %res +} + +; This shouldn't generate a vmerge because the elements of %w are not consecutive +define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) { +; CHECK-LABEL: unmergable: +; 
CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vadd.vi v11, v10, 2 +; CHECK-NEXT: lui a0, %hi(.LCPI44_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI44_0) +; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: li a0, 234 +; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vrgather.vv v10, v9, v12, v0.t +; CHECK-NEXT: vmv1r.v v8, v10 +; CHECK-NEXT: ret + %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> + ret <8 x i8> %res +} From 5c1d160cd979dd6099f4266db600020a0cb4867a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Gr=C3=A4nitz?= Date: Thu, 23 Mar 2023 11:10:39 +0100 Subject: [PATCH 521/691] Reland "[JITLink] Initial AArch32 backend" This first version lays the foundations for AArch32 support in JITLink. ELFLinkGraphBuilder_aarch32 processes REL-type relocations and populates LinkGraphs from ELF object files for both big- and little-endian systems. The ArmCfg member controls subarchitecture-specific details throughout the linking process (i.e. it's passed to ELFJITLinker_aarch32). Relocation types follow the ABI documentation's division into classes: Data (endian-sensitive), Arm (32-bit little-endian) and Thumb (2x 16-bit little-endian, "Thumb32" in the docs). The implementation of instruction encoding/decoding for relocation resolution is implemented symmetrically and is testable in isolation (see AArch32 category in JITLinkTests). Callable Thumb functions are marked with a ThumbSymbol target-flag and stored in the LinkGraph with their real addresses. The thumb-bit is added back in when the owning JITDylib requests the address for such a symbol. The StubsManager can generate (absolute) Thumb-state stubs for branch range extensions on v7+ targets. Proper GOT/PLT handling is not yet implemented. This patch is based on the backend implementation in ez-clang and has just enough functionality to model the infrastructure and link a Thumb function `main()` that calls `printf()` to dump "Hello Arm!" on Armv7a. It was tested on Raspberry Pi with 32-bit Raspbian OS. 
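To illustrate the thumb-bit handling with made-up numbers: a Thumb function whose ELF symbol value is 0x8001 is stored in the LinkGraph at its real address 0x8000 with the ThumbSymbol target-flag set; when the owning JITDylib hands out the symbol's address, the thumb-bit is set again, so callers receive 0x8001 and interworking branches (BX/BLX) switch to Thumb state.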
Reviewed By: lhames Differential Revision: https://reviews.llvm.org/D144083 --- .../ExecutionEngine/JITLink/ELF_aarch32.h | 38 ++ .../llvm/ExecutionEngine/JITLink/JITLink.h | 1 + .../llvm/ExecutionEngine/JITLink/aarch32.h | 293 ++++++++++ .../ExecutionEngine/JITLink/CMakeLists.txt | 2 + llvm/lib/ExecutionEngine/JITLink/ELF.cpp | 9 + .../JITLink/ELFLinkGraphBuilder.h | 21 + .../ExecutionEngine/JITLink/ELF_aarch32.cpp | 299 ++++++++++ llvm/lib/ExecutionEngine/JITLink/aarch32.cpp | 514 ++++++++++++++++++ .../Orc/ObjectLinkingLayer.cpp | 21 +- .../JITLink/AArch32/ELF_thumbv7_printf.s | 46 ++ .../JITLink/AArch32/lit.local.cfg | 2 + .../ExecutionEngine/JITLink/AArch32Tests.cpp | 200 +++++++ .../ExecutionEngine/JITLink/CMakeLists.txt | 1 + 13 files changed, 1443 insertions(+), 4 deletions(-) create mode 100644 llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h create mode 100644 llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h create mode 100644 llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp create mode 100644 llvm/lib/ExecutionEngine/JITLink/aarch32.cpp create mode 100644 llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s create mode 100644 llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg create mode 100644 llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h new file mode 100644 index 0000000000000..25d1c3aac2c26 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch32.h @@ -0,0 +1,38 @@ +//===---- ELF_aarch32.h - JIT link functions for arm/thumb -----*- C++ -*--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// jit-link functions for ELF/aarch32. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32 +#define LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32 + +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/JITLink/aarch32.h" + +namespace llvm { +namespace jitlink { + +/// Create a LinkGraph from an ELF/arm relocatable object +/// +/// Note: The graph does not take ownership of the underlying buffer, nor copy +/// its contents. The caller is responsible for ensuring that the object buffer +/// outlives the graph. +Expected> +createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer); + +/// jit-link the given object buffer, which must be an ELF arm/thumb object +/// file. 
+void link_ELF_aarch32(std::unique_ptr G, + std::unique_ptr Ctx); + +} // end namespace jitlink +} // end namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_ELF_AARCH32 diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h index 464b21d536300..b1ee8a668869e 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLink.h @@ -420,6 +420,7 @@ class Symbol { setScope(S); setLive(IsLive); setCallable(IsCallable); + setTargetFlags(TargetFlagsType{}); } static Symbol &constructExternal(BumpPtrAllocator &Allocator, diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h b/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h new file mode 100644 index 0000000000000..8488b10278771 --- /dev/null +++ b/llvm/include/llvm/ExecutionEngine/JITLink/aarch32.h @@ -0,0 +1,293 @@ +//===------ aarch32.h - Generic JITLink arm/thumb utilities -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing arm/thumb objects. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_EXECUTIONENGINE_JITLINK_AARCH32 +#define LLVM_EXECUTIONENGINE_JITLINK_AARCH32 + +#include "TableManager.h" +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/Orc/Shared/ExecutorAddress.h" +#include "llvm/Support/ARMBuildAttributes.h" +#include "llvm/Support/Error.h" + +namespace llvm { +namespace jitlink { +namespace aarch32 { + +/// JITLink-internal AArch32 fixup kinds +enum EdgeKind_aarch32 : Edge::Kind { + + /// + /// Relocations of class Data + /// + FirstDataRelocation = Edge::FirstRelocation, + + /// Plain 32-bit value relocation in target endianness + Data_Delta32 = FirstDataRelocation, + + LastDataRelocation = Data_Delta32, + + /// + /// Relocations of class Arm (covers fixed-width 4-byte instruction subset) + /// + FirstArmRelocation, + + /// TODO: Arm_Call is here only as a placeholder for now. + Arm_Call = FirstArmRelocation, + + LastArmRelocation = Arm_Call, + + /// + /// Relocations of class Thumb16 and Thumb32 (covers Thumb instruction subset) + /// + FirstThumbRelocation, + + /// Write immediate value for PC-relative branch with link (can bridge between + /// Arm and Thumb). + Thumb_Call = FirstThumbRelocation, + + /// Write immediate value for (unconditional) PC-relative branch without link. + Thumb_Jump24, + + /// Write immediate value to the lower halfword of the destination register + Thumb_MovwAbsNC, + + /// Write immediate value to the top halfword of the destination register + Thumb_MovtAbs, + + LastThumbRelocation = Thumb_MovtAbs, +}; + +/// Flags enum for AArch32-specific symbol properties +enum TargetFlags_aarch32 : TargetFlagsType { + ThumbSymbol = 1 << 0, +}; + +/// Human-readable name for a given CPU architecture kind +const char *getCPUArchName(ARMBuildAttrs::CPUArch K); + +/// Get a human-readable name for the given AArch32 edge kind. +const char *getEdgeKindName(Edge::Kind K); + +/// AArch32 uses stubs for a number of purposes, like branch range extension +/// or interworking between Arm and Thumb instruction subsets. 
+/// +/// Stub implementations vary depending on CPU architecture (v4, v6, v7), +/// instruction subset and branch type (absolute/PC-relative). +/// +/// For each kind of stub, the StubsFlavor defines one concrete form that is +/// used throughout the LinkGraph. +/// +/// Stubs are often called "veneers" in the official docs and online. +/// +enum StubsFlavor { + Unsupported = 0, + Thumbv7, +}; + +/// JITLink sub-arch configuration for Arm CPU models +struct ArmConfig { + bool J1J2BranchEncoding = false; + StubsFlavor Stubs = Unsupported; +}; + +/// Obtain the sub-arch configuration for a given Arm CPU model. +inline ArmConfig getArmConfigForCPUArch(ARMBuildAttrs::CPUArch CPUArch) { + ArmConfig ArmCfg; + switch (CPUArch) { + case ARMBuildAttrs::v7: + case ARMBuildAttrs::v8_A: + ArmCfg.J1J2BranchEncoding = true; + ArmCfg.Stubs = Thumbv7; + break; + default: + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Warning: ARM config not defined for CPU architecture " + << getCPUArchName(CPUArch); + }); + break; + } + return ArmCfg; +} + +/// Immutable pair of halfwords, Hi and Lo, with overflow check +struct HalfWords { + constexpr HalfWords() : Hi(0), Lo(0) {} + constexpr HalfWords(uint32_t Hi, uint32_t Lo) : Hi(Hi), Lo(Lo) { + assert(isUInt<16>(Hi) && "Overflow in first half-word"); + assert(isUInt<16>(Lo) && "Overflow in second half-word"); + } + const uint16_t Hi; // First halfword + const uint16_t Lo; // Second halfword +}; + +/// Collection of named constants per fixup kind. It may contain but is not +/// limited to the following entries: +/// +/// Opcode - Values of the op-code bits in the instruction, with +/// unaffected bits nulled +/// OpcodeMask - Mask with all bits set that encode the op-code +/// ImmMask - Mask with all bits set that encode the immediate value +/// RegMask - Mask with all bits set that encode the register +/// +template struct FixupInfo {}; + +template <> struct FixupInfo { + static constexpr HalfWords Opcode{0xf000, 0x8000}; + static constexpr HalfWords OpcodeMask{0xf800, 0x8000}; + static constexpr HalfWords ImmMask{0x07ff, 0x2fff}; + static constexpr uint16_t LoBitConditional = 0x1000; +}; + +template <> struct FixupInfo { + static constexpr HalfWords Opcode{0xf000, 0xc000}; + static constexpr HalfWords OpcodeMask{0xf800, 0xc000}; + static constexpr HalfWords ImmMask{0x07ff, 0x2fff}; + static constexpr uint16_t LoBitH = 0x0001; + static constexpr uint16_t LoBitNoBlx = 0x1000; +}; + +template <> struct FixupInfo { + static constexpr HalfWords Opcode{0xf2c0, 0x0000}; + static constexpr HalfWords OpcodeMask{0xfbf0, 0x8000}; + static constexpr HalfWords ImmMask{0x040f, 0x70ff}; + static constexpr HalfWords RegMask{0x0000, 0x0f00}; +}; + +template <> +struct FixupInfo : public FixupInfo { + static constexpr HalfWords Opcode{0xf240, 0x0000}; +}; + +/// Helper function to read the initial addend for Data-class relocations. +Expected readAddendData(LinkGraph &G, Block &B, const Edge &E); + +/// Helper function to read the initial addend for Arm-class relocations. +Expected readAddendArm(LinkGraph &G, Block &B, const Edge &E); + +/// Helper function to read the initial addend for Thumb-class relocations. +Expected readAddendThumb(LinkGraph &G, Block &B, const Edge &E, + const ArmConfig &ArmCfg); + +/// Read the initial addend for a REL-type relocation. It's the value encoded +/// in the immediate field of the fixup location by the compiler. 
+inline Expected readAddend(LinkGraph &G, Block &B, const Edge &E, + const ArmConfig &ArmCfg) { + Edge::Kind Kind = E.getKind(); + if (Kind <= LastDataRelocation) + return readAddendData(G, B, E); + + if (Kind <= LastArmRelocation) + return readAddendArm(G, B, E); + + if (Kind <= LastThumbRelocation) + return readAddendThumb(G, B, E, ArmCfg); + + llvm_unreachable("Relocation must be of class Data, Arm or Thumb"); +} + +/// Helper function to apply the fixup for Data-class relocations. +Error applyFixupData(LinkGraph &G, Block &B, const Edge &E); + +/// Helper function to apply the fixup for Arm-class relocations. +Error applyFixupArm(LinkGraph &G, Block &B, const Edge &E); + +/// Helper function to apply the fixup for Thumb-class relocations. +Error applyFixupThumb(LinkGraph &G, Block &B, const Edge &E, + const ArmConfig &ArmCfg); + +/// Apply fixup expression for edge to block content. +inline Error applyFixup(LinkGraph &G, Block &B, const Edge &E, + const ArmConfig &ArmCfg) { + Edge::Kind Kind = E.getKind(); + + if (Kind <= LastDataRelocation) + return applyFixupData(G, B, E); + + if (Kind <= LastArmRelocation) + return applyFixupArm(G, B, E); + + if (Kind <= LastThumbRelocation) + return applyFixupThumb(G, B, E, ArmCfg); + + llvm_unreachable("Relocation must be of class Data, Arm or Thumb"); +} + +/// Stubs builder for a specific StubsFlavor +/// +/// Right now we only have one default stub kind, but we want to extend this +/// and allow creation of specific kinds in the future (e.g. branch range +/// extension or interworking). +/// +/// Let's keep it simple for the moment and not wire this through a GOT. +/// +template +class StubsManager : public TableManager> { +public: + StubsManager() = default; + + /// Name of the object file section that will contain all our stubs. + static StringRef getSectionName() { return "__llvm_jitlink_STUBS"; } + + /// Implements link-graph traversal via visitExistingEdges(). + bool visitEdge(LinkGraph &G, Block *B, Edge &E) { + if (E.getTarget().isDefined()) + return false; + + switch (E.getKind()) { + case Thumb_Call: + case Thumb_Jump24: { + DEBUG_WITH_TYPE("jitlink", { + dbgs() << " Fixing " << G.getEdgeKindName(E.getKind()) << " edge at " + << B->getFixupAddress(E) << " (" << B->getAddress() << " + " + << formatv("{0:x}", E.getOffset()) << ")\n"; + }); + E.setTarget(this->getEntryForTarget(G, E.getTarget())); + return true; + } + } + return false; + } + + /// Create a branch range extension stub for the class's flavor. + Symbol &createEntry(LinkGraph &G, Symbol &Target); + +private: + /// Create a new node in the link-graph for the given stub template. + template + Block &addStub(LinkGraph &G, const uint8_t (&Code)[Size], + uint64_t Alignment) { + ArrayRef Template(reinterpret_cast(Code), Size); + return G.createContentBlock(getStubsSection(G), Template, + orc::ExecutorAddr(), Alignment, 0); + } + + /// Get or create the object file section that will contain all our stubs. + Section &getStubsSection(LinkGraph &G) { + if (!StubsSection) + StubsSection = &G.createSection(getSectionName(), + orc::MemProt::Read | orc::MemProt::Exec); + return *StubsSection; + } + + Section *StubsSection = nullptr; +}; + +/// Create a branch range extension stub with Thumb encoding for v7 CPUs. 
+template <> +Symbol &StubsManager::createEntry(LinkGraph &G, Symbol &Target); + +} // namespace aarch32 +} // namespace jitlink +} // namespace llvm + +#endif // LLVM_EXECUTIONENGINE_JITLINK_AARCH32 diff --git a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt index 52ff5e8370031..bc86f45d3c185 100644 --- a/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/JITLink/CMakeLists.txt @@ -20,6 +20,7 @@ add_llvm_component_library(LLVMJITLink # ELF ELF.cpp ELFLinkGraphBuilder.cpp + ELF_aarch32.cpp ELF_aarch64.cpp ELF_i386.cpp ELF_loongarch.cpp @@ -33,6 +34,7 @@ add_llvm_component_library(LLVMJITLink COFF_x86_64.cpp # Architectures: + aarch32.cpp aarch64.cpp i386.cpp loongarch.cpp diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp index ef0f19a785712..340a0ce134475 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELF.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/ELF.cpp @@ -13,6 +13,7 @@ #include "llvm/ExecutionEngine/JITLink/ELF.h" #include "llvm/BinaryFormat/ELF.h" +#include "llvm/ExecutionEngine/JITLink/ELF_aarch32.h" #include "llvm/ExecutionEngine/JITLink/ELF_aarch64.h" #include "llvm/ExecutionEngine/JITLink/ELF_i386.h" #include "llvm/ExecutionEngine/JITLink/ELF_loongarch.h" @@ -69,6 +70,8 @@ createLinkGraphFromELFObject(MemoryBufferRef ObjectBuffer) { switch (*TargetMachineArch) { case ELF::EM_AARCH64: return createLinkGraphFromELFObject_aarch64(ObjectBuffer); + case ELF::EM_ARM: + return createLinkGraphFromELFObject_aarch32(ObjectBuffer); case ELF::EM_LOONGARCH: return createLinkGraphFromELFObject_loongarch(ObjectBuffer); case ELF::EM_RISCV: @@ -90,6 +93,12 @@ void link_ELF(std::unique_ptr G, case Triple::aarch64: link_ELF_aarch64(std::move(G), std::move(Ctx)); return; + case Triple::arm: + case Triple::armeb: + case Triple::thumb: + case Triple::thumbeb: + link_ELF_aarch32(std::move(G), std::move(Ctx)); + return; case Triple::loongarch32: case Triple::loongarch64: link_ELF_loongarch(std::move(G), std::move(Ctx)); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h index 9d2d4958dcf6c..1d98acf868695 100644 --- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h @@ -127,6 +127,12 @@ class ELFLinkGraphBuilder : public ELFLinkGraphBuilderBase { Error graphifySections(); Error graphifySymbols(); + /// Override in derived classes to suppress certain sections in the link + /// graph. + virtual bool excludeSection(const typename ELFT::Shdr &Sect) const { + return false; + } + /// Traverse all matching ELFT::Rela relocation records in the given section. /// The handler function Func should be callable with this signature: /// Error(const typename ELFT::Rela &, @@ -321,6 +327,13 @@ template Error ELFLinkGraphBuilder::graphifySections() { auto Name = Obj.getSectionName(Sec, SectionStringTab); if (!Name) return Name.takeError(); + if (excludeSection(Sec)) { + LLVM_DEBUG({ + dbgs() << " " << SecIndex << ": Skipping section \"" << *Name + << "\" explicitly\n"; + }); + continue; + } // Skip null sections. 
if (Sec.sh_type == ELF::SHT_NULL) { @@ -564,6 +577,10 @@ Error ELFLinkGraphBuilder::forEachRelaRelocation( LLVM_DEBUG(dbgs() << " skipped (dwarf section)\n\n"); return Error::success(); } + if (excludeSection(**FixupSection)) { + LLVM_DEBUG(dbgs() << " skipped (fixup section excluded explicitly)\n\n"); + return Error::success(); + } // Lookup the link-graph node corresponding to the target section name. auto *BlockToFix = getGraphBlock(RelSect.sh_info); @@ -610,6 +627,10 @@ Error ELFLinkGraphBuilder::forEachRelRelocation( LLVM_DEBUG(dbgs() << " skipped (dwarf section)\n\n"); return Error::success(); } + if (excludeSection(**FixupSection)) { + LLVM_DEBUG(dbgs() << " skipped (fixup section excluded explicitly)\n\n"); + return Error::success(); + } // Lookup the link-graph node corresponding to the target section name. auto *BlockToFix = getGraphBlock(RelSect.sh_info); diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp new file mode 100644 index 0000000000000..0010088fef1e7 --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch32.cpp @@ -0,0 +1,299 @@ +//===----- ELF_aarch32.cpp - JIT linker implementation for arm/thumb ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// ELF/aarch32 jit-link implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/ELF_aarch32.h" + +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/ExecutionEngine/JITLink/aarch32.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/TargetParser/ARMTargetParser.h" + +#include "ELFLinkGraphBuilder.h" +#include "JITLinkGeneric.h" + +#define DEBUG_TYPE "jitlink" + +using namespace llvm::object; + +namespace llvm { +namespace jitlink { + +/// Translate from ELF relocation type to JITLink-internal edge kind. +Expected getJITLinkEdgeKind(uint32_t ELFType) { + switch (ELFType) { + case ELF::R_ARM_REL32: + return aarch32::Data_Delta32; + case ELF::R_ARM_CALL: + return aarch32::Arm_Call; + case ELF::R_ARM_THM_CALL: + return aarch32::Thumb_Call; + case ELF::R_ARM_THM_JUMP24: + return aarch32::Thumb_Jump24; + case ELF::R_ARM_THM_MOVW_ABS_NC: + return aarch32::Thumb_MovwAbsNC; + case ELF::R_ARM_THM_MOVT_ABS: + return aarch32::Thumb_MovtAbs; + } + + return make_error( + "Unsupported aarch32 relocation " + formatv("{0:d}: ", ELFType) + + object::getELFRelocationTypeName(ELF::EM_ARM, ELFType)); +} + +/// Translate from JITLink-internal edge kind back to ELF relocation type. +Expected getELFRelocationType(Edge::Kind Kind) { + switch (static_cast(Kind)) { + case aarch32::Data_Delta32: + return ELF::R_ARM_REL32; + case aarch32::Arm_Call: + return ELF::R_ARM_CALL; + case aarch32::Thumb_Call: + return ELF::R_ARM_THM_CALL; + case aarch32::Thumb_Jump24: + return ELF::R_ARM_THM_JUMP24; + case aarch32::Thumb_MovwAbsNC: + return ELF::R_ARM_THM_MOVW_ABS_NC; + case aarch32::Thumb_MovtAbs: + return ELF::R_ARM_THM_MOVT_ABS; + } + + return make_error(formatv("Invalid aarch32 edge {0:d}: ", + Kind)); +} + +/// Get a human-readable name for the given ELF AArch32 edge kind. 
+const char *getELFAArch32EdgeKindName(Edge::Kind R) {
+  // No ELF-specific edge kinds yet
+  return aarch32::getEdgeKindName(R);
+}
+
+class ELFJITLinker_aarch32 : public JITLinker<ELFJITLinker_aarch32> {
+  friend class JITLinker<ELFJITLinker_aarch32>;
+
+public:
+  ELFJITLinker_aarch32(std::unique_ptr<JITLinkContext> Ctx,
+                       std::unique_ptr<LinkGraph> G, PassConfiguration PassCfg,
+                       aarch32::ArmConfig ArmCfg)
+      : JITLinker(std::move(Ctx), std::move(G), std::move(PassCfg)),
+        ArmCfg(std::move(ArmCfg)) {}
+
+private:
+  aarch32::ArmConfig ArmCfg;
+
+  Error applyFixup(LinkGraph &G, Block &B, const Edge &E) const {
+    return aarch32::applyFixup(G, B, E, ArmCfg);
+  }
+};
+
+template <support::endianness DataEndianness>
+class ELFLinkGraphBuilder_aarch32
+    : public ELFLinkGraphBuilder<ELFType<DataEndianness, false>> {
+private:
+  using ELFT = ELFType<DataEndianness, false>;
+  using Base = ELFLinkGraphBuilder<ELFT>;
+
+  bool excludeSection(const typename ELFT::Shdr &Sect) const override {
+    // TODO: An .ARM.exidx (Exception Index table) entry is 8-bytes in size and
+    // consists of 2 words. It might be sufficient to process only relocations
+    // in the second word (offset 4). More details can be found in: Exception
+    // Handling ABI for the Arm® Architecture -> Index table entries
+    if (Sect.sh_type == ELF::SHT_ARM_EXIDX)
+      return true;
+    return false;
+  }
+
+  Error addRelocations() override {
+    LLVM_DEBUG(dbgs() << "Processing relocations:\n");
+    using Self = ELFLinkGraphBuilder_aarch32;
+    for (const auto &RelSect : Base::Sections) {
+      if (Error Err = Base::forEachRelRelocation(RelSect, this,
+                                                 &Self::addSingleRelRelocation))
+        return Err;
+    }
+    return Error::success();
+  }
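+  // Note: ELF/aarch32 objects use REL-type relocations, so the addend is
+  // stored implicitly in the instruction or data bits at the fixup address.
+  // A minimal sketch of the implicit-addend read for Data_Delta32, assuming
+  // a little-endian graph (see readAddendData() in aarch32.cpp):
+  //
+  //   int64_t Addend = SignExtend64<32>(read32<little>(FixupPtr));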
+  Error addSingleRelRelocation(const typename ELFT::Rel &Rel,
+                               const typename ELFT::Shdr &FixupSect,
+                               Block &BlockToFix) {
+    uint32_t SymbolIndex = Rel.getSymbol(false);
+    auto ObjSymbol = Base::Obj.getRelocationSymbol(Rel, Base::SymTabSec);
+    if (!ObjSymbol)
+      return ObjSymbol.takeError();
+
+    Symbol *GraphSymbol = Base::getGraphSymbol(SymbolIndex);
+    if (!GraphSymbol)
+      return make_error<StringError>(
+          formatv("Could not find symbol at given index, did you add it to "
+                  "JITSymbolTable? index: {0}, shndx: {1} Size of table: {2}",
+                  SymbolIndex, (*ObjSymbol)->st_shndx,
+                  Base::GraphSymbols.size()),
+          inconvertibleErrorCode());
+
+    uint32_t Type = Rel.getType(false);
+    Expected<aarch32::EdgeKind_aarch32> Kind = getJITLinkEdgeKind(Type);
+    if (!Kind)
+      return Kind.takeError();
+
+    auto FixupAddress = orc::ExecutorAddr(FixupSect.sh_addr) + Rel.r_offset;
+    Edge::OffsetT Offset = FixupAddress - BlockToFix.getAddress();
+    Edge E(*Kind, Offset, *GraphSymbol, 0);
+
+    Expected<int64_t> Addend =
+        aarch32::readAddend(*Base::G, BlockToFix, E, ArmCfg);
+    if (!Addend)
+      return Addend.takeError();
+
+    E.setAddend(*Addend);
+    LLVM_DEBUG({
+      dbgs() << "    ";
+      printEdge(dbgs(), BlockToFix, E, getELFAArch32EdgeKindName(*Kind));
+      dbgs() << "\n";
+    });
+
+    BlockToFix.addEdge(std::move(E));
+    return Error::success();
+  }
+
+  aarch32::ArmConfig ArmCfg;
+
+protected:
+  TargetFlagsType makeTargetFlags(const typename ELFT::Sym &Sym) override {
+    if (Sym.getValue() & 0x01)
+      return aarch32::ThumbSymbol;
+    return TargetFlagsType{};
+  }
+
+  orc::ExecutorAddrDiff getRawOffset(const typename ELFT::Sym &Sym,
+                                     TargetFlagsType Flags) override {
+    assert((makeTargetFlags(Sym) & Flags) == Flags);
+    static constexpr uint64_t ThumbBit = 0x01;
+    return Sym.getValue() & ~ThumbBit;
+  }
+
+public:
+  ELFLinkGraphBuilder_aarch32(StringRef FileName, const ELFFile<ELFT> &Obj,
+                              Triple TT, aarch32::ArmConfig ArmCfg)
+      : ELFLinkGraphBuilder<ELFT>(Obj, std::move(TT), FileName,
+                                  getELFAArch32EdgeKindName),
+        ArmCfg(std::move(ArmCfg)) {}
+};
+
+template <aarch32::StubsFlavor Flavor>
+Error buildTables_ELF_aarch32(LinkGraph &G) {
+  LLVM_DEBUG(dbgs() << "Visiting edges in graph:\n");
+
+  aarch32::StubsManager<Flavor> PLT;
+  visitExistingEdges(G, PLT);
+  return Error::success();
+}
+
+Expected<std::unique_ptr<LinkGraph>>
+createLinkGraphFromELFObject_aarch32(MemoryBufferRef ObjectBuffer) {
+  LLVM_DEBUG({
+    dbgs() << "Building jitlink graph for new input "
+           << ObjectBuffer.getBufferIdentifier() << "...\n";
+  });
+
+  auto ELFObj = ObjectFile::createELFObjectFile(ObjectBuffer);
+  if (!ELFObj)
+    return ELFObj.takeError();
+
+  // Find out what exact AArch32 instruction set and features we target.
+  auto TT = (*ELFObj)->makeTriple();
+  ARM::ArchKind AK = ARM::parseArch(TT.getArchName());
+  if (AK == ARM::ArchKind::INVALID)
+    return make_error<JITLinkError>(
+        "Failed to build ELF link graph: Invalid ARM ArchKind");
+
+  // Resolve our internal configuration for the target. If at some point the
+  // CPUArch alone becomes too imprecise, we can find more details in the
+  // Tag_CPU_arch_profile.
+  aarch32::ArmConfig ArmCfg;
+  using namespace ARMBuildAttrs;
+  auto Arch = static_cast<CPUArch>(ARM::getArchAttr(AK));
+  switch (Arch) {
+  case v7:
+  case v8_A:
+    ArmCfg = aarch32::getArmConfigForCPUArch(Arch);
+    assert(ArmCfg.Stubs != aarch32::Unsupported &&
+           "Provide a config for each supported CPU");
+    break;
+  default:
+    return make_error<JITLinkError>(
+        "Failed to build ELF link graph: Unsupported CPU arch " +
+        StringRef(aarch32::getCPUArchName(Arch)));
+  }
+
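+  // A sketch of the configuration this resolves to for a typical v7 target
+  // (field names as declared in aarch32.h; the values assume Thumb2 with
+  // J1J2 branch encoding):
+  //
+  //   aarch32::ArmConfig Cfg = aarch32::getArmConfigForCPUArch(v7);
+  //   // Cfg.J1J2BranchEncoding == true, Cfg.Stubs == aarch32::Thumbv7
+
+  // Populate the link-graph.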
+ switch (TT.getArch()) { + case Triple::arm: + case Triple::thumb: { + auto &ELFFile = cast>(**ELFObj).getELFFile(); + return ELFLinkGraphBuilder_aarch32( + (*ELFObj)->getFileName(), ELFFile, TT, ArmCfg) + .buildGraph(); + } + case Triple::armeb: + case Triple::thumbeb: { + auto &ELFFile = cast>(**ELFObj).getELFFile(); + return ELFLinkGraphBuilder_aarch32((*ELFObj)->getFileName(), + ELFFile, TT, ArmCfg) + .buildGraph(); + } + default: + return make_error( + "Failed to build ELF/aarch32 link graph: Invalid target triple " + + TT.getTriple()); + } +} + +void link_ELF_aarch32(std::unique_ptr G, + std::unique_ptr Ctx) { + const Triple &TT = G->getTargetTriple(); + + using namespace ARMBuildAttrs; + ARM::ArchKind AK = ARM::parseArch(TT.getArchName()); + auto CPU = static_cast(ARM::getArchAttr(AK)); + aarch32::ArmConfig ArmCfg = aarch32::getArmConfigForCPUArch(CPU); + + PassConfiguration PassCfg; + if (Ctx->shouldAddDefaultTargetPasses(TT)) { + // Add a mark-live pass. + if (auto MarkLive = Ctx->getMarkLivePass(TT)) + PassCfg.PrePrunePasses.push_back(std::move(MarkLive)); + else + PassCfg.PrePrunePasses.push_back(markAllSymbolsLive); + + switch (ArmCfg.Stubs) { + case aarch32::Thumbv7: + PassCfg.PostPrunePasses.push_back( + buildTables_ELF_aarch32); + break; + case aarch32::Unsupported: + llvm_unreachable("Check before building graph"); + } + } + + if (auto Err = Ctx->modifyPassConfig(*G, PassCfg)) + return Ctx->notifyFailed(std::move(Err)); + + ELFJITLinker_aarch32::link(std::move(Ctx), std::move(G), std::move(PassCfg), + std::move(ArmCfg)); +} + +} // namespace jitlink +} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp new file mode 100644 index 0000000000000..6f49a4578cf7c --- /dev/null +++ b/llvm/lib/ExecutionEngine/JITLink/aarch32.cpp @@ -0,0 +1,514 @@ +//===--------- aarch32.cpp - Generic JITLink arm/thumb utilities ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Generic utilities for graphs representing arm/thumb objects. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/JITLink/aarch32.h" + +#include "llvm/ADT/StringExtras.h" +#include "llvm/BinaryFormat/ELF.h" +#include "llvm/ExecutionEngine/JITLink/JITLink.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MathExtras.h" + +#define DEBUG_TYPE "jitlink" + +namespace llvm { +namespace jitlink { +namespace aarch32 { + +using namespace support; +using namespace support::endian; + +/// Encode 22-bit immediate value for branch instructions without J1J2 range +/// extension (formats B T4, BL T1 and BLX T2). +/// +/// 00000:Imm11H:Imm11L:0 -> [ 00000:Imm11H, 00000:Imm11L ] +/// J1^ ^J2 will always be 1 +/// +HalfWords encodeImmBT4BlT1BlxT2(int64_t Value) { + constexpr uint32_t J1J2 = 0x2800; + uint32_t Imm11H = (Value >> 12) & 0x07ff; + uint32_t Imm11L = (Value >> 1) & 0x07ff; + return HalfWords{Imm11H, Imm11L | J1J2}; +} + +/// Decode 22-bit immediate value for branch instructions without J1J2 range +/// extension (formats B T4, BL T1 and BLX T2). 
+/// +/// [ 00000:Imm11H, 00000:Imm11L ] -> 00000:Imm11H:Imm11L:0 +/// J1^ ^J2 will always be 1 +/// +int64_t decodeImmBT4BlT1BlxT2(uint32_t Hi, uint32_t Lo) { + uint32_t Imm11H = Hi & 0x07ff; + uint32_t Imm11L = Lo & 0x07ff; + return SignExtend64<22>(Imm11H << 12 | Imm11L << 1); +} + +/// Encode 25-bit immediate value for branch instructions with J1J2 range +/// extension (formats B T4, BL T1 and BLX T2). +/// +/// S:I1:I2:Imm10:Imm11:0 -> [ 00000:S:Imm10, 00:J1:0:J2:Imm11 ] +/// +HalfWords encodeImmBT4BlT1BlxT2_J1J2(int64_t Value) { + uint32_t S = (Value >> 14) & 0x0400; + uint32_t J1 = (((~(Value >> 10)) ^ (Value >> 11)) & 0x2000); + uint32_t J2 = (((~(Value >> 11)) ^ (Value >> 13)) & 0x0800); + uint32_t Imm10 = (Value >> 12) & 0x03ff; + uint32_t Imm11 = (Value >> 1) & 0x07ff; + return HalfWords{S | Imm10, J1 | J2 | Imm11}; +} + +/// Decode 25-bit immediate value for branch instructions with J1J2 range +/// extension (formats B T4, BL T1 and BLX T2). +/// +/// [ 00000:S:Imm10, 00:J1:0:J2:Imm11] -> S:I1:I2:Imm10:Imm11:0 +/// +int64_t decodeImmBT4BlT1BlxT2_J1J2(uint32_t Hi, uint32_t Lo) { + uint32_t S = Hi & 0x0400; + uint32_t I1 = ~((Lo ^ (Hi << 3)) << 10) & 0x00800000; + uint32_t I2 = ~((Lo ^ (Hi << 1)) << 11) & 0x00400000; + uint32_t Imm10 = Hi & 0x03ff; + uint32_t Imm11 = Lo & 0x07ff; + return SignExtend64<25>(S << 14 | I1 | I2 | Imm10 << 12 | Imm11 << 1); +} + +/// Encode 16-bit immediate value for move instruction formats MOVT T1 and +/// MOVW T3. +/// +/// Imm4:Imm1:Imm3:Imm8 -> [ 00000:i:000000:Imm4, 0:Imm3:0000:Imm8 ] +/// +HalfWords encodeImmMovtT1MovwT3(uint16_t Value) { + uint32_t Imm4 = (Value >> 12) & 0x0f; + uint32_t Imm1 = (Value >> 11) & 0x01; + uint32_t Imm3 = (Value >> 8) & 0x07; + uint32_t Imm8 = Value & 0xff; + return HalfWords{Imm1 << 10 | Imm4, Imm3 << 12 | Imm8}; +} + +/// Decode 16-bit immediate value from move instruction formats MOVT T1 and +/// MOVW T3. +/// +/// [ 00000:i:000000:Imm4, 0:Imm3:0000:Imm8 ] -> Imm4:Imm1:Imm3:Imm8 +/// +uint16_t decodeImmMovtT1MovwT3(uint32_t Hi, uint32_t Lo) { + uint32_t Imm4 = Hi & 0x0f; + uint32_t Imm1 = (Hi >> 10) & 0x01; + uint32_t Imm3 = (Lo >> 12) & 0x07; + uint32_t Imm8 = Lo & 0xff; + uint32_t Imm16 = Imm4 << 12 | Imm1 << 11 | Imm3 << 8 | Imm8; + assert(Imm16 <= 0xffff && "Decoded value out-of-range"); + return Imm16; +} + +/// Encode register ID for instruction formats MOVT T1 and MOVW T3. +/// +/// Rd4 -> [0000000000000000, 0000:Rd4:00000000] +/// +HalfWords encodeRegMovtT1MovwT3(int64_t Value) { + uint32_t Rd4 = (Value & 0x0f) << 8; + return HalfWords{0, Rd4}; +} + +/// Decode register ID from instruction formats MOVT T1 and MOVW T3. +/// +/// [0000000000000000, 0000:Rd4:00000000] -> Rd4 +/// +int64_t decodeRegMovtT1MovwT3(uint32_t Hi, uint32_t Lo) { + uint32_t Rd4 = (Lo >> 8) & 0x0f; + return Rd4; +} + +/// 32-bit Thumb instructions are stored as two little-endian halfwords. +/// An instruction at address A encodes bytes A+1, A in the first halfword (Hi), +/// followed by bytes A+3, A+2 in the second halfword (Lo). +struct WritableThumbRelocation { + /// Create a writable reference to a Thumb32 fixup. + WritableThumbRelocation(char *FixupPtr) + : Hi{*reinterpret_cast(FixupPtr)}, + Lo{*reinterpret_cast(FixupPtr + 2)} {} + + support::ulittle16_t &Hi; // First halfword + support::ulittle16_t &Lo; // Second halfword +}; + +struct ThumbRelocation { + /// Create a read-only reference to a Thumb32 fixup. 
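+  ///
+  /// A minimal usage sketch, assuming FixupPtr addresses 4 valid bytes of
+  /// block content (LoBitNoBlx as used for Thumb_Call fixups below):
+  ///
+  ///   ThumbRelocation R(FixupPtr);
+  ///   bool IsBlx = (R.Lo & FixupInfo<Thumb_Call>::LoBitNoBlx) == 0;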
+  ThumbRelocation(const char *FixupPtr)
+      : Hi{*reinterpret_cast<const support::ulittle16_t *>(FixupPtr)},
+        Lo{*reinterpret_cast<const support::ulittle16_t *>(FixupPtr + 2)} {}
+
+  /// Create a read-only Thumb32 fixup from a writable one.
+  ThumbRelocation(WritableThumbRelocation &Writable)
+      : Hi{Writable.Hi}, Lo(Writable.Lo) {}
+
+  const support::ulittle16_t &Hi; // First halfword
+  const support::ulittle16_t &Lo; // Second halfword
+};
+
+Error makeUnexpectedOpcodeError(const LinkGraph &G, const ThumbRelocation &R,
+                                Edge::Kind Kind) {
+  return make_error<JITLinkError>(
+      formatv("Invalid opcode [ 0x{0:x4}, 0x{1:x4} ] for relocation: {2}", R.Hi,
+              R.Lo, G.getEdgeKindName(Kind)));
+}
+
+template <EdgeKind_aarch32 Kind> bool checkOpcode(const ThumbRelocation &R) {
+  uint16_t Hi = R.Hi & FixupInfo<Kind>::OpcodeMask.Hi;
+  uint16_t Lo = R.Lo & FixupInfo<Kind>::OpcodeMask.Lo;
+  return Hi == FixupInfo<Kind>::Opcode.Hi && Lo == FixupInfo<Kind>::Opcode.Lo;
+}
+
+template <EdgeKind_aarch32 Kind>
+bool checkRegister(const ThumbRelocation &R, HalfWords Reg) {
+  uint16_t Hi = R.Hi & FixupInfo<Kind>::RegMask.Hi;
+  uint16_t Lo = R.Lo & FixupInfo<Kind>::RegMask.Lo;
+  return Hi == Reg.Hi && Lo == Reg.Lo;
+}
+
+template <EdgeKind_aarch32 Kind>
+void writeRegister(WritableThumbRelocation &R, HalfWords Reg) {
+  static constexpr HalfWords Mask = FixupInfo<Kind>::RegMask;
+  assert((Mask.Hi & Reg.Hi) == Reg.Hi && (Mask.Lo & Reg.Lo) == Reg.Lo &&
+         "Value bits exceed bit range of given mask");
+  R.Hi = (R.Hi & ~Mask.Hi) | Reg.Hi;
+  R.Lo = (R.Lo & ~Mask.Lo) | Reg.Lo;
+}
+
+template <EdgeKind_aarch32 Kind>
+void writeImmediate(WritableThumbRelocation &R, HalfWords Imm) {
+  static constexpr HalfWords Mask = FixupInfo<Kind>::ImmMask;
+  assert((Mask.Hi & Imm.Hi) == Imm.Hi && (Mask.Lo & Imm.Lo) == Imm.Lo &&
+         "Value bits exceed bit range of given mask");
+  R.Hi = (R.Hi & ~Mask.Hi) | Imm.Hi;
+  R.Lo = (R.Lo & ~Mask.Lo) | Imm.Lo;
+}
+
+Expected<int64_t> readAddendData(LinkGraph &G, Block &B, const Edge &E) {
+  endianness Endian = G.getEndianness();
+  assert(Endian != native && "Declare as little or big explicitly");
+
+  Edge::Kind Kind = E.getKind();
+  const char *BlockWorkingMem = B.getContent().data();
+  const char *FixupPtr = BlockWorkingMem + E.getOffset();
+
+  switch (Kind) {
+  case Data_Delta32:
+    return SignExtend64<32>((Endian == little) ? read32<little>(FixupPtr)
+                                               : read32<big>(FixupPtr));
+  default:
+    return make_error<JITLinkError>(
+        "In graph " + G.getName() + ", section " + B.getSection().getName() +
+        " can not read implicit addend for aarch32 edge kind " +
+        G.getEdgeKindName(E.getKind()));
+  }
+}
+
+Expected<int64_t> readAddendArm(LinkGraph &G, Block &B, const Edge &E) {
+  Edge::Kind Kind = E.getKind();
+
+  switch (Kind) {
+  case Arm_Call:
+    return make_error<JITLinkError>(
+        "Addend extraction for relocation type not yet implemented: " +
+        StringRef(G.getEdgeKindName(Kind)));
+  default:
+    return make_error<JITLinkError>(
+        "In graph " + G.getName() + ", section " + B.getSection().getName() +
+        " can not read implicit addend for aarch32 edge kind " +
+        G.getEdgeKindName(E.getKind()));
+  }
+}
+
+Expected<int64_t> readAddendThumb(LinkGraph &G, Block &B, const Edge &E,
+                                  const ArmConfig &ArmCfg) {
+  ThumbRelocation R(B.getContent().data() + E.getOffset());
+  Edge::Kind Kind = E.getKind();
+
+  switch (Kind) {
+  case Thumb_Call:
+    if (!checkOpcode<Thumb_Call>(R))
+      return makeUnexpectedOpcodeError(G, R, Kind);
+    return LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)
+               ? 
decodeImmBT4BlT1BlxT2_J1J2(R.Hi, R.Lo) + : decodeImmBT4BlT1BlxT2(R.Hi, R.Lo); + + case Thumb_Jump24: + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + if (R.Lo & FixupInfo::LoBitConditional) + return make_error("Relocation expects an unconditional " + "B.W branch instruction: " + + StringRef(G.getEdgeKindName(Kind))); + return LLVM_LIKELY(ArmCfg.J1J2BranchEncoding) + ? decodeImmBT4BlT1BlxT2_J1J2(R.Hi, R.Lo) + : decodeImmBT4BlT1BlxT2(R.Hi, R.Lo); + + case Thumb_MovwAbsNC: + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + // Initial addend is interpreted as a signed value + return SignExtend64<16>(decodeImmMovtT1MovwT3(R.Hi, R.Lo)); + + case Thumb_MovtAbs: + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + // Initial addend is interpreted as a signed value + return SignExtend64<16>(decodeImmMovtT1MovwT3(R.Hi, R.Lo)); + + default: + return make_error( + "In graph " + G.getName() + ", section " + B.getSection().getName() + + " can not read implicit addend for aarch32 edge kind " + + G.getEdgeKindName(E.getKind())); + } +} + +Error applyFixupData(LinkGraph &G, Block &B, const Edge &E) { + using namespace support; + + char *BlockWorkingMem = B.getAlreadyMutableContent().data(); + char *FixupPtr = BlockWorkingMem + E.getOffset(); + + auto Write32 = [FixupPtr, Endian = G.getEndianness()](int64_t Value) { + assert(Endian != native && "Must be explicit: little or big"); + assert(isInt<32>(Value) && "Must be in signed 32-bit range"); + uint32_t Imm = static_cast(Value); + if (LLVM_LIKELY(Endian == little)) + endian::write32(FixupPtr, Imm); + else + endian::write32(FixupPtr, Imm); + }; + + Edge::Kind Kind = E.getKind(); + uint64_t FixupAddress = (B.getAddress() + E.getOffset()).getValue(); + int64_t Addend = E.getAddend(); + Symbol &TargetSymbol = E.getTarget(); + uint64_t TargetAddress = TargetSymbol.getAddress().getValue(); + assert(!TargetSymbol.hasTargetFlags(ThumbSymbol)); + + // Regular data relocations have size 4, alignment 1 and write the full 32-bit + // result to the place; no need for overflow checking. 
There are three + // exceptions: R_ARM_ABS8, R_ARM_ABS16, R_ARM_PREL31 + switch (Kind) { + case Data_Delta32: { + int64_t Value = TargetAddress - FixupAddress + Addend; + if (!isInt<32>(Value)) + return makeTargetOutOfRangeError(G, B, E); + Write32(Value); + return Error::success(); + } + default: + return make_error( + "In graph " + G.getName() + ", section " + B.getSection().getName() + + " encountered unfixable aarch32 edge kind " + + G.getEdgeKindName(E.getKind())); + } +} + +Error applyFixupArm(LinkGraph &G, Block &B, const Edge &E) { + Edge::Kind Kind = E.getKind(); + + switch (Kind) { + case Arm_Call: + return make_error( + "Fix-up for relocation type not yet implemented: " + + StringRef(G.getEdgeKindName(Kind))); + default: + return make_error( + "In graph " + G.getName() + ", section " + B.getSection().getName() + + " encountered unfixable aarch32 edge kind " + + G.getEdgeKindName(E.getKind())); + } +} + +Error applyFixupThumb(LinkGraph &G, Block &B, const Edge &E, + const ArmConfig &ArmCfg) { + WritableThumbRelocation R(B.getAlreadyMutableContent().data() + + E.getOffset()); + + Edge::Kind Kind = E.getKind(); + uint64_t FixupAddress = (B.getAddress() + E.getOffset()).getValue(); + int64_t Addend = E.getAddend(); + Symbol &TargetSymbol = E.getTarget(); + uint64_t TargetAddress = TargetSymbol.getAddress().getValue(); + if (TargetSymbol.hasTargetFlags(ThumbSymbol)) + TargetAddress |= 0x01; + + switch (Kind) { + case Thumb_Jump24: { + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + if (R.Lo & FixupInfo::LoBitConditional) + return make_error("Relocation expects an unconditional " + "B.W branch instruction: " + + StringRef(G.getEdgeKindName(Kind))); + if (!(TargetSymbol.hasTargetFlags(ThumbSymbol))) + return make_error("Branch relocation needs interworking " + "stub when bridging to ARM: " + + StringRef(G.getEdgeKindName(Kind))); + + int64_t Value = TargetAddress - FixupAddress + Addend; + if (LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)) { + if (!isInt<25>(Value)) + return makeTargetOutOfRangeError(G, B, E); + writeImmediate(R, encodeImmBT4BlT1BlxT2_J1J2(Value)); + } else { + if (!isInt<22>(Value)) + return makeTargetOutOfRangeError(G, B, E); + writeImmediate(R, encodeImmBT4BlT1BlxT2(Value)); + } + + return Error::success(); + } + + case Thumb_Call: { + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + + int64_t Value = TargetAddress - FixupAddress + Addend; + + // The call instruction itself is Thumb. The call destination can either be + // Thumb or Arm. We use BL to stay in Thumb and BLX to change to Arm. + bool TargetIsArm = !TargetSymbol.hasTargetFlags(ThumbSymbol); + bool InstrIsBlx = (R.Lo & FixupInfo::LoBitNoBlx) == 0; + if (TargetIsArm != InstrIsBlx) { + if (LLVM_LIKELY(TargetIsArm)) { + // Change opcode BL -> BLX and fix range value (account for 4-byte + // aligned destination while instruction may only be 2-byte aligned + // and clear Thumb bit). 
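+        // For example (sketch, Addend 0): a Thumb BL at 0x8002 reaching an
+        // Arm callee at 0x9000 gives Value 0xffe; alignTo(Value, 4) rounds it
+        // up to 0x1000 so the branch target stays 4-byte aligned.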
+ R.Lo = R.Lo & ~FixupInfo::LoBitNoBlx; + R.Lo = R.Lo & ~FixupInfo::LoBitH; + Value = alignTo(Value, 4); + } else { + // Change opcode BLX -> BL and set Thumb bit + R.Lo = R.Lo & ~FixupInfo::LoBitNoBlx; + Value |= 0x01; + } + } + + if (LLVM_LIKELY(ArmCfg.J1J2BranchEncoding)) { + if (!isInt<25>(Value)) + return makeTargetOutOfRangeError(G, B, E); + writeImmediate(R, encodeImmBT4BlT1BlxT2_J1J2(Value)); + } else { + if (!isInt<22>(Value)) + return makeTargetOutOfRangeError(G, B, E); + writeImmediate(R, encodeImmBT4BlT1BlxT2(Value)); + } + + assert(((R.Lo & FixupInfo::LoBitNoBlx) || + (R.Lo & FixupInfo::LoBitH) == 0) && + "Opcode BLX implies H bit is clear (avoid UB in BLX T2)"); + return Error::success(); + } + + case Thumb_MovwAbsNC: { + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + uint16_t Value = (TargetAddress + Addend) & 0xffff; + writeImmediate(R, encodeImmMovtT1MovwT3(Value)); + return Error::success(); + } + + case Thumb_MovtAbs: { + if (!checkOpcode(R)) + return makeUnexpectedOpcodeError(G, R, Kind); + uint16_t Value = ((TargetAddress + Addend) >> 16) & 0xffff; + writeImmediate(R, encodeImmMovtT1MovwT3(Value)); + return Error::success(); + } + + default: + return make_error( + "In graph " + G.getName() + ", section " + B.getSection().getName() + + " encountered unfixable aarch32 edge kind " + + G.getEdgeKindName(E.getKind())); + } +} + +const uint8_t Thumbv7ABS[] = { + 0x40, 0xf2, 0x00, 0x0c, // movw r12, #0x0000 ; lower 16-bit + 0xc0, 0xf2, 0x00, 0x0c, // movt r12, #0x0000 ; upper 16-bit + 0x60, 0x47 // bx r12 +}; + +template <> +Symbol &StubsManager::createEntry(LinkGraph &G, Symbol &Target) { + constexpr uint64_t Alignment = 4; + Block &B = addStub(G, Thumbv7ABS, Alignment); + LLVM_DEBUG({ + const char *StubPtr = B.getContent().data(); + HalfWords Reg12 = encodeRegMovtT1MovwT3(12); + assert(checkRegister(StubPtr, Reg12) && + checkRegister(StubPtr + 4, Reg12) && + "Linker generated stubs may only corrupt register r12 (IP)"); + }); + B.addEdge(Thumb_MovwAbsNC, 0, Target, 0); + B.addEdge(Thumb_MovtAbs, 4, Target, 0); + Symbol &Stub = G.addAnonymousSymbol(B, 0, B.getSize(), true, false); + Stub.setTargetFlags(ThumbSymbol); + return Stub; +} + +const char *getEdgeKindName(Edge::Kind K) { +#define KIND_NAME_CASE(K) \ + case K: \ + return #K; + + switch (K) { + KIND_NAME_CASE(Data_Delta32) + KIND_NAME_CASE(Arm_Call) + KIND_NAME_CASE(Thumb_Call) + KIND_NAME_CASE(Thumb_Jump24) + KIND_NAME_CASE(Thumb_MovwAbsNC) + KIND_NAME_CASE(Thumb_MovtAbs) + default: + return getGenericEdgeKindName(K); + } +#undef KIND_NAME_CASE +} + +const char *getCPUArchName(ARMBuildAttrs::CPUArch K) { +#define CPUARCH_NAME_CASE(K) \ + case K: \ + return #K; + + using namespace ARMBuildAttrs; + switch (K) { + CPUARCH_NAME_CASE(Pre_v4) + CPUARCH_NAME_CASE(v4) + CPUARCH_NAME_CASE(v4T) + CPUARCH_NAME_CASE(v5T) + CPUARCH_NAME_CASE(v5TE) + CPUARCH_NAME_CASE(v5TEJ) + CPUARCH_NAME_CASE(v6) + CPUARCH_NAME_CASE(v6KZ) + CPUARCH_NAME_CASE(v6T2) + CPUARCH_NAME_CASE(v6K) + CPUARCH_NAME_CASE(v7) + CPUARCH_NAME_CASE(v6_M) + CPUARCH_NAME_CASE(v6S_M) + CPUARCH_NAME_CASE(v7E_M) + CPUARCH_NAME_CASE(v8_A) + CPUARCH_NAME_CASE(v8_R) + CPUARCH_NAME_CASE(v8_M_Base) + CPUARCH_NAME_CASE(v8_M_Main) + CPUARCH_NAME_CASE(v8_1_M_Main) + CPUARCH_NAME_CASE(v9_A) + } + llvm_unreachable("Missing CPUArch in switch?"); +#undef CPUARCH_NAME_CASE +} + +} // namespace aarch32 +} // namespace jitlink +} // namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp 
b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp index 2c270cd66285d..9103c62a337bb 100644 --- a/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/ObjectLinkingLayer.cpp @@ -8,6 +8,7 @@ #include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h" #include "llvm/ExecutionEngine/JITLink/EHFrameSupport.h" +#include "llvm/ExecutionEngine/JITLink/aarch32.h" #include "llvm/ExecutionEngine/Orc/DebugObjectManagerPlugin.h" #include "llvm/ExecutionEngine/Orc/ObjectFileInterface.h" #include "llvm/ExecutionEngine/Orc/Shared/ObjectFormats.h" @@ -39,8 +40,20 @@ bool hasInitializerSection(jitlink::LinkGraph &G) { return false; } -JITTargetAddress getJITSymbolPtrForSymbol(Symbol &Sym) { - return Sym.getAddress().getValue(); +JITTargetAddress getJITSymbolPtrForSymbol(Symbol &Sym, const Triple &TT) { + uint64_t CallableAddr = Sym.getAddress().getValue(); + switch (TT.getArch()) { + case Triple::arm: + case Triple::armeb: + case Triple::thumb: + case Triple::thumbeb: + if (Sym.hasTargetFlags(aarch32::ThumbSymbol) && Sym.isCallable()) + CallableAddr |= 0x01; // LSB is thumb bit + break; + default: + break; + } + return CallableAddr; } JITSymbolFlags getJITSymbolFlagsForSymbol(Symbol &Sym) { @@ -219,7 +232,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { for (auto *Sym : G.defined_symbols()) if (Sym->hasName() && Sym->getScope() != Scope::Local) { auto InternedName = ES.intern(Sym->getName()); - auto Ptr = getJITSymbolPtrForSymbol(*Sym); + auto Ptr = getJITSymbolPtrForSymbol(*Sym, G.getTargetTriple()); auto Flags = getJITSymbolFlagsForSymbol(*Sym); InternedResult[InternedName] = JITEvaluatedSymbol(Ptr, Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { @@ -232,7 +245,7 @@ class ObjectLinkingLayerJITLinkContext final : public JITLinkContext { for (auto *Sym : G.absolute_symbols()) if (Sym->hasName() && Sym->getScope() != Scope::Local) { auto InternedName = ES.intern(Sym->getName()); - auto Ptr = getJITSymbolPtrForSymbol(*Sym); + auto Ptr = getJITSymbolPtrForSymbol(*Sym, G.getTargetTriple()); auto Flags = getJITSymbolFlagsForSymbol(*Sym); InternedResult[InternedName] = JITEvaluatedSymbol(Ptr, Flags); if (AutoClaim && !MR->getSymbols().count(InternedName)) { diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s new file mode 100644 index 0000000000000..11a77c95cfa8f --- /dev/null +++ b/llvm/test/ExecutionEngine/JITLink/AArch32/ELF_thumbv7_printf.s @@ -0,0 +1,46 @@ +// RUN: llvm-mc -triple=thumbv7-none-linux-gnueabi -arm-add-build-attributes -filetype=obj -o %t.o %s +// RUN: llvm-jitlink -noexec -slab-address 0x76ff0000 -slab-allocate 10Kb -slab-page-size 4096 -abs printf=0x76bbe880 -show-entry-es %t.o | FileCheck %s + +// Check that main is a thumb symbol (with LSB set) and printf is arm (with LSB clear) +// +// CHECK-LABEL: Symbol table: +// CHECK-NEXT: "main": 0x{{[0-9a-f]+[13579bdf]}} [Callable] Ready +// CHECK-NEXT: "printf": 0x76bbe880 [Data] Ready + + .globl main + .p2align 2 + .type main,%function + .code 16 + .thumb_func +main: + .fnstart + .save {r7, lr} + push {r7, lr} + .setfp r7, sp + mov r7, sp + .pad #8 + sub sp, #8 + movs r0, #0 + str r0, [sp] + str r0, [sp, #4] + ldr r0, .LCPI0_0 +.LPC0_0: + add r0, pc + bl printf + ldr r0, [sp] + add sp, #8 + pop {r7, pc} + + .p2align 2 +.LCPI0_0: + .long .L.str-(.LPC0_0+4) + + .size main, .-main + .cantunwind + .fnend + + .type .L.str,%object + .section .rodata.str1.1,"aMS",%progbits,1 +.L.str: + 
.asciz "Hello AArch32!\n"
+  .size .L.str, 16

diff --git a/llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg b/llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg
new file mode 100644
index 0000000000000..20e19aeb06f9d
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/AArch32/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True

diff --git a/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp b/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp
new file mode 100644
index 0000000000000..0e41174040b68
--- /dev/null
+++ b/llvm/unittests/ExecutionEngine/JITLink/AArch32Tests.cpp
@@ -0,0 +1,200 @@
+//===------- AArch32Tests.cpp - Unit tests for the AArch32 backend --------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <llvm/BinaryFormat/ELF.h>
+#include <llvm/ExecutionEngine/JITLink/aarch32.h>
+
+#include "gtest/gtest.h"
+
+using namespace llvm;
+using namespace llvm::jitlink;
+using namespace llvm::jitlink::aarch32;
+using namespace llvm::support;
+using namespace llvm::support::endian;
+
+struct MutableHalfWords {
+  MutableHalfWords(HalfWords Preset) : Hi(Preset.Hi), Lo(Preset.Lo) {}
+
+  void patch(HalfWords Value, HalfWords Mask) {
+    Hi = (Hi & ~Mask.Hi) | Value.Hi;
+    Lo = (Lo & ~Mask.Lo) | Value.Lo;
+  }
+
+  uint16_t Hi; // First halfword
+  uint16_t Lo; // Second halfword
+};
+
+namespace llvm {
+namespace jitlink {
+
+Expected<aarch32::EdgeKind_aarch32> getJITLinkEdgeKind(uint32_t ELFType);
+Expected<uint32_t> getELFRelocationType(Edge::Kind Kind);
+
+} // namespace jitlink
+} // namespace llvm
+
+TEST(AArch32_ELF, EdgeKinds) {
+  // Fails: Invalid ELF type -> JITLink kind
+  Expected<aarch32::EdgeKind_aarch32> ErrKind =
+      getJITLinkEdgeKind(ELF::R_ARM_NONE);
+  EXPECT_TRUE(errorToBool(ErrKind.takeError()));
+
+  // Fails: Invalid JITLink kind -> ELF type
+  Expected<uint32_t> ErrType = getELFRelocationType(Edge::Invalid);
+  EXPECT_TRUE(errorToBool(ErrType.takeError()));
+
+  for (Edge::Kind K = FirstDataRelocation; K < LastThumbRelocation; K += 1) {
+    Expected<uint32_t> ELFType = getELFRelocationType(K);
+    EXPECT_FALSE(errorToBool(ELFType.takeError()))
+        << "Failed to translate JITLink kind -> ELF type";
+
+    Expected<aarch32::EdgeKind_aarch32> JITLinkKind =
+        getJITLinkEdgeKind(*ELFType);
+    EXPECT_FALSE(errorToBool(JITLinkKind.takeError()))
+        << "Failed to translate ELF type -> JITLink kind";
+
+    EXPECT_EQ(*JITLinkKind, K) << "Round-trip value inconsistent?";
+  }
+}
+
+namespace llvm {
+namespace jitlink {
+namespace aarch32 {
+
+HalfWords encodeImmBT4BlT1BlxT2(int64_t Value);
+HalfWords encodeImmBT4BlT1BlxT2_J1J2(int64_t Value);
+HalfWords encodeImmMovtT1MovwT3(uint16_t Value);
+HalfWords encodeRegMovtT1MovwT3(int64_t Value);
+
+int64_t decodeImmBT4BlT1BlxT2(uint32_t Hi, uint32_t Lo);
+int64_t decodeImmBT4BlT1BlxT2_J1J2(uint32_t Hi, uint32_t Lo);
+uint16_t decodeImmMovtT1MovwT3(uint32_t Hi, uint32_t Lo);
+int64_t decodeRegMovtT1MovwT3(uint32_t Hi, uint32_t Lo);
+
+} // namespace aarch32
+} // namespace jitlink
+} // namespace llvm
+
+// Big-endian targets for v7 and v8 (and v6, unless in the legacy
+// backwards-compatible mode be32) have little-endian instructions and
+// big-endian data. In ELF relocatable objects, big-endian instructions may
+// still be encountered. A be8-supporting linker is expected to endian-reverse
+// instructions for the executable.
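+// For example, the preset bytes { 0xff, 0xf7, 0xfe, 0xef } used below decode
+// to HalfWords{0xf7ff, 0xeffe} when read little-endian, but to
+// HalfWords{0xfff7, 0xfeef} when read big-endian.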
+template +static HalfWords makeHalfWords(std::array Mem) { + return HalfWords{read16(Mem.data()), read16(Mem.data() + 2)}; +} + +/// 25-bit branch with link (with J1J2 range extension) +TEST(AArch32_Relocations, Thumb_Call_J1J2) { + static_assert(isInt<25>(16777215), "Max value"); + static_assert(isInt<25>(-16777215), "Min value"); + static_assert(!isInt<25>(16777217), "First overflow"); + static_assert(!isInt<25>(-16777217), "First underflow"); + + constexpr HalfWords ImmMask = FixupInfo::ImmMask; + + static std::array MemPresets{ + makeHalfWords({0xff, 0xf7, 0xfe, 0xef}), // common + makeHalfWords({0x00, 0x00, 0x00, 0x00}), // zeros + makeHalfWords({0xff, 0xff, 0xff, 0xff}), // ones + }; + + auto EncodeDecode = [ImmMask](int64_t In, MutableHalfWords &Mem) { + Mem.patch(encodeImmBT4BlT1BlxT2_J1J2(In), ImmMask); + return decodeImmBT4BlT1BlxT2_J1J2(Mem.Hi, Mem.Lo); + }; + + for (MutableHalfWords Mem : MemPresets) { + HalfWords UnaffectedBits(Mem.Hi & ~ImmMask.Hi, Mem.Lo & ~ImmMask.Lo); + + EXPECT_EQ(EncodeDecode(1, Mem), 0); // Zero value + EXPECT_EQ(EncodeDecode(0x41, Mem), 0x40); // Common value + EXPECT_EQ(EncodeDecode(16777215, Mem), 16777214); // Maximum value + EXPECT_EQ(EncodeDecode(-16777215, Mem), -16777216); // Minimum value + EXPECT_NE(EncodeDecode(16777217, Mem), 16777217); // First overflow + EXPECT_NE(EncodeDecode(-16777217, Mem), -16777217); // First underflow + + EXPECT_TRUE(UnaffectedBits.Hi == (Mem.Hi & ~ImmMask.Hi) && + UnaffectedBits.Lo == (Mem.Lo & ~ImmMask.Lo)) + << "Diff outside immediate field"; + } +} + +/// 22-bit branch with link (without J1J2 range extension) +TEST(AArch32_Relocations, Thumb_Call_Bare) { + static_assert(isInt<22>(2097151), "Max value"); + static_assert(isInt<22>(-2097151), "Min value"); + static_assert(!isInt<22>(2097153), "First overflow"); + static_assert(!isInt<22>(-2097153), "First underflow"); + + constexpr HalfWords ImmMask = FixupInfo::ImmMask; + + static std::array MemPresets{ + makeHalfWords({0xff, 0xf7, 0xfe, 0xef}), // common + makeHalfWords({0x00, 0x00, 0x00, 0x00}), // zeros + makeHalfWords({0xff, 0xff, 0xff, 0xff}), // ones + }; + + auto EncodeDecode = [ImmMask](int64_t In, MutableHalfWords &Mem) { + Mem.patch(encodeImmBT4BlT1BlxT2_J1J2(In), ImmMask); + return decodeImmBT4BlT1BlxT2_J1J2(Mem.Hi, Mem.Lo); + }; + + for (MutableHalfWords Mem : MemPresets) { + HalfWords UnaffectedBits(Mem.Hi & ~ImmMask.Hi, Mem.Lo & ~ImmMask.Lo); + + EXPECT_EQ(EncodeDecode(1, Mem), 0); // Zero value + EXPECT_EQ(EncodeDecode(0x41, Mem), 0x40); // Common value + EXPECT_EQ(EncodeDecode(2097151, Mem), 2097150); // Maximum value + EXPECT_EQ(EncodeDecode(-2097151, Mem), -2097152); // Minimum value + EXPECT_NE(EncodeDecode(2097153, Mem), 2097153); // First overflow + EXPECT_NE(EncodeDecode(-2097153, Mem), -2097153); // First underflow + + EXPECT_TRUE(UnaffectedBits.Hi == (Mem.Hi & ~ImmMask.Hi) && + UnaffectedBits.Lo == (Mem.Lo & ~ImmMask.Lo)) + << "Diff outside immediate field"; + } +} + +/// Write immediate value to the top halfword of the destination register +TEST(AArch32_Relocations, Thumb_MovtAbs) { + static_assert(isUInt<16>(65535), "Max value"); + static_assert(!isUInt<16>(65536), "First overflow"); + + constexpr HalfWords ImmMask = FixupInfo::ImmMask; + constexpr HalfWords RegMask = FixupInfo::RegMask; + + static std::array Registers{0, 5, 12}; + static std::array MemPresets{ + makeHalfWords({0xff, 0xf7, 0xfe, 0xef}), // common + makeHalfWords({0x00, 0x00, 0x00, 0x00}), // zeros + makeHalfWords({0xff, 0xff, 0xff, 0xff}), // ones + }; + + auto 
EncodeDecode = [ImmMask](uint32_t In, MutableHalfWords &Mem) { + Mem.patch(encodeImmMovtT1MovwT3(In), ImmMask); + return decodeImmMovtT1MovwT3(Mem.Hi, Mem.Lo); + }; + + for (MutableHalfWords Mem : MemPresets) { + for (uint8_t Reg : Registers) { + HalfWords UnaffectedBits(Mem.Hi & ~(ImmMask.Hi | RegMask.Hi), + Mem.Lo & ~(ImmMask.Lo | RegMask.Lo)); + + Mem.patch(encodeRegMovtT1MovwT3(Reg), RegMask); + EXPECT_EQ(EncodeDecode(0x76bb, Mem), 0x76bb); // Common value + EXPECT_EQ(EncodeDecode(0, Mem), 0); // Minimum value + EXPECT_EQ(EncodeDecode(0xffff, Mem), 0xffff); // Maximum value + EXPECT_NE(EncodeDecode(0x10000, Mem), 0x10000); // First overflow + + // Destination register as well as unaffacted bits should be intact + EXPECT_EQ(decodeRegMovtT1MovwT3(Mem.Hi, Mem.Lo), Reg); + EXPECT_TRUE(UnaffectedBits.Hi == (Mem.Hi & ~(ImmMask.Hi | RegMask.Hi)) && + UnaffectedBits.Lo == (Mem.Lo & ~(ImmMask.Lo | RegMask.Lo))) + << "Diff outside immediate/register field"; + } + } +} diff --git a/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt b/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt index 1a71a62d3756d..978914c748c63 100644 --- a/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt +++ b/llvm/unittests/ExecutionEngine/JITLink/CMakeLists.txt @@ -8,6 +8,7 @@ set(LLVM_LINK_COMPONENTS ) add_llvm_unittest(JITLinkTests + AArch32Tests.cpp EHFrameSupportTests.cpp LinkGraphTests.cpp ) From b94175b0ee5712cab8302ac62854106b82d3be08 Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Fri, 24 Mar 2023 09:27:23 +0000 Subject: [PATCH 522/691] [clang-format] NFC Format.h and ClangFormatStyleOptions.rst are out of date Regenerate the style documentation, requires some minor sphinx changes to avoid warnings Reviewed By: klimek Differential Revision: https://reviews.llvm.org/D146704 --- clang/docs/ClangFormatStyleOptions.rst | 43 ++++++++++++++++++++++++++ clang/include/clang/Format/Format.h | 27 ++++++++++------ 2 files changed, 60 insertions(+), 10 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index fd8f2bbb54322..24ae02a2eddb2 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -3642,6 +3642,49 @@ the configuration (without a prefix: ``Auto``). **MacroBlockEnd** (``String``) :versionbadge:`clang-format 3.7` :ref:`¶ ` A regular expression matching macros that end a block. +.. _Macros: + +**Macros** (``List of Strings``) :versionbadge:`clang-format 17.0` :ref:`¶ ` + A list of macros of the form ``=`` . + + Code will be parsed with macros expanded, in order to determine how to + interpret and format the macro arguments. + + For example, the code: + + .. code-block:: c++ + + A(a*b); + + will usually be interpreted as a call to a function A, and the + multiplication expression will be formatted as `a * b`. + + If we specify the macro definition: + + .. code-block:: yaml + + Macros: + - A(x)=x + + the code will now be parsed as a declaration of the variable b of type a*, + and formatted as `a* b` (depending on pointer-binding rules). + + Features and restrictions: + * Both function-like macros and object-like macros are supported. + * Macro arguments must be used exactly once in the expansion. + * No recursive expansion; macros referencing other macros will be + ignored. + * Overloading by arity is supported: for example, given the macro + definitions A=x, A()=y, A(a)=a + + + .. code-block:: c++ + + A; -> x; + A(); -> y; + A(z); -> z; + A(a, b); // will not be expanded. + .. 
_MaxEmptyLinesToKeep: **MaxEmptyLinesToKeep** (``Unsigned``) :versionbadge:`clang-format 3.7` :ref:`¶ ` diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 66904a6a11232..a55cd76d149ca 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2754,28 +2754,35 @@ struct FormatStyle { /// \code /// A(a*b); /// \endcode + /// /// will usually be interpreted as a call to a function A, and the /// multiplication expression will be formatted as `a * b`. /// /// If we specify the macro definition: - /// \code + /// \code{.yaml} /// Macros: /// - A(x)=x /// \endcode + /// /// the code will now be parsed as a declaration of the variable b of type a*, /// and formatted as `a* b` (depending on pointer-binding rules). /// /// Features and restrictions: - /// * Both function-like macros and object-like macros are supported. - /// * Macro arguments must be used exactly once in the expansion. - /// * No recursive expansion; macros referencing other macros will be + /// * Both function-like macros and object-like macros are supported. + /// * Macro arguments must be used exactly once in the expansion. + /// * No recursive expansion; macros referencing other macros will be /// ignored. - /// * Overloading by arity is supported: for example, given the macro - /// definitions A=x, A()=y, A(a)=a, - /// 'A;' -> 'x;' - /// 'A();' -> 'y;' - /// 'A(z);' -> 'z;' - /// 'A(a, b) will not be expanded. + /// * Overloading by arity is supported: for example, given the macro + /// definitions A=x, A()=y, A(a)=a + /// + /// \code + /// A; -> x; + /// A(); -> y; + /// A(z); -> z; + /// A(a, b); // will not be expanded. + /// \endcode + /// + /// \version 17.0 std::vector Macros; /// The maximum number of consecutive empty lines to keep. From 0c36ab19081fd45d2894ada7b6d2852161a4310f Mon Sep 17 00:00:00 2001 From: Akshay Khadse Date: Fri, 24 Mar 2023 17:14:30 +0800 Subject: [PATCH 523/691] [NFC] Fix auto usage to avoid copies Fixes some usages of the "auto" keyword to avoid creation of copies. Reviewed By: LuoYuanke Differential Revision: https://reviews.llvm.org/D146694 --- llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index 8b0ab67cb9185..e43824198af49 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -854,15 +854,15 @@ class MemLocFragmentFill { } // Insert new location defs. - for (auto Pair : BBInsertBeforeMap) { + for (auto &Pair : BBInsertBeforeMap) { InsertMap &Map = Pair.second; - for (auto Pair : Map) { + for (auto &Pair : Map) { Instruction *InsertBefore = Pair.first; assert(InsertBefore && "should never be null"); auto FragMemLocs = Pair.second; auto &Ctx = Fn.getContext(); - for (auto FragMemLoc : FragMemLocs) { + for (auto &FragMemLoc : FragMemLocs) { DIExpression *Expr = DIExpression::get(Ctx, std::nullopt); Expr = *DIExpression::createFragmentExpression( Expr, FragMemLoc.OffsetInBits, FragMemLoc.SizeInBits); From bd0c281fcdcbce224411319e7eaf981ca0a89777 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Thu, 16 Mar 2023 11:39:00 +0000 Subject: [PATCH 524/691] [NFC][LoopVectorize] Change trip counts for some tests to guarantee a scalar tail Quite a few vectoriser tests were using a trip count of 1024, which meant: 1. For fixed-length VFs we would never actually tail-fold, e.g. 
see Transforms/LoopVectorize/RISCV/uniform-load-store.ll. This is because we can prove at compile-time there will never be a scalar tail. 2. As of D146199 the same optimisation mentioned above will also apply to scalable VFs too. I've changed all such trip counts to be 1025 instead. Differential Revision: https://reviews.llvm.org/D146219 --- .../LoopVectorize/AArch64/masked-call.ll | 88 ++-- .../AArch64/sve-tail-folding-optsize.ll | 14 +- .../LoopVectorize/AArch64/sve-tail-folding.ll | 66 +++ .../LoopVectorize/RISCV/scalable-tailfold.ll | 109 ++++- .../LoopVectorize/RISCV/uniform-load-store.ll | 386 +++++++++--------- 5 files changed, 402 insertions(+), 261 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll index 9756651c86b2e..7d896f61d4de1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call.ll @@ -12,13 +12,13 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: entry: ; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TFNONE: vector.ph: ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -33,7 +33,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFNONE: middle.block: -; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: ; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -46,7 +46,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFNONE: for.cond.cleanup: ; TFNONE-NEXT: ret void @@ -82,14 +82,14 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFALWAYS-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[TMP10]], ptr [[TMP11]], i32 4, <2 x i1> [[ACTIVE_LANE_MASK]]) ; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; TFALWAYS-NEXT: 
[[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX_NEXT]], i64 1024) +; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFALWAYS-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], ; TFALWAYS-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 ; TFALWAYS-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFALWAYS: middle.block: ; TFALWAYS-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFALWAYS: scalar.ph: -; TFALWAYS-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TFALWAYS-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1026, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]] ; TFALWAYS: for.body: ; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -99,7 +99,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFALWAYS: for.cond.cleanup: ; TFALWAYS-NEXT: ret void @@ -135,14 +135,14 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFFALLBACK-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[TMP10]], ptr [[TMP11]], i32 4, <2 x i1> [[ACTIVE_LANE_MASK]]) ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX_NEXT]], i64 1024) +; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFFALLBACK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], ; TFFALLBACK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 ; TFFALLBACK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TFFALLBACK: middle.block: ; TFFALLBACK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFFALLBACK: scalar.ph: -; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1026, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] ; TFFALLBACK: for.body: ; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -152,7 +152,7 @@ define void @test_widen(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; 
TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TFFALLBACK: for.cond.cleanup: ; TFFALLBACK-NEXT: ret void @@ -168,7 +168,7 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv store i64 %call, ptr %arrayidx %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1024 + %exitcond = icmp eq i64 %indvars.iv.next, 1025 br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: @@ -194,7 +194,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; TFNONE: for.cond.cleanup: ; TFNONE-NEXT: ret void @@ -216,7 +216,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] ; TFALWAYS-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; TFALWAYS: for.cond.cleanup: ; TFALWAYS-NEXT: ret void @@ -238,7 +238,7 @@ define void @test_if_then(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] ; TFFALLBACK-NEXT: store i64 [[TMP2]], ptr [[ARRAYIDX1]], align 8 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; TFFALLBACK: for.cond.cleanup: ; TFFALLBACK-NEXT: ret void @@ -262,7 +262,7 @@ if.end: %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %indvars.iv store i64 %2, ptr %arrayidx1, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1024 + %exitcond = icmp eq i64 %indvars.iv.next, 1025 br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: @@ -294,7 +294,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX1]], align 8 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; TFNONE: for.cond.cleanup: ; TFNONE-NEXT: ret void @@ -319,7 +319,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] ; TFALWAYS-NEXT: store i64 
[[TMP3]], ptr [[ARRAYIDX1]], align 8 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; TFALWAYS: for.cond.cleanup: ; TFALWAYS-NEXT: ret void @@ -344,7 +344,7 @@ define void @test_widen_if_then_else(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[INDVARS_IV]] ; TFFALLBACK-NEXT: store i64 [[TMP3]], ptr [[ARRAYIDX1]], align 8 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; TFFALLBACK: for.cond.cleanup: ; TFFALLBACK-NEXT: ret void @@ -372,7 +372,7 @@ if.end: %arrayidx1 = getelementptr inbounds i64, ptr %b, i64 %indvars.iv store i64 %3, ptr %arrayidx1, align 8 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1024 + %exitcond = icmp eq i64 %indvars.iv.next, 1025 br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: @@ -387,13 +387,13 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: entry: ; TFNONE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TFNONE: vector.ph: ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -408,7 +408,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFNONE: middle.block: -; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: ; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -421,7 +421,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], 
!llvm.loop [[LOOP5:![0-9]+]] ; TFNONE: for.cond.cleanup: ; TFNONE-NEXT: ret void @@ -437,7 +437,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; TFALWAYS: for.cond.cleanup: ; TFALWAYS-NEXT: ret void @@ -446,13 +446,13 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: entry: ; TFFALLBACK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; TFFALLBACK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFFALLBACK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; TFFALLBACK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; TFFALLBACK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TFFALLBACK: vector.ph: ; TFFALLBACK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TFFALLBACK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; TFFALLBACK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; TFFALLBACK-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFFALLBACK-NEXT: br label [[VECTOR_BODY:%.*]] ; TFFALLBACK: vector.body: ; TFFALLBACK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -467,7 +467,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFFALLBACK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFFALLBACK: middle.block: -; TFFALLBACK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; TFFALLBACK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; TFFALLBACK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFFALLBACK: scalar.ph: ; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -480,7 +480,7 @@ define void @test_widen_nomask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFFALLBACK: for.cond.cleanup: ; TFFALLBACK-NEXT: ret void @@ -496,7 +496,7 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv store i64 %call, ptr %arrayidx %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1024 + %exitcond = icmp eq i64 %indvars.iv.next, 1025 br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: @@ -511,13 +511,13 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: entry: ; TFNONE-NEXT: [[TMP0:%.*]] = call i64 
@llvm.vscale.i64() ; TFNONE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; TFNONE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; TFNONE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; TFNONE: vector.ph: ; TFNONE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TFNONE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; TFNONE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; TFNONE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; TFNONE-NEXT: br label [[VECTOR_BODY:%.*]] ; TFNONE: vector.body: ; TFNONE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -532,7 +532,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; TFNONE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFNONE: middle.block: -; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; TFNONE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; TFNONE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFNONE: scalar.ph: ; TFNONE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -545,7 +545,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFNONE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFNONE-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFNONE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFNONE-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFNONE-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TFNONE: for.cond.cleanup: ; TFNONE-NEXT: ret void @@ -581,14 +581,14 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFALWAYS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFALWAYS-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[TMP10]], ptr [[TMP11]], i32 4, <2 x i1> [[ACTIVE_LANE_MASK]]) ; TFALWAYS-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX_NEXT]], i64 1024) +; TFALWAYS-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFALWAYS-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], ; TFALWAYS-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 ; TFALWAYS-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TFALWAYS: middle.block: ; TFALWAYS-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFALWAYS: scalar.ph: -; TFALWAYS-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TFALWAYS-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1026, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TFALWAYS-NEXT: br label [[FOR_BODY:%.*]] ; TFALWAYS: for.body: ; TFALWAYS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -598,7 +598,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone 
%b) #4 { ; TFALWAYS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFALWAYS-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFALWAYS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFALWAYS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFALWAYS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TFALWAYS: for.cond.cleanup: ; TFALWAYS-NEXT: ret void @@ -634,14 +634,14 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; TFFALLBACK-NEXT: call void @llvm.masked.store.v2i64.p0(<2 x i64> [[TMP10]], ptr [[TMP11]], i32 4, <2 x i1> [[ACTIVE_LANE_MASK]]) ; TFFALLBACK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2 -; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX_NEXT]], i64 1024) +; TFFALLBACK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; TFFALLBACK-NEXT: [[TMP12:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], ; TFFALLBACK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP12]], i32 0 ; TFFALLBACK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TFFALLBACK: middle.block: ; TFFALLBACK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; TFFALLBACK: scalar.ph: -; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TFFALLBACK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1026, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TFFALLBACK-NEXT: br label [[FOR_BODY:%.*]] ; TFFALLBACK: for.body: ; TFFALLBACK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -651,7 +651,7 @@ define void @test_widen_optmask(ptr noalias %a, ptr readnone %b) #4 { ; TFFALLBACK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDVARS_IV]] ; TFFALLBACK-NEXT: store i64 [[CALL]], ptr [[ARRAYIDX]], align 4 ; TFFALLBACK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024 +; TFFALLBACK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1025 ; TFFALLBACK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TFFALLBACK: for.cond.cleanup: ; TFFALLBACK-NEXT: ret void @@ -667,7 +667,7 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %indvars.iv store i64 %call, ptr %arrayidx %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond = icmp eq i64 %indvars.iv.next, 1024 + %exitcond = icmp eq i64 %indvars.iv.next, 1025 br i1 %exitcond, label %for.cond.cleanup, label %for.body for.cond.cleanup: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll index aec63ff71aac8..c609380eebfb5 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-optsize.ll @@ -3,8 +3,8 @@ target triple = "aarch64-unknown-linux-gnu" -define void @trip1024_i64(i64* noalias nocapture noundef %dst, i64* noalias nocapture noundef readonly %src) #0 { -; CHECK-LABEL: @trip1024_i64( 
+define void @trip1025_i64(i64* noalias nocapture noundef %dst, i64* noalias nocapture noundef readonly %src) #0 { +; CHECK-LABEL: @trip1025_i64( ; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: @@ -13,10 +13,10 @@ define void @trip1024_i64(i64* noalias nocapture noundef %dst, i64* noalias noca ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1024) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 1025) ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -37,7 +37,7 @@ define void @trip1024_i64(i64* noalias nocapture noundef %dst, i64* noalias noca ; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP16]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1024) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX_NEXT]], i64 1025) ; CHECK-NEXT: [[TMP17:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) ; CHECK-NEXT: [[TMP18:%.*]] = extractelement [[TMP17]], i32 0 ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] @@ -56,7 +56,7 @@ define void @trip1024_i64(i64* noalias nocapture noundef %dst, i64* noalias noca ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP20]], [[MUL]] ; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX1]], align 8 ; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_06]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1024 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 1025 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -74,7 +74,7 @@ for.body: ; preds = %entry, %for.body %add = add nsw i64 %1, %mul store i64 %add, i64* %arrayidx1, align 8 %inc = add nuw nsw i64 %i.06, 1 - %exitcond.not = icmp eq i64 %inc, 1024 + %exitcond.not = icmp eq i64 %inc, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: ; preds = %for.body diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll index cd71f9e9f6706..157e763440278 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll @@ -774,6 +774,72 @@ while.end.loopexit: ; preds = %while.body ret void } +define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 { +; CHECK-LABEL: @simple_memset_trip1024( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 
[[TMP0]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 1024, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i64 1024, [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 1024) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[VAL:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX1]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[PTR:%.*]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP11]], i32 0 +; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[BROADCAST_SPLAT]], ptr [[TMP12]], i32 4, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]]) +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP14]] +; CHECK-NEXT: [[TMP15:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[TMP16:%.*]] = extractelement [[TMP15]], i32 0 +; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[WHILE_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[PTR]], i64 [[INDEX]] +; CHECK-NEXT: store i32 [[VAL]], ptr [[GEP]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nsw i64 [[INDEX]], 1 +; CHECK-NEXT: [[CMP10:%.*]] = icmp ult i64 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[CMP10]], label [[WHILE_BODY]], label [[WHILE_END_LOOPEXIT]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK: while.end.loopexit: +; CHECK-NEXT: ret void +; +entry: + br label %while.body + +while.body: ; preds = %while.body, %entry + %index = phi i64 [ %index.next, %while.body ], [ 0, %entry ] + %gep = getelementptr i32, ptr %ptr, i64 %index + store i32 %val, ptr %gep + %index.next = add nsw i64 %index, 1 + %cmp10 = icmp ult i64 %index.next, 1024 + br i1 %cmp10, label %while.body, label %while.end.loopexit, !llvm.loop !0 + +while.end.loopexit: ; preds = %while.body + ret void +} + !0 = distinct !{!0, !1, !2} !1 = !{!"llvm.loop.vectorize.width", i32 4} !2 = 
!{!"llvm.loop.vectorize.scalable.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll index 5e231da7e7b57..0a8c7cfda9efb 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-tailfold.ll @@ -16,7 +16,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 @@ -25,7 +25,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) @@ -48,7 +48,7 @@ define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] ; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -63,7 +63,7 @@ for.body: %add = add i64 %elem, %v store i64 %add, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ -82,7 +82,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 @@ -91,7 +91,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call 
@llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) @@ -114,7 +114,7 @@ define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]] ; CHECK-NEXT: store i64 [[V]], ptr [[AADDR]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -129,7 +129,7 @@ for.body: %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx store i64 %v, ptr %aaddr %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ -146,7 +146,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] @@ -154,7 +154,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) @@ -183,7 +183,7 @@ define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]] -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] @@ -201,7 +201,7 @@ for.body: %elem = load i64, ptr %aaddr %iv.next = add nuw nsw i64 %iv, 1 %sum.next = add i64 %sum, %elem - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ 
-218,7 +218,7 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 @@ -227,7 +227,7 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) @@ -246,7 +246,7 @@ define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -259,7 +259,7 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv store i64 %v, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ -276,7 +276,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 @@ -285,7 +285,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) ; CHECK-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 @@ -306,7 +306,7 @@ define void @uniform_store(ptr noalias 
nocapture %a, ptr noalias nocapture %b, i ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; CHECK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void @@ -320,7 +320,7 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv store i64 %v, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ -337,7 +337,7 @@ define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] ; CHECK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; CHECK: for.end: ; CHECK-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ] @@ -352,9 +352,74 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv store i64 %v, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: ret i64 %v } + + +define void @vector_add_trip1024(ptr noalias nocapture %a, i64 %v, i64 %n) { +; CHECK-LABEL: @vector_add_trip1024( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv2i64.p0(ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP8:%.*]] = add [[WIDE_MASKED_LOAD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0( [[TMP8]], ptr [[TMP7]], i32 8, [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; CHECK-NEXT: 
[[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]] +; CHECK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv + %elem = load i64, ptr %arrayidx + %add = add i64 %elem, %v + store i64 %add, ptr %arrayidx + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, 1024 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll index 0bdcf5b1efd01..0d06938e49688 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/uniform-load-store.ll @@ -14,13 +14,13 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -37,7 +37,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALABLE: scalar.ph: ; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ 
-48,7 +48,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void @@ -75,7 +75,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; FIXEDLEN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; FIXEDLEN: middle.block: -; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, 1024 ; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXEDLEN: scalar.ph: ; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -86,7 +86,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; FIXEDLEN: for.end: ; FIXEDLEN-NEXT: ret void @@ -100,14 +100,14 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 8 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -130,7 +130,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 
1025 ; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void @@ -143,20 +143,20 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8 ; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 ; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -164,7 +164,7 @@ define void @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i6 ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; TF-FIXEDLEN: for.end: ; TF-FIXEDLEN-NEXT: ret void @@ -178,7 +178,7 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv store i64 %v, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ -190,13 +190,13 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 
1024, [[TMP1]] +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -213,7 +213,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALABLE: scalar.ph: ; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -224,7 +224,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] @@ -252,7 +252,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; FIXEDLEN: middle.block: -; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, 1024 ; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXEDLEN: scalar.ph: ; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -263,7 +263,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; FIXEDLEN: for.end: ; FIXEDLEN-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ] @@ -278,7 +278,7 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] ; TF-SCALABLE-NEXT: 
store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ] @@ -286,37 +286,17 @@ define i64 @uniform_load_outside_use(ptr noalias nocapture %a, ptr noalias nocap ; ; TF-FIXEDLEN-LABEL: @uniform_load_outside_use( ; TF-FIXEDLEN-NEXT: entry: -; TF-FIXEDLEN-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; TF-FIXEDLEN: vector.ph: -; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] -; TF-FIXEDLEN: vector.body: -; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 8 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 -; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer -; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] -; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: -; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] -; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8 -; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] +; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; TF-FIXEDLEN-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8 +; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]] ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 +; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] ; TF-FIXEDLEN: for.end: -; TF-FIXEDLEN-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] +; TF-FIXEDLEN-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ] ; TF-FIXEDLEN-NEXT: ret i64 [[V_LCSSA]] ; entry: @@ -328,7 +308,7 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv store i64 %v, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - 
%exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ -341,13 +321,13 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer ; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -378,7 +358,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALABLE: scalar.ph: ; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -395,7 +375,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; SCALABLE-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void @@ -434,7 +414,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; FIXEDLEN: middle.block: -; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, 1024 ; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXEDLEN: scalar.ph: ; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -451,7 +431,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp 
eq i64 [[IV_NEXT]], 1025 ; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; FIXEDLEN: for.end: ; FIXEDLEN-NEXT: ret void @@ -465,7 +445,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() @@ -484,7 +464,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1024) +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1025) ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP12]], zeroinitializer ; TF-SCALABLE-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], i32 8, [[TMP13]], poison) @@ -518,7 +498,7 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-SCALABLE-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void @@ -534,22 +514,25 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP1]], <4 x i64> poison) -; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = xor <4 x i1> [[TMP1]], -; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer -; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 -; TF-FIXEDLEN-NEXT: store <4 x i64> 
[[PREDPHI]], ptr [[TMP4]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer +; TF-FIXEDLEN-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> [[BROADCAST_SPLAT]], i32 8, <4 x i1> [[TMP2]], <4 x i64> poison) +; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP1]], +; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer +; TF-FIXEDLEN-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i64> [[WIDE_MASKED_GATHER]], <4 x i64> zeroinitializer +; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP2]], [[TMP4]] +; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0 +; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[PREDPHI]], ptr [[TMP7]], i32 8, <4 x i1> [[TMP6]]) +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 +; TF-FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] @@ -563,8 +546,8 @@ define void @conditional_uniform_load(ptr noalias nocapture %a, ptr noalias noca ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-FIXEDLEN-NEXT: store i64 [[PHI]], ptr [[ARRAYIDX]], align 8 ; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 +; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] ; TF-FIXEDLEN: for.end: ; TF-FIXEDLEN-NEXT: ret void ; @@ -584,7 +567,7 @@ latch: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv store i64 %phi, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ -596,13 +579,13 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 
1024, [[TMP1]] +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; SCALABLE: vector.body: ; SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -619,7 +602,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; SCALABLE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALABLE: scalar.ph: ; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -630,7 +613,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void @@ -657,7 +640,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; FIXEDLEN-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; FIXEDLEN: middle.block: -; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, 1024 ; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXEDLEN: scalar.ph: ; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -668,7 +651,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; FIXEDLEN: for.end: ; FIXEDLEN-NEXT: ret void @@ -682,14 +665,14 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: 
[[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = load i64, ptr [[B:%.*]], align 1 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP6]], i64 0 ; TF-SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer @@ -712,7 +695,7 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void @@ -725,20 +708,20 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = load i64, ptr [[B:%.*]], align 1 ; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TMP1]], i64 0 ; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 +; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; 
TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -746,8 +729,8 @@ define void @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapt ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 +; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] ; TF-FIXEDLEN: for.end: ; TF-FIXEDLEN-NEXT: ret void ; @@ -760,7 +743,7 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv store i64 %v, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ -772,13 +755,13 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -795,7 +778,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALABLE: scalar.ph: ; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -806,7 +789,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], 
label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void @@ -835,7 +818,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; FIXEDLEN: middle.block: -; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, 1024 ; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXEDLEN: scalar.ph: ; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -846,7 +829,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; FIXEDLEN: for.end: ; FIXEDLEN-NEXT: ret void @@ -860,7 +843,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 @@ -869,7 +852,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 @@ -890,7 +873,7 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void @@ -905,18 +888,18 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 +; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -924,8 +907,8 @@ define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 +; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; TF-FIXEDLEN: for.end: ; TF-FIXEDLEN-NEXT: ret void ; @@ -938,7 +921,7 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv store i64 %v, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ -950,13 +933,13 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; 
SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -984,7 +967,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALABLE: scalar.ph: ; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -995,7 +978,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void @@ -1030,7 +1013,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; FIXEDLEN-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; FIXEDLEN: middle.block: -; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, 1024 ; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXEDLEN: scalar.ph: ; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1041,7 +1024,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; FIXEDLEN: for.end: ; FIXEDLEN-NEXT: ret void @@ -1055,7 +1038,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() @@ -1076,7 +1059,7 @@ define void 
@uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1024) +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1025) ; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[VEC_IND]], [[BROADCAST_SPLAT]], i32 8, [[ACTIVE_LANE_MASK]]) ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP11]] ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i32 0 @@ -1098,7 +1081,7 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void @@ -1111,23 +1094,46 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-FIXEDLEN-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; TF-FIXEDLEN-NEXT: br label [[VECTOR_BODY:%.*]] ; TF-FIXEDLEN: vector.body: -; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; TF-FIXEDLEN-NEXT: store i64 [[TMP3]], ptr [[B:%.*]], align 8 -; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0 -; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP5]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) +; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0 +; TF-FIXEDLEN-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; TF-FIXEDLEN: pred.store.if: +; TF-FIXEDLEN-NEXT: store i64 [[TMP0]], ptr [[B:%.*]], align 8 +; TF-FIXEDLEN-NEXT: br label [[PRED_STORE_CONTINUE]] +; TF-FIXEDLEN: pred.store.continue: +; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1 +; TF-FIXEDLEN-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] +; 
TF-FIXEDLEN: pred.store.if1: +; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 1 +; TF-FIXEDLEN-NEXT: store i64 [[TMP3]], ptr [[B]], align 8 +; TF-FIXEDLEN-NEXT: br label [[PRED_STORE_CONTINUE2]] +; TF-FIXEDLEN: pred.store.continue2: +; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2 +; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; TF-FIXEDLEN: pred.store.if3: +; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 2 +; TF-FIXEDLEN-NEXT: store i64 [[TMP5]], ptr [[B]], align 8 +; TF-FIXEDLEN-NEXT: br label [[PRED_STORE_CONTINUE4]] +; TF-FIXEDLEN: pred.store.continue4: +; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3 +; TF-FIXEDLEN-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] +; TF-FIXEDLEN: pred.store.if5: +; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 3 +; TF-FIXEDLEN-NEXT: store i64 [[TMP7]], ptr [[B]], align 8 +; TF-FIXEDLEN-NEXT: br label [[PRED_STORE_CONTINUE6]] +; TF-FIXEDLEN: pred.store.continue6: +; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; TF-FIXEDLEN-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0 +; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP9]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 +; TF-FIXEDLEN-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1135,8 +1141,8 @@ define void @uniform_store_of_loop_varying(ptr noalias nocapture %a, ptr noalias ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 +; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] ; TF-FIXEDLEN: for.end: ; TF-FIXEDLEN-NEXT: ret void ; @@ -1149,7 +1155,7 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv store i64 %v, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ -1161,13 +1167,13 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 
@llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: [[TMP4:%.*]] = call @llvm.experimental.stepvector.nxv2i64() ; SCALABLE-NEXT: [[TMP5:%.*]] = add [[TMP4]], zeroinitializer ; SCALABLE-NEXT: [[TMP6:%.*]] = mul [[TMP5]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) @@ -1198,7 +1204,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALABLE: scalar.ph: ; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1214,7 +1220,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void @@ -1253,7 +1259,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; FIXEDLEN: middle.block: -; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, 1024 ; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXEDLEN: scalar.ph: ; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1269,7 +1275,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; FIXEDLEN: for.end: ; FIXEDLEN-NEXT: ret void @@ -1283,7 +1289,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: 
[[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = call @llvm.experimental.stepvector.nxv2i64() @@ -1304,7 +1310,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1024) +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP11]], i64 1025) ; TF-SCALABLE-NEXT: [[TMP12:%.*]] = icmp ugt [[VEC_IND]], shufflevector ( insertelement ( poison, i64 10, i64 0), poison, zeroinitializer) ; TF-SCALABLE-NEXT: [[TMP13:%.*]] = select [[ACTIVE_LANE_MASK]], [[TMP12]], zeroinitializer ; TF-SCALABLE-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0( [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]], i32 8, [[TMP13]]) @@ -1336,7 +1342,7 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void @@ -1354,20 +1360,24 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP1]]) -; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] -; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0 -; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP3]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer +; TF-FIXEDLEN-NEXT: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> [[BROADCAST_SPLAT]], <4 x ptr> [[BROADCAST_SPLAT2]], i32 8, <4 x i1> [[TMP2]]) +; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] +; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = xor <4 x 
i1> [[TMP1]], +; TF-FIXEDLEN-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP4]], <4 x i1> zeroinitializer +; TF-FIXEDLEN-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP2]], [[TMP5]] +; TF-FIXEDLEN-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0 +; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP7]], i32 8, <4 x i1> [[TMP6]]) +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; TF-FIXEDLEN-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; TF-FIXEDLEN-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 +; TF-FIXEDLEN-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] @@ -1380,8 +1390,8 @@ define void @conditional_uniform_store(ptr noalias nocapture %a, ptr noalias noc ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 +; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; TF-FIXEDLEN: for.end: ; TF-FIXEDLEN-NEXT: ret void ; @@ -1399,7 +1409,7 @@ latch: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv store i64 %v, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: @@ -1412,13 +1422,13 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: entry: ; SCALABLE-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2 -; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]] +; SCALABLE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1025, [[TMP1]] ; SCALABLE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; SCALABLE: vector.ph: ; SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 -; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]] -; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]] +; SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] +; SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 
[[V:%.*]], i64 0 ; SCALABLE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; SCALABLE-NEXT: br label [[VECTOR_BODY:%.*]] @@ -1435,7 +1445,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SCALABLE-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; SCALABLE: middle.block: -; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]] +; SCALABLE-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] ; SCALABLE-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SCALABLE: scalar.ph: ; SCALABLE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1446,7 +1456,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; SCALABLE: for.end: ; SCALABLE-NEXT: ret void @@ -1475,7 +1485,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; FIXEDLEN-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; FIXEDLEN: middle.block: -; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 +; FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, 1024 ; FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; FIXEDLEN: scalar.ph: ; FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] @@ -1486,7 +1496,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] ; FIXEDLEN: for.end: ; FIXEDLEN-NEXT: ret void @@ -1500,7 +1510,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() ; TF-SCALABLE-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2 ; TF-SCALABLE-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 -; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; TF-SCALABLE-NEXT: [[N_RND_UP:%.*]] = add i64 1025, [[TMP4]] ; TF-SCALABLE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] ; TF-SCALABLE-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] ; TF-SCALABLE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[V:%.*]], i64 0 @@ -1509,7 +1519,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE: vector.body: ; TF-SCALABLE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-SCALABLE-NEXT: [[TMP5:%.*]] = 
add i64 [[INDEX]], 0 -; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1024) +; TF-SCALABLE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[TMP5]], i64 1025) ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 ; TF-SCALABLE-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP5]] ; TF-SCALABLE-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP6]], i32 0 @@ -1530,7 +1540,7 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-SCALABLE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-SCALABLE-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-SCALABLE-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 +; TF-SCALABLE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 ; TF-SCALABLE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; TF-SCALABLE: for.end: ; TF-SCALABLE-NEXT: ret void @@ -1545,18 +1555,18 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-FIXEDLEN: vector.body: ; TF-FIXEDLEN-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; TF-FIXEDLEN-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; TF-FIXEDLEN-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[TMP0]], i64 1025) ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1 ; TF-FIXEDLEN-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]] ; TF-FIXEDLEN-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -; TF-FIXEDLEN-NEXT: store <4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 8 -; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; TF-FIXEDLEN-NEXT: call void @llvm.masked.store.v4i64.p0(<4 x i64> [[BROADCAST_SPLAT]], ptr [[TMP2]], i32 8, <4 x i1> [[ACTIVE_LANE_MASK]]) +; TF-FIXEDLEN-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; TF-FIXEDLEN-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1028 +; TF-FIXEDLEN-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; TF-FIXEDLEN: middle.block: -; TF-FIXEDLEN-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024 -; TF-FIXEDLEN-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] +; TF-FIXEDLEN-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; TF-FIXEDLEN: scalar.ph: -; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; TF-FIXEDLEN-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1028, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] ; TF-FIXEDLEN-NEXT: br label [[FOR_BODY:%.*]] ; TF-FIXEDLEN: for.body: ; TF-FIXEDLEN-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -1564,8 +1574,8 @@ define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocap ; TF-FIXEDLEN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]] ; TF-FIXEDLEN-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8 ; TF-FIXEDLEN-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024 -; TF-FIXEDLEN-NEXT: 
br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; TF-FIXEDLEN-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1025 +; TF-FIXEDLEN-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; TF-FIXEDLEN: for.end: ; TF-FIXEDLEN-NEXT: ret void ; @@ -1578,7 +1588,7 @@ for.body: %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv store i64 %v, ptr %arrayidx %iv.next = add nuw nsw i64 %iv, 1 - %exitcond.not = icmp eq i64 %iv.next, 1024 + %exitcond.not = icmp eq i64 %iv.next, 1025 br i1 %exitcond.not, label %for.end, label %for.body for.end: From 2a6e39dbf84af4b3f8b31930fed786b3c56287f5 Mon Sep 17 00:00:00 2001 From: Ben Shi Date: Mon, 9 Jan 2023 12:05:00 +0800 Subject: [PATCH 525/691] [AVR] Do not emit 'LPM Rd, Z' on devices without FeatureLPMX The 'LPM' instruction has three forms: ------------------------ | form | feature | | ---------- | --------| | LPM | hasLPM | | LPM Rd, Z | hasLPMX | | LPM Rd, Z+ | hasLPMX | ------------------------ The second form is always selected in ISelDAGToDAG, even on devices without FeatureLPMX. This patch emits "LPM + MOV" on devices with only FeatureLPM. Reviewed By: jacquesguan Differential Revision: https://reviews.llvm.org/D141246 --- llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp | 33 ++++++++--- llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp | 3 +- llvm/lib/Target/AVR/AVRInstrInfo.td | 5 ++ llvm/test/CodeGen/AVR/elpm.ll | 58 ++++++++++++++++++++ llvm/test/CodeGen/AVR/pseudo/LPMBRdZ.mir | 40 ++++++++++++++ 5 files changed, 130 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/AVR/pseudo/LPMBRdZ.mir diff --git a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp index 06dc2b7c5b27b..b29eb87a55a01 100644 --- a/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp @@ -98,6 +98,8 @@ class AVRExpandPseudo : public MachineFunctionPass { // Common implementation of LPMWRdZ and ELPMWRdZ. bool expandLPMWELPMW(Block &MBB, BlockIt MBBI, bool IsExt); + // Common implementation of LPMBRdZ and ELPMBRdZ. + bool expandLPMBELPMB(Block &MBB, BlockIt MBBI, bool IsExt); }; char AVRExpandPseudo::ID = 0; @@ -858,28 +860,32 @@ bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { return expandLPMWELPMW(MBB, MBBI, true); } -template <> -bool AVRExpandPseudo::expand(Block &MBB, BlockIt MBBI) { +bool AVRExpandPseudo::expandLPMBELPMB(Block &MBB, BlockIt MBBI, bool IsExt) { MachineInstr &MI = *MBBI; Register DstReg = MI.getOperand(0).getReg(); Register SrcReg = MI.getOperand(1).getReg(); - Register BankReg = MI.getOperand(2).getReg(); bool SrcIsKill = MI.getOperand(1).isKill(); const AVRSubtarget &STI = MBB.getParent()->getSubtarget(); + bool HasX = IsExt ? STI.hasELPMX() : STI.hasLPMX(); // Set the I/O register RAMPZ for ELPM (out RAMPZ, rtmp). - buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(BankReg); + if (IsExt) { + Register BankReg = MI.getOperand(2).getReg(); + buildMI(MBB, MBBI, AVR::OUTARr).addImm(STI.getIORegRAMPZ()).addReg(BankReg); + } // Load byte. - if (STI.hasELPMX()) { - auto MILB = buildMI(MBB, MBBI, AVR::ELPMRdZ) + if (HasX) { + unsigned Opc = IsExt ? 
AVR::ELPMRdZ : AVR::LPMRdZ;
+    auto MILB = buildMI(MBB, MBBI, Opc)
                     .addReg(DstReg, RegState::Define)
                     .addReg(SrcReg, getKillRegState(SrcIsKill));
     MILB.setMemRefs(MI.memoperands());
   } else {
-    // For the basic 'ELPM' instruction, its operand[0] is the implicit
+    // For the basic ELPM/LPM instruction, its operand[0] is the implicit
     // 'Z' register, and its operand[1] is the implicit 'R0' register.
-    auto MILB = buildMI(MBB, MBBI, AVR::ELPM);
+    unsigned Opc = IsExt ? AVR::ELPM : AVR::LPM;
+    auto MILB = buildMI(MBB, MBBI, Opc);
     buildMI(MBB, MBBI, AVR::MOVRdRr)
         .addReg(DstReg, RegState::Define)
         .addReg(AVR::R0, RegState::Kill);
@@ -890,6 +896,16 @@ bool AVRExpandPseudo::expand<AVR::ELPMBRdZ>(Block &MBB, BlockIt MBBI) {
   return true;
 }
 
+template <>
+bool AVRExpandPseudo::expand<AVR::ELPMBRdZ>(Block &MBB, BlockIt MBBI) {
+  return expandLPMBELPMB(MBB, MBBI, true);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LPMBRdZ>(Block &MBB, BlockIt MBBI) {
+  return expandLPMBELPMB(MBB, MBBI, false);
+}
+
 template <>
 bool AVRExpandPseudo::expand<AVR::LPMWRdZPi>(Block &MBB, BlockIt MBBI) {
   llvm_unreachable("16-bit LPMPi is unimplemented");
@@ -2437,6 +2453,7 @@ bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
     EXPAND(AVR::LDWRdPtrPd);
   case AVR::LDDWRdYQ: //: FIXME: remove this once PR13375 gets fixed
     EXPAND(AVR::LDDWRdPtrQ);
+    EXPAND(AVR::LPMBRdZ);
     EXPAND(AVR::LPMWRdZ);
     EXPAND(AVR::LPMWRdZPi);
     EXPAND(AVR::ELPMBRdZ);
diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
index 6ea8e200bd4e9..8718997b86d07 100644
--- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -399,8 +399,9 @@ template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
   switch (VT.SimpleTy) {
   case MVT::i8:
     if (ProgMemBank == 0) {
+      unsigned Opc = Subtarget->hasLPMX() ? AVR::LPMRdZ : AVR::LPMBRdZ;
       ResNode =
-          CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other, Ptr);
+          CurDAG->getMachineNode(Opc, DL, MVT::i8, MVT::Other, Ptr);
     } else {
       // Do not combine the LDI instruction into the ELPM pseudo instruction,
       // since it may be reused by other ELPM pseudo instructions.
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index c272711bb8663..1e56f9447cb80 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1690,6 +1690,11 @@ let canFoldAsLoad = 1, isReMaterializable = 1, mayLoad = 1,
       : F16<0b1001010111001000, (outs), (ins), "lpm", []>,
         Requires<[HasLPM]>;
 
+  // This pseudo is a combination of the LPM and MOV instructions.
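+  // Roughly, on devices with FeatureLPMX it can expand to the single
+  // instruction
+  //   lpm $dst, Z
+  // while on devices with only FeatureLPM it expands to the implicit-operand
+  // form followed by a register move (an illustrative sketch, not an exact
+  // listing):
+  //   lpm           ; read the program-memory byte at Z into R0
+  //   mov $dst, r0  ; copy the loaded byte into $dst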
+ let Defs = [R0] in + def LPMBRdZ : Pseudo<(outs GPR8:$dst), (ins ZREG:$z), "lpmb\t$dst, $z", []>, + Requires<[HasLPM]>; + def LPMRdZ : FLPMX<0, 0, (outs GPR8 : $rd), diff --git a/llvm/test/CodeGen/AVR/elpm.ll b/llvm/test/CodeGen/AVR/elpm.ll index ba28bc814591d..2e989d1442ffd 100644 --- a/llvm/test/CodeGen/AVR/elpm.ll +++ b/llvm/test/CodeGen/AVR/elpm.ll @@ -152,6 +152,24 @@ define signext i8 @foob0(i16 %a, i16 %b) { ; CHECK-NEXT: lsl r25 ; CHECK-NEXT: sbc r25, r25 ; CHECK-NEXT: ret +; +; NOX-LABEL: foob0: +; NOX: ; %bb.0: ; %entry +; NOX-NEXT: subi r22, lo8(-(arrb1)) +; NOX-NEXT: sbci r23, hi8(-(arrb1)) +; NOX-NEXT: movw r30, r22 +; NOX-NEXT: lpm +; NOX-NEXT: mov r18, r0 +; NOX-NEXT: subi r24, lo8(-(arrb1)) +; NOX-NEXT: sbci r25, hi8(-(arrb1)) +; NOX-NEXT: movw r30, r24 +; NOX-NEXT: lpm +; NOX-NEXT: mov r24, r0 +; NOX-NEXT: sub r24, r18 +; NOX-NEXT: mov r25, r24 +; NOX-NEXT: lsl r25 +; NOX-NEXT: sbc r25, r25 +; NOX-NEXT: ret entry: %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %a %0 = load i8, i8 addrspace(1)* %arrayidx, align 1 @@ -179,6 +197,26 @@ define signext i8 @foob1(i16 %a, i16 %b) { ; CHECK-NEXT: lsl r25 ; CHECK-NEXT: sbc r25, r25 ; CHECK-NEXT: ret +; +; NOX-LABEL: foob1: +; NOX: ; %bb.0: ; %entry +; NOX-NEXT: subi r22, lo8(-(arrb3)) +; NOX-NEXT: sbci r23, hi8(-(arrb3)) +; NOX-NEXT: movw r30, r22 +; NOX-NEXT: ldi r18, 2 +; NOX-NEXT: out 59, r18 +; NOX-NEXT: elpm +; NOX-NEXT: mov r18, r0 +; NOX-NEXT: subi r24, lo8(-(arrb1)) +; NOX-NEXT: sbci r25, hi8(-(arrb1)) +; NOX-NEXT: movw r30, r24 +; NOX-NEXT: lpm +; NOX-NEXT: mov r24, r0 +; NOX-NEXT: sub r24, r18 +; NOX-NEXT: mov r25, r24 +; NOX-NEXT: lsl r25 +; NOX-NEXT: sbc r25, r25 +; NOX-NEXT: ret entry: %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(1)* @arrb1, i16 0, i16 %a %0 = load i8, i8 addrspace(1)* %arrayidx, align 1 @@ -206,6 +244,26 @@ define signext i8 @foob2(i16 %a, i16 %b) { ; CHECK-NEXT: lsl r25 ; CHECK-NEXT: sbc r25, r25 ; CHECK-NEXT: ret +; +; NOX-LABEL: foob2: +; NOX: ; %bb.0: ; %entry +; NOX-NEXT: subi r24, lo8(-(arrb5)) +; NOX-NEXT: sbci r25, hi8(-(arrb5)) +; NOX-NEXT: movw r30, r24 +; NOX-NEXT: ldi r24, 4 +; NOX-NEXT: out 59, r24 +; NOX-NEXT: elpm +; NOX-NEXT: mov r24, r0 +; NOX-NEXT: subi r22, lo8(-(arrb1)) +; NOX-NEXT: sbci r23, hi8(-(arrb1)) +; NOX-NEXT: movw r30, r22 +; NOX-NEXT: lpm +; NOX-NEXT: mov r25, r0 +; NOX-NEXT: sub r24, r25 +; NOX-NEXT: mov r25, r24 +; NOX-NEXT: lsl r25 +; NOX-NEXT: sbc r25, r25 +; NOX-NEXT: ret entry: %arrayidx = getelementptr inbounds [4 x i8], [4 x i8] addrspace(5)* @arrb5, i16 0, i16 %a %0 = load i8, i8 addrspace(5)* %arrayidx, align 1 diff --git a/llvm/test/CodeGen/AVR/pseudo/LPMBRdZ.mir b/llvm/test/CodeGen/AVR/pseudo/LPMBRdZ.mir new file mode 100644 index 0000000000000..6eaa9435220ea --- /dev/null +++ b/llvm/test/CodeGen/AVR/pseudo/LPMBRdZ.mir @@ -0,0 +1,40 @@ +# RUN: llc -mtriple=avr -mattr=+lpm -mattr=+lpmx -start-before=greedy %s -o - \ +# RUN: | FileCheck %s +# RUN: llc -mtriple=avr -mattr=+lpm -mattr=-lpmx -start-before=greedy %s -o - \ +# RUN: | FileCheck --check-prefix=NOX %s + +# This test checks the expansion of the 8-bit LPMBRdZ pseudo instruction and that +# the register allocator won't use R31R30 as an output register (which would +# lead to undefined behavior). + +--- | + target triple = "avr--" + define void @test_lpmbrdz() { + entry: + ret void + } +... 
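+# (For context: Z is the R31:R30 register pair on AVR, so an output register
+# in R31R30 would overlap the pointer operand of the load.)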
+
+---
+name: test_lpmbrdz
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $r31r30
+
+    ; CHECK-LABEL: test_lpmbrdz:
+    ; CHECK: ; %bb.0:
+    ; CHECK-NEXT: lpm r30, Z
+    ; CHECK-NEXT: ret
+
+    ; NOX-LABEL: test_lpmbrdz
+    ; NOX: ; %bb.0:
+    ; NOX-NEXT: lpm
+    ; NOX-NEXT: mov r30, r0
+    ; NOX-NEXT: ret
+
+    %1:zreg = COPY killed $r31r30
+    %2:gpr8 = LPMBRdZ %1, implicit-def dead $r0
+    $r30 = COPY %2
+    RET implicit $r30
+...

From ff426a6250e9fa3860c00ef7c5c7e53534a4dc67 Mon Sep 17 00:00:00 2001
From: Karl-Johan Karlsson
Date: Fri, 24 Mar 2023 10:33:46 +0100
Subject: [PATCH 526/691] [compiler-rt] Fix signed integer overflow in int_mulo_impl.inc

When compiling compiler-rt with -fsanitize=undefined and running the test
cases, you end up with the following warning:

UBSan:/repo/uabkaka/llvm-project/compiler-rt/lib/builtins/int_mulo_impl.inc:24:23: signed integer overflow: -1 * -2147483648 cannot be represented in type 'si_int' (aka 'long')

This can be avoided by doing the multiplication in a matching unsigned
variant of the type.

This was found in an out-of-tree target.

Reviewed By: phosek

Differential Revision: https://reviews.llvm.org/D146623
---
 compiler-rt/lib/builtins/int_mulo_impl.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/lib/builtins/int_mulo_impl.inc b/compiler-rt/lib/builtins/int_mulo_impl.inc
index 592b7893edbdc..27e7c8c43d600 100644
--- a/compiler-rt/lib/builtins/int_mulo_impl.inc
+++ b/compiler-rt/lib/builtins/int_mulo_impl.inc
@@ -21,7 +21,7 @@ static __inline fixint_t __muloXi4(fixint_t a, fixint_t b, int *overflow) {
   const fixint_t MIN = (fixint_t)((fixuint_t)1 << (N - 1));
   const fixint_t MAX = ~MIN;
   *overflow = 0;
-  fixint_t result = a * b;
+  fixint_t result = (fixuint_t)a * b;
   if (a == MIN) {
     if (b != 0 && b != 1)
       *overflow = 1;

From 03101e141bf745f036be604e2a5a7c085eb02f5e Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya
Date: Thu, 23 Mar 2023 17:36:54 +0100
Subject: [PATCH 527/691] [include-cleaner] Attribute references to explicit specializations

Fixes https://github.com/llvm/llvm-project/issues/61652

Differential Revision: https://reviews.llvm.org/D146732
---
 .../include-cleaner/lib/WalkAST.cpp           | 27 ++++++++++--
 .../include-cleaner/unittests/WalkASTTest.cpp | 42 +++++++++++++++++--
 2 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
index 0ca84145721a6..e70a24367d6a9 100644
--- a/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
+++ b/clang-tools-extra/include-cleaner/lib/WalkAST.cpp
@@ -7,16 +7,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "AnalysisInternal.h"
+#include "clang-include-cleaner/Types.h"
 #include "clang/AST/Decl.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/AST/TemplateBase.h"
 #include "clang/AST/TemplateName.h"
 #include "clang/AST/Type.h"
 #include "clang/AST/TypeLoc.h"
 #include "clang/Basic/SourceLocation.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/Support/Casting.h"
 
 namespace clang::include_cleaner {
@@ -62,6 +65,24 @@ class ASTWalker : public RecursiveASTVisitor<ASTWalker> {
       return resolveTemplateName(TST->getTemplateName());
     return Base->getAsRecordDecl();
   }
+  // Templated as TemplateSpecializationType and
+  // DeducedTemplateSpecializationType don't share a common base.
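+  // For reference: TemplateSpecializationType covers written argument lists
+  // such as "Foo<int>", while DeducedTemplateSpecializationType covers CTAD
+  // spellings like "Foo x(42);" where the arguments are deduced.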
+  template <typename T>
+  // Picks the most specific specialization for a
+  // (Deduced)TemplateSpecializationType, while prioritizing using-decls.
+  NamedDecl *getMostRelevantTemplatePattern(const T *TST) {
+    // This is the underlying decl used by TemplateSpecializationType; it can
+    // be null when the type is dependent.
+    auto *RD = TST->getAsTagDecl();
+    auto *ND = resolveTemplateName(TST->getTemplateName());
+    // In case of exported template names, always prefer the using-decl. This
+    // implies we'll point at the using-decl even when there's an explicit
+    // specialization using the exported name, but that's rare.
+    if (llvm::isa_and_present<UsingShadowDecl>(ND))
+      return ND;
+    // Fall back to the primary template for dependent instantiations.
+    return RD ? RD : ND;
+  }
 
 public:
   ASTWalker(DeclCallback Callback) : Callback(Callback) {}
@@ -161,17 +182,15 @@ class ASTWalker : public RecursiveASTVisitor<ASTWalker> {
   }
 
   bool VisitTemplateSpecializationTypeLoc(TemplateSpecializationTypeLoc TL) {
-    // FIXME: Handle explicit specializations.
     report(TL.getTemplateNameLoc(),
-           resolveTemplateName(TL.getTypePtr()->getTemplateName()));
+           getMostRelevantTemplatePattern(TL.getTypePtr()));
     return true;
   }
 
   bool VisitDeducedTemplateSpecializationTypeLoc(
       DeducedTemplateSpecializationTypeLoc TL) {
-    // FIXME: Handle specializations.
     report(TL.getTemplateNameLoc(),
-           resolveTemplateName(TL.getTypePtr()->getTemplateName()));
+           getMostRelevantTemplatePattern(TL.getTypePtr()));
     return true;
   }
 
diff --git a/clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp b/clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp
index 68b6b217a2e01..8fcc2b5886ae4 100644
--- a/clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp
+++ b/clang-tools-extra/include-cleaner/unittests/WalkASTTest.cpp
@@ -114,6 +114,25 @@ TEST(WalkAST, TagType) {
   // One explicit call from the TypeLoc in constructor spelling, another
   // implicit reference through the constructor call.
   testWalk("struct $explicit^$implicit^S { static int x; };", "auto y = ^S();");
+  testWalk("template struct $explicit^Foo {};", "^Foo x;");
+  testWalk(R"cpp(
+      template struct Foo {};
+      template<> struct $explicit^Foo {};)cpp",
+           "^Foo x;");
+  testWalk(R"cpp(
+      template struct Foo {};
+      template struct $explicit^Foo { void x(); };)cpp",
+           "^Foo x;");
+  testWalk(R"cpp(
+      template struct Foo {};
+      template struct $explicit^Foo;)cpp",
+           "^Foo x;");
+  // FIXME: This is broken due to
+  // https://github.com/llvm/llvm-project/issues/42259.
+  testWalk(R"cpp(
+      template struct $explicit^Foo { Foo(T); };
+      template<> struct Foo { void get(); Foo(int); };)cpp",
+           "^Foo x(3);");
 }
 
 TEST(WalkAST, Alias) {
@@ -124,6 +143,25 @@ TEST(WalkAST, Alias) {
            "int y = ^x;");
   testWalk("using $explicit^foo = int;", "^foo x;");
   testWalk("struct S {}; using $explicit^foo = S;", "^foo x;");
+  testWalk(R"cpp(
+      template struct Foo {};
+      template<> struct Foo {};
+      namespace ns { using ::$explicit^Foo; })cpp",
+           "ns::^Foo x;");
+  testWalk(R"cpp(
+      template struct Foo {};
+      namespace ns { using ::Foo; }
+      template<> struct ns::$explicit^Foo {};)cpp",
+           "^Foo x;");
+  // AST doesn't have enough information to figure out whether a
+  // specialization happened through an exported type or not. So err towards
+  // attributing the use to the using-decl; specializations on the exported
+  // type should be rare, and they're not permitted on type-aliases.
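+  // (An explicit specialization must name the underlying class template, so
+  // an alias introduced via "using foo = ..." can never be specialized; a
+  // using-declaration, as below, can still be the path the specialization
+  // is written through.)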
+ testWalk(R"cpp( + template struct Foo {}; + namespace ns { using ::$explicit^Foo; } + template<> struct ns::Foo {};)cpp", + "ns::^Foo x;"); } TEST(WalkAST, Using) { @@ -183,10 +221,6 @@ TEST(WalkAST, TemplateNames) { template